xref: /freebsd/sys/kern/kern_switch.c (revision 9454b2d864463f856c6aaf147851104b25cf4037)
19454b2d8SWarner Losh /*-
2d5a08a60SJake Burkholder  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
3d5a08a60SJake Burkholder  * All rights reserved.
4dba6c5a6SPeter Wemm  *
5dba6c5a6SPeter Wemm  * Redistribution and use in source and binary forms, with or without
6dba6c5a6SPeter Wemm  * modification, are permitted provided that the following conditions
7dba6c5a6SPeter Wemm  * are met:
8dba6c5a6SPeter Wemm  * 1. Redistributions of source code must retain the above copyright
9dba6c5a6SPeter Wemm  *    notice, this list of conditions and the following disclaimer.
10dba6c5a6SPeter Wemm  * 2. Redistributions in binary form must reproduce the above copyright
11dba6c5a6SPeter Wemm  *    notice, this list of conditions and the following disclaimer in the
12dba6c5a6SPeter Wemm  *    documentation and/or other materials provided with the distribution.
13dba6c5a6SPeter Wemm  *
14dba6c5a6SPeter Wemm  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15dba6c5a6SPeter Wemm  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16dba6c5a6SPeter Wemm  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17dba6c5a6SPeter Wemm  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18dba6c5a6SPeter Wemm  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19dba6c5a6SPeter Wemm  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20dba6c5a6SPeter Wemm  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21dba6c5a6SPeter Wemm  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22dba6c5a6SPeter Wemm  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23dba6c5a6SPeter Wemm  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24dba6c5a6SPeter Wemm  * SUCH DAMAGE.
25dba6c5a6SPeter Wemm  */
26dba6c5a6SPeter Wemm 
27e602ba25SJulian Elischer /***
28e602ba25SJulian Elischer Here is the logic..
29e602ba25SJulian Elischer 
30e602ba25SJulian Elischer If there are N processors, then there are at most N KSEs (kernel
31e602ba25SJulian Elischer schedulable entities) working to process threads that belong to a
3209a4a69cSRobert Watson KSEGROUP (kg). If there are X of these KSEs actually running at the
33e602ba25SJulian Elischer moment in question, then there are at most M = (N - X) of these KSEs on
34e602ba25SJulian Elischer the run queue, as running KSEs are not on the queue.
35e602ba25SJulian Elischer 
36e602ba25SJulian Elischer Runnable threads are queued off the KSEGROUP in priority order.
37e602ba25SJulian Elischer If there are M or more threads runnable, the top M threads
38e602ba25SJulian Elischer (by priority) are 'preassigned' to the M KSEs not running. The KSEs take
39e602ba25SJulian Elischer their priority from those threads and are put on the run queue.
40e602ba25SJulian Elischer 
41e602ba25SJulian Elischer The last thread that had a priority high enough to have a KSE associated
42e602ba25SJulian Elischer with it, AND IS ON THE RUN QUEUE, is pointed to by
43e602ba25SJulian Elischer kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
44e602ba25SJulian Elischer assigned, either because all the available KSEs are actively running or
45e602ba25SJulian Elischer because there are no threads queued, that pointer is NULL.
46e602ba25SJulian Elischer 
47e602ba25SJulian Elischer When a KSE is removed from the run queue in order to run, we know it was
48e602ba25SJulian Elischer associated with the highest priority thread in the queue (at the head
49e602ba25SJulian Elischer of the queue). If it is also the last assigned, we know M was 1 and must
50e602ba25SJulian Elischer now be 0. Since the thread is no longer queued, that pointer must be
51e602ba25SJulian Elischer cleared. Since we know there were no more KSEs available
52e602ba25SJulian Elischer (M was 1 and is now 0), and since we are not FREEING our KSE
53e602ba25SJulian Elischer but using it, there are STILL no more KSEs available, so the next
54e602ba25SJulian Elischer thread in the ksegrp list will not have a KSE to assign to it, and the
55e602ba25SJulian Elischer pointer must be made 'invalid' (NULL).
56e602ba25SJulian Elischer 
57e602ba25SJulian Elischer The pointer exists so that when a new thread is made runnable, it can
58e602ba25SJulian Elischer have its priority compared with the last assigned thread to see if
59e602ba25SJulian Elischer it should 'steal' its KSE or not, i.e. whether it is 'earlier'
60e602ba25SJulian Elischer on the list than that thread or later. If it's earlier, then the KSE is
61e602ba25SJulian Elischer removed from the last assigned (which is now not assigned a KSE)
62e602ba25SJulian Elischer and reassigned to the new thread, which is placed earlier in the list.
63e602ba25SJulian Elischer The pointer is then backed up to the previous thread (which may or may not
64e602ba25SJulian Elischer be the new thread).
65e602ba25SJulian Elischer 
66e602ba25SJulian Elischer When a thread sleeps or is removed, the KSE becomes available and if there
67e602ba25SJulian Elischer are queued threads that are not assigned KSEs, the highest priority one of
68e602ba25SJulian Elischer them is assigned the KSE, which is then placed back on the run queue at
69e602ba25SJulian Elischer the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
70e602ba25SJulian Elischer to point to it.
71e602ba25SJulian Elischer 
72e602ba25SJulian Elischer The following diagram shows 2 KSEs and 3 threads from a single process.
73e602ba25SJulian Elischer 
74e602ba25SJulian Elischer  RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
75e602ba25SJulian Elischer               \    \____
76e602ba25SJulian Elischer                \        \
77e602ba25SJulian Elischer     KSEGROUP---thread--thread--thread    (queued in priority order)
78e602ba25SJulian Elischer         \                 /
79e602ba25SJulian Elischer          \_______________/
80e602ba25SJulian Elischer           (last_assigned)
81e602ba25SJulian Elischer 
82e602ba25SJulian Elischer The result of this scheme is that the M available KSEs are always
83e602ba25SJulian Elischer queued at the priorities they have inherited from the M highest priority
84e602ba25SJulian Elischer threads for that KSEGROUP. If this situation changes, the KSEs are
85e602ba25SJulian Elischer reassigned to keep this true.
86677b542eSDavid E. O'Brien ***/
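/*
 * A worked example of the scheme above (the priorities are made up purely
 * for illustration): suppose N = 2 KSEs and three runnable threads at
 * priorities 4, 6 and 9.  If neither KSE is currently running (X = 0,
 * M = 2), the threads at priorities 4 and 6 are preassigned to the two
 * KSEs, the KSEs sit on the system run queue at priorities 4 and 6, and
 * kg_last_assigned points at the priority 6 thread; the priority 9 thread
 * waits on the ksegrp queue with no KSE.  If a new thread at priority 5
 * becomes runnable, it is 'earlier' than the last assigned thread, so it
 * takes over that thread's KSE and kg_last_assigned is backed up to point
 * at the new priority 5 thread.
 */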
87e602ba25SJulian Elischer 
88677b542eSDavid E. O'Brien #include <sys/cdefs.h>
89677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
90e602ba25SJulian Elischer 
916804a3abSJulian Elischer #include "opt_sched.h"
920c0b25aeSJohn Baldwin 
93ed062c8dSJulian Elischer #ifndef KERN_SWITCH_INCLUDE
94dba6c5a6SPeter Wemm #include <sys/param.h>
95dba6c5a6SPeter Wemm #include <sys/systm.h>
962d50560aSMarcel Moolenaar #include <sys/kdb.h>
97dba6c5a6SPeter Wemm #include <sys/kernel.h>
980384fff8SJason Evans #include <sys/ktr.h>
99f34fa851SJohn Baldwin #include <sys/lock.h>
10035e0e5b3SJohn Baldwin #include <sys/mutex.h>
101dba6c5a6SPeter Wemm #include <sys/proc.h>
102dba6c5a6SPeter Wemm #include <sys/queue.h>
103b43179fbSJeff Roberson #include <sys/sched.h>
104ed062c8dSJulian Elischer #else  /* KERN_SWITCH_INCLUDE */
1050d2a2989SPeter Wemm #if defined(SMP) && (defined(__i386__) || defined(__amd64__))
106cc66ebe2SPeter Wemm #include <sys/smp.h>
107cc66ebe2SPeter Wemm #endif
108182da820SMatthew Dillon #include <machine/critical.h>
1096804a3abSJulian Elischer #if defined(SMP) && defined(SCHED_4BSD)
1106804a3abSJulian Elischer #include <sys/sysctl.h>
1116804a3abSJulian Elischer #endif
1126804a3abSJulian Elischer 
1139923b511SScott Long #ifdef FULL_PREEMPTION
1149923b511SScott Long #ifndef PREEMPTION
1159923b511SScott Long #error "The FULL_PREEMPTION option requires the PREEMPTION option"
1169923b511SScott Long #endif
1179923b511SScott Long #endif
118dba6c5a6SPeter Wemm 
119d2ac2316SJake Burkholder CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
120d2ac2316SJake Burkholder 
121ed062c8dSJulian Elischer #define td_kse td_sched
122ed062c8dSJulian Elischer 
123e602ba25SJulian Elischer /************************************************************************
124e602ba25SJulian Elischer  * Functions that manipulate runnability from a thread perspective.	*
125e602ba25SJulian Elischer  ************************************************************************/
126e602ba25SJulian Elischer /*
1275215b187SJeff Roberson  * Select the KSE that will be run next.  From that find the thread, and
128e602ba25SJulian Elischer  * remove it from the KSEGRP's run queue.  If there is thread clustering,
129e602ba25SJulian Elischer  * this will be what does it.
130e602ba25SJulian Elischer  */
131b40ce416SJulian Elischer struct thread *
132b40ce416SJulian Elischer choosethread(void)
133dba6c5a6SPeter Wemm {
134e602ba25SJulian Elischer 	struct kse *ke;
135e602ba25SJulian Elischer 	struct thread *td;
136e602ba25SJulian Elischer 	struct ksegrp *kg;
137e602ba25SJulian Elischer 
1380d2a2989SPeter Wemm #if defined(SMP) && (defined(__i386__) || defined(__amd64__))
139cc66ebe2SPeter Wemm 	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
140cc66ebe2SPeter Wemm 		/* Shutting down, run idlethread on APs */
141cc66ebe2SPeter Wemm 		td = PCPU_GET(idlethread);
142cc66ebe2SPeter Wemm 		ke = td->td_kse;
143cc66ebe2SPeter Wemm 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
144cc66ebe2SPeter Wemm 		ke->ke_flags |= KEF_DIDRUN;
145cc66ebe2SPeter Wemm 		TD_SET_RUNNING(td);
146cc66ebe2SPeter Wemm 		return (td);
147cc66ebe2SPeter Wemm 	}
148cc66ebe2SPeter Wemm #endif
149cc66ebe2SPeter Wemm 
150fe799533SAndrew Gallatin retry:
151cc66ebe2SPeter Wemm 	ke = sched_choose();
152cc66ebe2SPeter Wemm 	if (ke) {
153e602ba25SJulian Elischer 		td = ke->ke_thread;
154e602ba25SJulian Elischer 		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
155e602ba25SJulian Elischer 		kg = ke->ke_ksegrp;
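		/*
		 * For a threaded process the chosen thread is also queued on
		 * its ksegrp's run queue: unhook it here and, if it was the
		 * last thread with a KSE assigned, back kg_last_assigned up
		 * one entry.
		 */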
156ed062c8dSJulian Elischer 		if (td->td_proc->p_flag & P_HADTHREADS) {
15733c06e1dSJulian Elischer 			if (kg->kg_last_assigned == td) {
158e602ba25SJulian Elischer 				kg->kg_last_assigned = TAILQ_PREV(td,
159e602ba25SJulian Elischer 				    threadqueue, td_runq);
16033c06e1dSJulian Elischer 			}
161d03c79eeSDavid Xu 			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
1621a5cd27bSJulian Elischer 		}
163e602ba25SJulian Elischer 		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
164e602ba25SJulian Elischer 		    td, td->td_priority);
165e602ba25SJulian Elischer 	} else {
16640e55026SJulian Elischer 		/* Simulate runq_choose() having returned the idle thread */
167e602ba25SJulian Elischer 		td = PCPU_GET(idlethread);
168472be958SJulian Elischer 		ke = td->td_kse;
169e602ba25SJulian Elischer 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
170e602ba25SJulian Elischer 	}
171472be958SJulian Elischer 	ke->ke_flags |= KEF_DIDRUN;
17293a7aa79SJulian Elischer 
17393a7aa79SJulian Elischer 	/*
174faaa20f6SJulian Elischer 	 * If we are in panic, only allow system threads,
175faaa20f6SJulian Elischer 	 * plus the one we are running in, to be run.
17693a7aa79SJulian Elischer 	 */
177fe799533SAndrew Gallatin 	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
178faaa20f6SJulian Elischer 	    (td->td_flags & TDF_INPANIC) == 0)) {
179faaa20f6SJulian Elischer 		/* note that it is no longer on the run queue */
180faaa20f6SJulian Elischer 		TD_SET_CAN_RUN(td);
181fe799533SAndrew Gallatin 		goto retry;
182faaa20f6SJulian Elischer 	}
18393a7aa79SJulian Elischer 
18471fad9fdSJulian Elischer 	TD_SET_RUNNING(td);
185e602ba25SJulian Elischer 	return (td);
186e602ba25SJulian Elischer }
187e602ba25SJulian Elischer 
188e602ba25SJulian Elischer /*
189ed062c8dSJulian Elischer  * Given a surplus system slot, try to assign a new runnable thread to it.
190ed062c8dSJulian Elischer  * Called from:
191ed062c8dSJulian Elischer  *  sched_thread_exit()  (local)
192ed062c8dSJulian Elischer  *  sched_switch()  (local)
19414f0e2e9SJulian Elischer  *  remrunqueue()  (local)  (not at the moment)
195e602ba25SJulian Elischer  */
196ed062c8dSJulian Elischer static void
197ed062c8dSJulian Elischer slot_fill(struct ksegrp *kg)
198e602ba25SJulian Elischer {
199e602ba25SJulian Elischer 	struct thread *td;
200e602ba25SJulian Elischer 
20133c06e1dSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
202ed062c8dSJulian Elischer 	while (kg->kg_avail_opennings > 0) {
203e602ba25SJulian Elischer 		/*
2046f8132a8SJulian Elischer 		 * Find the first unassigned thread
2056f8132a8SJulian Elischer 		 */
2065215b187SJeff Roberson 		if ((td = kg->kg_last_assigned) != NULL)
2076f8132a8SJulian Elischer 			td = TAILQ_NEXT(td, td_runq);
2085215b187SJeff Roberson 		else
2096f8132a8SJulian Elischer 			td = TAILQ_FIRST(&kg->kg_runq);
2106f8132a8SJulian Elischer 
2116f8132a8SJulian Elischer 		/*
212ed062c8dSJulian Elischer 		 * If we found one, send it to the system scheduler.
213e602ba25SJulian Elischer 		 */
214e602ba25SJulian Elischer 		if (td) {
215e602ba25SJulian Elischer 			kg->kg_last_assigned = td;
21684f9d4b1SStephan Uphoff 			sched_add(td, SRQ_YIELDING);
217ed062c8dSJulian Elischer 			CTR2(KTR_RUNQ, "slot_fill: td%p -> kg%p", td, kg);
218ed062c8dSJulian Elischer 		} else {
219ed062c8dSJulian Elischer 			/* no threads to use up the slots. quit now */
220ed062c8dSJulian Elischer 			break;
22148bfcdddSJulian Elischer 		}
222ed062c8dSJulian Elischer 	}
223d5a08a60SJake Burkholder }
224d5a08a60SJake Burkholder 
225e8807f22SJulian Elischer #ifdef	SCHED_4BSD
226e602ba25SJulian Elischer /*
227e602ba25SJulian Elischer  * Remove a thread from its KSEGRP's run queue.
228e602ba25SJulian Elischer  * This in turn may remove it from a KSE if it was already assigned
229e602ba25SJulian Elischer  * to one, possibly causing a new thread to be assigned to the KSE
2305215b187SJeff Roberson  * and the KSE getting a new priority.
231e602ba25SJulian Elischer  */
2321f955e2dSJulian Elischer static void
233b40ce416SJulian Elischer remrunqueue(struct thread *td)
234d5a08a60SJake Burkholder {
23548bfcdddSJulian Elischer 	struct thread *td2, *td3;
236e602ba25SJulian Elischer 	struct ksegrp *kg;
237e602ba25SJulian Elischer 	struct kse *ke;
238e602ba25SJulian Elischer 
239e602ba25SJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
24071fad9fdSJulian Elischer 	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
241e602ba25SJulian Elischer 	kg = td->td_ksegrp;
242e602ba25SJulian Elischer 	ke = td->td_kse;
243e602ba25SJulian Elischer 	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
24471fad9fdSJulian Elischer 	TD_SET_CAN_RUN(td);
2455215b187SJeff Roberson 	/*
2465215b187SJeff Roberson 	 * If it is not a threaded process, take the shortcut.
2475215b187SJeff Roberson 	 */
248ed062c8dSJulian Elischer 	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
2493389af30SJulian Elischer 		/* remove from sys run queue and free up a slot */
2507cf90fb3SJeff Roberson 		sched_rem(td);
251c3b98db0SJulian Elischer 		ke->ke_state = KES_THREAD;
252e602ba25SJulian Elischer 		return;
253d5a08a60SJake Burkholder 	}
25448bfcdddSJulian Elischer    	td3 = TAILQ_PREV(td, threadqueue, td_runq);
25548bfcdddSJulian Elischer 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
256ed062c8dSJulian Elischer 	if (ke->ke_state == KES_ONRUNQ) {
257e602ba25SJulian Elischer 		/*
2583389af30SJulian Elischer 		 * This thread has been assigned to the system run queue.
259e602ba25SJulian Elischer 		 * We need to dissociate it and try to assign the
260e602ba25SJulian Elischer 		 * KSE to the next available thread. Then, we should
261e602ba25SJulian Elischer 		 * see if we need to move the KSE in the run queues.
262e602ba25SJulian Elischer 		 */
2637cf90fb3SJeff Roberson 		sched_rem(td);
26493a7aa79SJulian Elischer 		ke->ke_state = KES_THREAD;
265e602ba25SJulian Elischer 		td2 = kg->kg_last_assigned;
266e602ba25SJulian Elischer 		KASSERT((td2 != NULL), ("last assigned has wrong value"));
26748bfcdddSJulian Elischer 		if (td2 == td)
268e602ba25SJulian Elischer 			kg->kg_last_assigned = td3;
2693389af30SJulian Elischer 		/* slot_fill(kg); */ /* will replace it with another */
270e602ba25SJulian Elischer 	}
271e602ba25SJulian Elischer }
272e8807f22SJulian Elischer #endif
2731f955e2dSJulian Elischer 
2741f955e2dSJulian Elischer /*
2751f955e2dSJulian Elischer  * Change the priority of a thread that is on the run queue.
2761f955e2dSJulian Elischer  */
2771f955e2dSJulian Elischer void
2781f955e2dSJulian Elischer adjustrunqueue(struct thread *td, int newpri)
2791f955e2dSJulian Elischer {
2801f955e2dSJulian Elischer 	struct ksegrp *kg;
2811f955e2dSJulian Elischer 	struct kse *ke;
2821f955e2dSJulian Elischer 
2831f955e2dSJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
2841f955e2dSJulian Elischer 	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
2855215b187SJeff Roberson 
2861f955e2dSJulian Elischer 	ke = td->td_kse;
2871f955e2dSJulian Elischer 	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
2885215b187SJeff Roberson 	/*
2895215b187SJeff Roberson 	 * If it is not a threaded process, take the shortcut.
2905215b187SJeff Roberson 	 */
291ed062c8dSJulian Elischer 	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
2921f955e2dSJulian Elischer 		/* We only care about the kse in the run queue. */
29324c5baaeSJulian Elischer 		td->td_priority = newpri;
2941f955e2dSJulian Elischer 		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
2957cf90fb3SJeff Roberson 			sched_rem(td);
2962630e4c9SJulian Elischer 			sched_add(td, SRQ_BORING);
2971f955e2dSJulian Elischer 		}
2981f955e2dSJulian Elischer 		return;
2991f955e2dSJulian Elischer 	}
3005215b187SJeff Roberson 
3015215b187SJeff Roberson 	/* It is a threaded process */
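	/*
	 * Pull the thread off the system run queue (if it is there) and off
	 * the ksegrp run queue, then feed it back through setrunqueue() so
	 * that the kg_last_assigned bookkeeping is redone for the new priority.
	 */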
3021f955e2dSJulian Elischer 	kg = td->td_ksegrp;
303ed062c8dSJulian Elischer 	if (ke->ke_state == KES_ONRUNQ) {
3041f955e2dSJulian Elischer 		if (kg->kg_last_assigned == td) {
3051f955e2dSJulian Elischer 			kg->kg_last_assigned =
3061f955e2dSJulian Elischer 			    TAILQ_PREV(td, threadqueue, td_runq);
3071f955e2dSJulian Elischer 		}
3087cf90fb3SJeff Roberson 		sched_rem(td);
3091f955e2dSJulian Elischer 	}
3101f955e2dSJulian Elischer 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
31114f0e2e9SJulian Elischer 	TD_SET_CAN_RUN(td);
3121f955e2dSJulian Elischer 	td->td_priority = newpri;
3132630e4c9SJulian Elischer 	setrunqueue(td, SRQ_BORING);
3141f955e2dSJulian Elischer }
31584f9d4b1SStephan Uphoff 
31684f9d4b1SStephan Uphoff /*
31784f9d4b1SStephan Uphoff  * This function is called when a thread is about to be put on a
31884f9d4b1SStephan Uphoff  * ksegrp run queue because it has been made runnable or its
31984f9d4b1SStephan Uphoff  * priority has been adjusted and the ksegrp does not have a
32084f9d4b1SStephan Uphoff  * free kse slot.  It determines if a thread from the same ksegrp
32184f9d4b1SStephan Uphoff  * should be preempted.  If so, it tries to switch threads
32284f9d4b1SStephan Uphoff  * if the thread is on the same cpu or notifies another cpu that
32384f9d4b1SStephan Uphoff  * it should switch threads.
32484f9d4b1SStephan Uphoff  */
32584f9d4b1SStephan Uphoff 
32684f9d4b1SStephan Uphoff static void
32784f9d4b1SStephan Uphoff maybe_preempt_in_ksegrp(struct thread *td)
3287c71b645SStephan Uphoff #if  !defined(SMP)
32984f9d4b1SStephan Uphoff {
33013e7430fSPoul-Henning Kamp 	struct thread *running_thread;
3317c71b645SStephan Uphoff 
3327c71b645SStephan Uphoff #ifndef FULL_PREEMPTION
3337c71b645SStephan Uphoff 	int pri;
3347c71b645SStephan Uphoff 	pri = td->td_priority;
3357c71b645SStephan Uphoff 	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD))
3367c71b645SStephan Uphoff 		return;
3377c71b645SStephan Uphoff #endif
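	/*
	 * Having passed the priority-class check above, preempt only if the
	 * new thread belongs to the same ksegrp as the running thread and its
	 * priority is no worse (numerically no higher).  Under PREEMPTION, if
	 * we are nested inside a critical section, just note that a preemption
	 * is owed and let critical_exit() perform the switch.
	 */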
3387c71b645SStephan Uphoff 	mtx_assert(&sched_lock, MA_OWNED);
3397c71b645SStephan Uphoff 	running_thread = curthread;
3407c71b645SStephan Uphoff 
3417c71b645SStephan Uphoff 	if (running_thread->td_ksegrp != td->td_ksegrp)
3427c71b645SStephan Uphoff 		return;
3437c71b645SStephan Uphoff 
3447c71b645SStephan Uphoff 	if (td->td_priority > running_thread->td_priority)
3457c71b645SStephan Uphoff 		return;
3467c71b645SStephan Uphoff #ifdef PREEMPTION
3477c71b645SStephan Uphoff 	if (running_thread->td_critnest > 1)
3487c71b645SStephan Uphoff 		running_thread->td_pflags |= TDP_OWEPREEMPT;
3497c71b645SStephan Uphoff 	 else
3507c71b645SStephan Uphoff 		 mi_switch(SW_INVOL, NULL);
3517c71b645SStephan Uphoff 
3527c71b645SStephan Uphoff #else
3537c71b645SStephan Uphoff 	running_thread->td_flags |= TDF_NEEDRESCHED;
3547c71b645SStephan Uphoff #endif
3557c71b645SStephan Uphoff 	return;
3567c71b645SStephan Uphoff }
3577c71b645SStephan Uphoff 
3587c71b645SStephan Uphoff #else /* SMP */
3597c71b645SStephan Uphoff {
3607c71b645SStephan Uphoff 	struct thread *running_thread;
36184f9d4b1SStephan Uphoff 	int worst_pri;
36284f9d4b1SStephan Uphoff 	struct ksegrp *kg;
36384f9d4b1SStephan Uphoff 	cpumask_t cpumask,dontuse;
36484f9d4b1SStephan Uphoff 	struct pcpu *pc;
36584f9d4b1SStephan Uphoff 	struct pcpu *best_pcpu;
36684f9d4b1SStephan Uphoff 	struct thread *cputhread;
36784f9d4b1SStephan Uphoff 
36884f9d4b1SStephan Uphoff #ifndef FULL_PREEMPTION
36984f9d4b1SStephan Uphoff 	int pri;
37084f9d4b1SStephan Uphoff 	pri = td->td_priority;
37184f9d4b1SStephan Uphoff 	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD))
37284f9d4b1SStephan Uphoff 		return;
37384f9d4b1SStephan Uphoff #endif
37484f9d4b1SStephan Uphoff 
37584f9d4b1SStephan Uphoff 	mtx_assert(&sched_lock, MA_OWNED);
37684f9d4b1SStephan Uphoff 
37784f9d4b1SStephan Uphoff 	running_thread = curthread;
37884f9d4b1SStephan Uphoff 
37984f9d4b1SStephan Uphoff #if !defined(KSEG_PEEMPT_BEST_CPU)
38084f9d4b1SStephan Uphoff 	if (running_thread->td_ksegrp != td->td_ksegrp) {
38184f9d4b1SStephan Uphoff #endif
38284f9d4b1SStephan Uphoff 		kg = td->td_ksegrp;
38384f9d4b1SStephan Uphoff 
38484f9d4b1SStephan Uphoff 		/* if someone is ahead of this thread, wait our turn */
38584f9d4b1SStephan Uphoff 		if (td != TAILQ_FIRST(&kg->kg_runq))
38684f9d4b1SStephan Uphoff 			return;
38784f9d4b1SStephan Uphoff 
38884f9d4b1SStephan Uphoff 		worst_pri = td->td_priority;
38984f9d4b1SStephan Uphoff 		best_pcpu = NULL;
39084f9d4b1SStephan Uphoff 		dontuse   = stopped_cpus | idle_cpus_mask;
39184f9d4b1SStephan Uphoff 
39284f9d4b1SStephan Uphoff 		/*
39384f9d4b1SStephan Uphoff 		 * Find the cpu with the worst (numerically highest) priority that
39484f9d4b1SStephan Uphoff 		 * is running a thread from the same ksegrp - if multiple exist,
39584f9d4b1SStephan Uphoff 		 * prefer first the cpu this thread last ran on, then the current cpu.
39684f9d4b1SStephan Uphoff 		 */
39784f9d4b1SStephan Uphoff 
39884f9d4b1SStephan Uphoff 		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
39984f9d4b1SStephan Uphoff 			cpumask   = pc->pc_cpumask;
40084f9d4b1SStephan Uphoff 			cputhread = pc->pc_curthread;
40184f9d4b1SStephan Uphoff 
40284f9d4b1SStephan Uphoff 			if ((cpumask & dontuse)  ||
40384f9d4b1SStephan Uphoff 			    cputhread->td_ksegrp != kg)
40484f9d4b1SStephan Uphoff 				continue;
40584f9d4b1SStephan Uphoff 
40684f9d4b1SStephan Uphoff 			if (cputhread->td_priority > worst_pri) {
40784f9d4b1SStephan Uphoff 				worst_pri = cputhread->td_priority;
40884f9d4b1SStephan Uphoff 				best_pcpu = pc;
40984f9d4b1SStephan Uphoff 				continue;
41084f9d4b1SStephan Uphoff 			}
41184f9d4b1SStephan Uphoff 
41284f9d4b1SStephan Uphoff 			if (cputhread->td_priority == worst_pri &&
41384f9d4b1SStephan Uphoff 			    best_pcpu != NULL &&
41484f9d4b1SStephan Uphoff 			    (td->td_lastcpu == pc->pc_cpuid ||
41584f9d4b1SStephan Uphoff 				(PCPU_GET(cpumask) == cpumask &&
41684f9d4b1SStephan Uphoff 				    td->td_lastcpu != best_pcpu->pc_cpuid)))
41784f9d4b1SStephan Uphoff 			    best_pcpu = pc;
41884f9d4b1SStephan Uphoff 		}
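		/*
		 * best_pcpu, if set, is the cpu running the thread from this
		 * ksegrp with the numerically highest (worst) priority that is
		 * worse than td's, ties broken as described above.
		 */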
41984f9d4b1SStephan Uphoff 
42084f9d4b1SStephan Uphoff 		/* Check if we need to preempt someone */
42184f9d4b1SStephan Uphoff 		if (best_pcpu == NULL)
42284f9d4b1SStephan Uphoff 			return;
42384f9d4b1SStephan Uphoff 
42484f9d4b1SStephan Uphoff 		if (PCPU_GET(cpuid) != best_pcpu->pc_cpuid) {
42584f9d4b1SStephan Uphoff 			best_pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
42684f9d4b1SStephan Uphoff 			ipi_selected(best_pcpu->pc_cpumask, IPI_AST);
42784f9d4b1SStephan Uphoff 			return;
42884f9d4b1SStephan Uphoff 		}
42984f9d4b1SStephan Uphoff #if !defined(KSEG_PEEMPT_BEST_CPU)
43084f9d4b1SStephan Uphoff 	}
43184f9d4b1SStephan Uphoff #endif
43284f9d4b1SStephan Uphoff 
43384f9d4b1SStephan Uphoff 	if (td->td_priority > running_thread->td_priority)
43484f9d4b1SStephan Uphoff 		return;
43584f9d4b1SStephan Uphoff #ifdef PREEMPTION
43684f9d4b1SStephan Uphoff 	if (running_thread->td_critnest > 1)
43784f9d4b1SStephan Uphoff 		running_thread->td_pflags |= TDP_OWEPREEMPT;
43884f9d4b1SStephan Uphoff 	 else
43984f9d4b1SStephan Uphoff 		 mi_switch(SW_INVOL, NULL);
44084f9d4b1SStephan Uphoff 
44184f9d4b1SStephan Uphoff #else
44284f9d4b1SStephan Uphoff 	running_thread->td_flags |= TDF_NEEDRESCHED;
44384f9d4b1SStephan Uphoff #endif
44484f9d4b1SStephan Uphoff 	return;
44584f9d4b1SStephan Uphoff }
4467c71b645SStephan Uphoff #endif /* !SMP */
4477c71b645SStephan Uphoff 
44884f9d4b1SStephan Uphoff 
449ed062c8dSJulian Elischer int limitcount;
450d5a08a60SJake Burkholder void
4512630e4c9SJulian Elischer setrunqueue(struct thread *td, int flags)
452d5a08a60SJake Burkholder {
453e602ba25SJulian Elischer 	struct ksegrp *kg;
454e602ba25SJulian Elischer 	struct thread *td2;
455e602ba25SJulian Elischer 	struct thread *tda;
456e602ba25SJulian Elischer 
457ed062c8dSJulian Elischer 	CTR3(KTR_RUNQ, "setrunqueue: td:%p kg:%p pid:%d",
458ed062c8dSJulian Elischer 	    td, td->td_ksegrp, td->td_proc->p_pid);
45985da7a56SJeff Roberson 	CTR5(KTR_SCHED, "setrunqueue: %p(%s) prio %d by %p(%s)",
46085da7a56SJeff Roberson             td, td->td_proc->p_comm, td->td_priority, curthread,
46185da7a56SJeff Roberson             curthread->td_proc->p_comm);
462e602ba25SJulian Elischer 	mtx_assert(&sched_lock, MA_OWNED);
463b2578c6cSJulian Elischer 	KASSERT((td->td_inhibitors == 0),
464b2578c6cSJulian Elischer 			("setrunqueue: trying to run inhibited thread"));
46571fad9fdSJulian Elischer 	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
46671fad9fdSJulian Elischer 	    ("setrunqueue: bad thread state"));
46771fad9fdSJulian Elischer 	TD_SET_RUNQ(td);
468e602ba25SJulian Elischer 	kg = td->td_ksegrp;
469ed062c8dSJulian Elischer 	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
47048bfcdddSJulian Elischer 		/*
47148bfcdddSJulian Elischer 		 * Common path optimisation: Only one of everything
47248bfcdddSJulian Elischer 		 * and the KSE is always already attached.
47348bfcdddSJulian Elischer 		 * Totally ignore the ksegrp run queue.
47448bfcdddSJulian Elischer 		 */
475ed062c8dSJulian Elischer 		if (kg->kg_avail_opennings != 1) {
47654983505SJulian Elischer 			if (limitcount < 1) {
477ed062c8dSJulian Elischer 				limitcount++;
47854983505SJulian Elischer 				printf("pid %d: corrected slot count (%d->1)\n",
479ed062c8dSJulian Elischer 				    td->td_proc->p_pid, kg->kg_avail_opennings);
480ed062c8dSJulian Elischer 
481ed062c8dSJulian Elischer 			}
482ed062c8dSJulian Elischer 			kg->kg_avail_opennings = 1;
483ed062c8dSJulian Elischer 		}
4842630e4c9SJulian Elischer 		sched_add(td, flags);
48548bfcdddSJulian Elischer 		return;
48648bfcdddSJulian Elischer 	}
48748bfcdddSJulian Elischer 
48814f0e2e9SJulian Elischer 	/*
48914f0e2e9SJulian Elischer 	 * If the concurrency has been reduced, and we would land in the
49014f0e2e9SJulian Elischer 	 * assigned section, then keep removing entries from the
49114f0e2e9SJulian Elischer 	 * system run queue until we are no longer in that section
49214f0e2e9SJulian Elischer 	 * or there is room for us to be put in that section.
49314f0e2e9SJulian Elischer 	 * What we MUST avoid is the case where threads of lower
49414f0e2e9SJulian Elischer 	 * priority than the new one are scheduled, but the new one
49514f0e2e9SJulian Elischer 	 * cannot be scheduled itself. That would lead to a non-contiguous
49614f0e2e9SJulian Elischer 	 * set of scheduled threads, and everything would break.
49714f0e2e9SJulian Elischer 	 */
498e602ba25SJulian Elischer 	tda = kg->kg_last_assigned;
49914f0e2e9SJulian Elischer 	while ((kg->kg_avail_opennings <= 0) &&
500ed062c8dSJulian Elischer 	    (tda && (tda->td_priority > td->td_priority))) {
501e602ba25SJulian Elischer 		/*
502e602ba25SJulian Elischer 		 * None free, but there is one we can commandeer.
503e602ba25SJulian Elischer 		 */
504ed062c8dSJulian Elischer 		CTR2(KTR_RUNQ,
505ed062c8dSJulian Elischer 		    "setrunqueue: kg:%p: take slot from td: %p", kg, tda);
50694816f6dSJeff Roberson 		sched_rem(tda);
507e602ba25SJulian Elischer 		tda = kg->kg_last_assigned =
508e602ba25SJulian Elischer 		    TAILQ_PREV(tda, threadqueue, td_runq);
509d5a08a60SJake Burkholder 	}
510d5a08a60SJake Burkholder 
511e602ba25SJulian Elischer 	/*
512e602ba25SJulian Elischer 	 * Add the thread to the ksegrp's run queue at
513e602ba25SJulian Elischer 	 * the appropriate place.
514e602ba25SJulian Elischer 	 */
515e602ba25SJulian Elischer 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
516e602ba25SJulian Elischer 		if (td2->td_priority > td->td_priority) {
517e602ba25SJulian Elischer 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
518e602ba25SJulian Elischer 			break;
519e602ba25SJulian Elischer 		}
520e602ba25SJulian Elischer 	}
521e602ba25SJulian Elischer 	if (td2 == NULL) {
522e602ba25SJulian Elischer 		/* We ran off the end of the TAILQ or it was empty. */
523e602ba25SJulian Elischer 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
524e602ba25SJulian Elischer 	}
525e602ba25SJulian Elischer 
526e602ba25SJulian Elischer 	/*
527ed062c8dSJulian Elischer 	 * If we have a slot to use, then put the thread on the system
528ed062c8dSJulian Elischer 	 * run queue and, if needed, readjust the last_assigned pointer.
52914f0e2e9SJulian Elischer 	 * It may be that we need to schedule something anyhow,
53014f0e2e9SJulian Elischer 	 * even if the available slots are negative, so that
53114f0e2e9SJulian Elischer 	 * all the items < last_assigned are scheduled.
532e602ba25SJulian Elischer 	 */
533ed062c8dSJulian Elischer 	if (kg->kg_avail_opennings > 0) {
534e602ba25SJulian Elischer 		if (tda == NULL) {
535e602ba25SJulian Elischer 			/*
536e602ba25SJulian Elischer 			 * No pre-existing last assigned so whoever is first
53714f0e2e9SJulian Elischer 			 * gets the slot (maybe us).
538e602ba25SJulian Elischer 			 */
539e602ba25SJulian Elischer 			td2 = TAILQ_FIRST(&kg->kg_runq);
540e602ba25SJulian Elischer 			kg->kg_last_assigned = td2;
541e602ba25SJulian Elischer 		} else if (tda->td_priority > td->td_priority) {
542ed062c8dSJulian Elischer 			td2 = td;
543e602ba25SJulian Elischer 		} else {
544e602ba25SJulian Elischer 			/*
545e602ba25SJulian Elischer 			 * We are past last_assigned, so
54614f0e2e9SJulian Elischer 			 * give the next slot to whatever is next,
547e602ba25SJulian Elischer 			 * which may or may not be us.
548e602ba25SJulian Elischer 			 */
549e602ba25SJulian Elischer 			td2 = TAILQ_NEXT(tda, td_runq);
550e602ba25SJulian Elischer 			kg->kg_last_assigned = td2;
551e602ba25SJulian Elischer 		}
552ed062c8dSJulian Elischer 		sched_add(td2, flags);
553732d9528SJulian Elischer 	} else {
554732d9528SJulian Elischer 		CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
555732d9528SJulian Elischer 			td, td->td_ksegrp, td->td_proc->p_pid);
55684f9d4b1SStephan Uphoff 		if ((flags & SRQ_YIELDING) == 0)
55784f9d4b1SStephan Uphoff 			maybe_preempt_in_ksegrp(td);
558e602ba25SJulian Elischer 	}
559e602ba25SJulian Elischer }
560e602ba25SJulian Elischer 
5610c0b25aeSJohn Baldwin /*
5620c0b25aeSJohn Baldwin  * Kernel thread preemption implementation.  Critical sections mark
5630c0b25aeSJohn Baldwin  * regions of code in which preemptions are not allowed.
5640c0b25aeSJohn Baldwin  */
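/*
 * An illustrative usage sketch (not code from this file): callers bracket a
 * short region that must not be preempted, for example while touching
 * per-CPU data, and any preemption requested in the meantime is performed
 * by the outermost critical_exit():
 *
 *	critical_enter();
 *	... work that must not be preempted ...
 *	critical_exit();	(may mi_switch() if a preemption is owed)
 */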
5657e1f6dfeSJohn Baldwin void
5667e1f6dfeSJohn Baldwin critical_enter(void)
5677e1f6dfeSJohn Baldwin {
5687e1f6dfeSJohn Baldwin 	struct thread *td;
5697e1f6dfeSJohn Baldwin 
5707e1f6dfeSJohn Baldwin 	td = curthread;
5717e1f6dfeSJohn Baldwin 	if (td->td_critnest == 0)
5721a8cfbc4SRobert Watson 		cpu_critical_enter(td);
5737e1f6dfeSJohn Baldwin 	td->td_critnest++;
574f42a43faSRobert Watson 	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
575f42a43faSRobert Watson 	    (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
5767e1f6dfeSJohn Baldwin }
5777e1f6dfeSJohn Baldwin 
5787e1f6dfeSJohn Baldwin void
5797e1f6dfeSJohn Baldwin critical_exit(void)
5807e1f6dfeSJohn Baldwin {
5817e1f6dfeSJohn Baldwin 	struct thread *td;
5827e1f6dfeSJohn Baldwin 
5837e1f6dfeSJohn Baldwin 	td = curthread;
584b209e5e3SJeff Roberson 	KASSERT(td->td_critnest != 0,
585b209e5e3SJeff Roberson 	    ("critical_exit: td_critnest == 0"));
5867e1f6dfeSJohn Baldwin 	if (td->td_critnest == 1) {
587b96741f4SScott Long 		if (td->td_pflags & TDP_WAKEPROC0) {
588b96741f4SScott Long 			td->td_pflags &= ~TDP_WAKEPROC0;
589b96741f4SScott Long 			wakeup(&proc0);
590b96741f4SScott Long 		}
5910c0b25aeSJohn Baldwin #ifdef PREEMPTION
59252eb8464SJohn Baldwin 		mtx_assert(&sched_lock, MA_NOTOWNED);
59352eb8464SJohn Baldwin 		if (td->td_pflags & TDP_OWEPREEMPT) {
5940c0b25aeSJohn Baldwin 			mtx_lock_spin(&sched_lock);
5950c0b25aeSJohn Baldwin 			mi_switch(SW_INVOL, NULL);
5960c0b25aeSJohn Baldwin 			mtx_unlock_spin(&sched_lock);
5970c0b25aeSJohn Baldwin 		}
5980c0b25aeSJohn Baldwin #endif
5997e1f6dfeSJohn Baldwin 		td->td_critnest = 0;
6001a8cfbc4SRobert Watson 		cpu_critical_exit(td);
601d74ac681SMatthew Dillon 	} else {
6027e1f6dfeSJohn Baldwin 		td->td_critnest--;
6037e1f6dfeSJohn Baldwin 	}
604f42a43faSRobert Watson 	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
605f42a43faSRobert Watson 	    (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
606d74ac681SMatthew Dillon }
6077e1f6dfeSJohn Baldwin 
6080c0b25aeSJohn Baldwin /*
6090c0b25aeSJohn Baldwin  * This function is called when a thread is about to be put on run queue
6100c0b25aeSJohn Baldwin  * because it has been made runnable or its priority has been adjusted.  It
6110c0b25aeSJohn Baldwin  * determines whether we should immediately preempt to the new thread.  If so,
6120c0b25aeSJohn Baldwin  * it switches to it and eventually returns true.  If not, it returns false
6130c0b25aeSJohn Baldwin  * so that the caller may place the thread on an appropriate run queue.
6140c0b25aeSJohn Baldwin  */
6150c0b25aeSJohn Baldwin int
6160c0b25aeSJohn Baldwin maybe_preempt(struct thread *td)
6170c0b25aeSJohn Baldwin {
6188b44a2e2SMarcel Moolenaar #ifdef PREEMPTION
6190c0b25aeSJohn Baldwin 	struct thread *ctd;
6200c0b25aeSJohn Baldwin 	int cpri, pri;
6218b44a2e2SMarcel Moolenaar #endif
6220c0b25aeSJohn Baldwin 
6230c0b25aeSJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
6240c0b25aeSJohn Baldwin #ifdef PREEMPTION
6250c0b25aeSJohn Baldwin 	/*
6260c0b25aeSJohn Baldwin 	 * The new thread should not preempt the current thread if any of the
6270c0b25aeSJohn Baldwin 	 * following conditions are true:
6280c0b25aeSJohn Baldwin 	 *
62952eb8464SJohn Baldwin 	 *  - The current thread has a higher (numerically lower) or
63052eb8464SJohn Baldwin 	 *    equivalent priority.  Note that this prevents curthread from
63152eb8464SJohn Baldwin 	 *    trying to preempt to itself.
6320c0b25aeSJohn Baldwin 	 *  - It is too early in the boot for context switches (cold is set).
6330c0b25aeSJohn Baldwin 	 *  - The current thread has an inhibitor set or is in the process of
6340c0b25aeSJohn Baldwin 	 *    exiting.  In this case, the current thread is about to switch
6350c0b25aeSJohn Baldwin 	 *    out anyways, so there's no point in preempting.  If we did,
6360c0b25aeSJohn Baldwin 	 *    the current thread would not be properly resumed as well, so
6370c0b25aeSJohn Baldwin 	 *    just avoid that whole landmine.
6380c0b25aeSJohn Baldwin 	 *  - If the new thread's priority is not a realtime priority and
6390c0b25aeSJohn Baldwin 	 *    the current thread's priority is not an idle priority and
6400c0b25aeSJohn Baldwin 	 *    FULL_PREEMPTION is disabled.
6410c0b25aeSJohn Baldwin 	 *
6420c0b25aeSJohn Baldwin 	 * If all of these conditions are false, but the current thread is in
6430c0b25aeSJohn Baldwin 	 * a nested critical section, then we have to defer the preemption
6440c0b25aeSJohn Baldwin 	 * until we exit the critical section.  Otherwise, switch immediately
6450c0b25aeSJohn Baldwin 	 * to the new thread.
6460c0b25aeSJohn Baldwin 	 */
6470c0b25aeSJohn Baldwin 	ctd = curthread;
6486a574b2aSJulian Elischer 	KASSERT ((ctd->td_kse != NULL && ctd->td_kse->ke_thread == ctd),
6496a574b2aSJulian Elischer 	  ("thread has no (or wrong) sched-private part."));
650b2578c6cSJulian Elischer 	KASSERT((td->td_inhibitors == 0),
651b2578c6cSJulian Elischer 			("maybe_preempt: trying to run inhibited thread"));
6520c0b25aeSJohn Baldwin 	pri = td->td_priority;
6530c0b25aeSJohn Baldwin 	cpri = ctd->td_priority;
6540c0b25aeSJohn Baldwin 	if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) ||
6550c0b25aeSJohn Baldwin 	    td->td_kse->ke_state != KES_THREAD)
6560c0b25aeSJohn Baldwin 		return (0);
6570c0b25aeSJohn Baldwin #ifndef FULL_PREEMPTION
6580c0b25aeSJohn Baldwin 	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
6590c0b25aeSJohn Baldwin 	    !(cpri >= PRI_MIN_IDLE))
6600c0b25aeSJohn Baldwin 		return (0);
6610c0b25aeSJohn Baldwin #endif
6620c0b25aeSJohn Baldwin 	if (ctd->td_critnest > 1) {
6630c0b25aeSJohn Baldwin 		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
6640c0b25aeSJohn Baldwin 		    ctd->td_critnest);
66552eb8464SJohn Baldwin 		ctd->td_pflags |= TDP_OWEPREEMPT;
6660c0b25aeSJohn Baldwin 		return (0);
6670c0b25aeSJohn Baldwin 	}
6680c0b25aeSJohn Baldwin 
6690c0b25aeSJohn Baldwin 	/*
670c20c691bSJulian Elischer 	 * Thread is runnable but not yet put on system run queue.
6710c0b25aeSJohn Baldwin 	 */
6720c0b25aeSJohn Baldwin 	MPASS(TD_ON_RUNQ(td));
6731f9f5df6SJulian Elischer 	MPASS(td->td_sched->ke_state != KES_ONRUNQ);
6741f9f5df6SJulian Elischer 	if (td->td_proc->p_flag & P_HADTHREADS) {
6751f9f5df6SJulian Elischer 		/*
6761f9f5df6SJulian Elischer 		 * If this is a threaded process we actually ARE on the
6771f9f5df6SJulian Elischer 		 * ksegrp run queue so take it off that first.
6789da3e923SJulian Elischer 		 * Also undo any damage done to the last_assigned pointer.
6799da3e923SJulian Elischer 		 * XXX Fix setrunqueue so this isn't needed
6801f9f5df6SJulian Elischer 		 */
6819da3e923SJulian Elischer 		struct ksegrp *kg;
6829da3e923SJulian Elischer 
6839da3e923SJulian Elischer 		kg = td->td_ksegrp;
6849da3e923SJulian Elischer 		if (kg->kg_last_assigned == td)
6859da3e923SJulian Elischer 			kg->kg_last_assigned =
6869da3e923SJulian Elischer 			    TAILQ_PREV(td, threadqueue, td_runq);
6879da3e923SJulian Elischer 		TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
6881f9f5df6SJulian Elischer 	}
6891f9f5df6SJulian Elischer 
6900c0b25aeSJohn Baldwin 	TD_SET_RUNNING(td);
6910c0b25aeSJohn Baldwin 	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
6920c0b25aeSJohn Baldwin 	    td->td_proc->p_pid, td->td_proc->p_comm);
693c20c691bSJulian Elischer 	mi_switch(SW_INVOL|SW_PREEMPT, td);
6940c0b25aeSJohn Baldwin 	return (1);
6950c0b25aeSJohn Baldwin #else
6960c0b25aeSJohn Baldwin 	return (0);
6970c0b25aeSJohn Baldwin #endif
6980c0b25aeSJohn Baldwin }
6990c0b25aeSJohn Baldwin 
70044fe3c1fSJohn Baldwin #if 0
7010c0b25aeSJohn Baldwin #ifndef PREEMPTION
7020c0b25aeSJohn Baldwin /* XXX: There should be a non-static version of this. */
7030c0b25aeSJohn Baldwin static void
7040c0b25aeSJohn Baldwin printf_caddr_t(void *data)
7050c0b25aeSJohn Baldwin {
7060c0b25aeSJohn Baldwin 	printf("%s", (char *)data);
7070c0b25aeSJohn Baldwin }
7080c0b25aeSJohn Baldwin static char preempt_warning[] =
7090c0b25aeSJohn Baldwin     "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
7100c0b25aeSJohn Baldwin SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
7110c0b25aeSJohn Baldwin     preempt_warning)
7120c0b25aeSJohn Baldwin #endif
71344fe3c1fSJohn Baldwin #endif
714e602ba25SJulian Elischer 
715e602ba25SJulian Elischer /************************************************************************
716e602ba25SJulian Elischer  * SYSTEM RUN QUEUE manipulations and tests				*
717e602ba25SJulian Elischer  ************************************************************************/
718e602ba25SJulian Elischer /*
719e602ba25SJulian Elischer  * Initialize a run structure.
720e602ba25SJulian Elischer  */
721e602ba25SJulian Elischer void
722e602ba25SJulian Elischer runq_init(struct runq *rq)
723e602ba25SJulian Elischer {
724e602ba25SJulian Elischer 	int i;
725e602ba25SJulian Elischer 
726e602ba25SJulian Elischer 	bzero(rq, sizeof *rq);
727e602ba25SJulian Elischer 	for (i = 0; i < RQ_NQS; i++)
728e602ba25SJulian Elischer 		TAILQ_INIT(&rq->rq_queues[i]);
729e602ba25SJulian Elischer }
730e602ba25SJulian Elischer 
731d5a08a60SJake Burkholder /*
732d5a08a60SJake Burkholder  * Clear the status bit of the queue corresponding to priority level pri,
733d5a08a60SJake Burkholder  * indicating that it is empty.
734d5a08a60SJake Burkholder  */
735d5a08a60SJake Burkholder static __inline void
736d5a08a60SJake Burkholder runq_clrbit(struct runq *rq, int pri)
737d5a08a60SJake Burkholder {
738d5a08a60SJake Burkholder 	struct rqbits *rqb;
739d5a08a60SJake Burkholder 
740d5a08a60SJake Burkholder 	rqb = &rq->rq_status;
741d5a08a60SJake Burkholder 	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
742d5a08a60SJake Burkholder 	    rqb->rqb_bits[RQB_WORD(pri)],
743d5a08a60SJake Burkholder 	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
744d5a08a60SJake Burkholder 	    RQB_BIT(pri), RQB_WORD(pri));
745d5a08a60SJake Burkholder 	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
746d5a08a60SJake Burkholder }
747d5a08a60SJake Burkholder 
748d5a08a60SJake Burkholder /*
749d5a08a60SJake Burkholder  * Find the index of the first non-empty run queue.  This is done by
750d5a08a60SJake Burkholder  * scanning the status bits; a set bit indicates a non-empty queue.
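 *
 * For example, assuming 32-bit status words (RQB_L2BPW == 5) and that
 * RQB_FFS() yields the zero-based index of the least significant set bit,
 * rqb_bits[] = { 0x0, 0x12 } would give pri = 1 + (1 << 5) = 33.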
751d5a08a60SJake Burkholder  */
752d5a08a60SJake Burkholder static __inline int
753d5a08a60SJake Burkholder runq_findbit(struct runq *rq)
754d5a08a60SJake Burkholder {
755d5a08a60SJake Burkholder 	struct rqbits *rqb;
756d5a08a60SJake Burkholder 	int pri;
757d5a08a60SJake Burkholder 	int i;
758d5a08a60SJake Burkholder 
759d5a08a60SJake Burkholder 	rqb = &rq->rq_status;
760d5a08a60SJake Burkholder 	for (i = 0; i < RQB_LEN; i++)
761d5a08a60SJake Burkholder 		if (rqb->rqb_bits[i]) {
7622f9267ecSPeter Wemm 			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
763d5a08a60SJake Burkholder 			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
764d5a08a60SJake Burkholder 			    rqb->rqb_bits[i], i, pri);
765d5a08a60SJake Burkholder 			return (pri);
766d5a08a60SJake Burkholder 		}
767d5a08a60SJake Burkholder 
768d5a08a60SJake Burkholder 	return (-1);
769d5a08a60SJake Burkholder }
770d5a08a60SJake Burkholder 
771d5a08a60SJake Burkholder /*
772d5a08a60SJake Burkholder  * Set the status bit of the queue corresponding to priority level pri,
773d5a08a60SJake Burkholder  * indicating that it is non-empty.
774d5a08a60SJake Burkholder  */
775d5a08a60SJake Burkholder static __inline void
776d5a08a60SJake Burkholder runq_setbit(struct runq *rq, int pri)
777d5a08a60SJake Burkholder {
778d5a08a60SJake Burkholder 	struct rqbits *rqb;
779d5a08a60SJake Burkholder 
780d5a08a60SJake Burkholder 	rqb = &rq->rq_status;
781d5a08a60SJake Burkholder 	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
782d5a08a60SJake Burkholder 	    rqb->rqb_bits[RQB_WORD(pri)],
783d5a08a60SJake Burkholder 	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
784d5a08a60SJake Burkholder 	    RQB_BIT(pri), RQB_WORD(pri));
785d5a08a60SJake Burkholder 	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
786d5a08a60SJake Burkholder }
787d5a08a60SJake Burkholder 
788d5a08a60SJake Burkholder /*
789e602ba25SJulian Elischer  * Add the KSE to the queue specified by its priority, and set the
790d5a08a60SJake Burkholder  * corresponding status bit.
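 * The queue index is the thread priority divided by RQ_PPQ, so RQ_PPQ
 * adjacent priority levels share a single queue (for instance, with
 * RQ_PPQ == 4, priorities 8 through 11 all map to queue 2).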
791d5a08a60SJake Burkholder  */
792d5a08a60SJake Burkholder void
793c20c691bSJulian Elischer runq_add(struct runq *rq, struct kse *ke, int flags)
794d5a08a60SJake Burkholder {
795d5a08a60SJake Burkholder 	struct rqhead *rqh;
796d5a08a60SJake Burkholder 	int pri;
797dba6c5a6SPeter Wemm 
7982c100766SJulian Elischer 	pri = ke->ke_thread->td_priority / RQ_PPQ;
799b40ce416SJulian Elischer 	ke->ke_rqindex = pri;
800d5a08a60SJake Burkholder 	runq_setbit(rq, pri);
801d5a08a60SJake Burkholder 	rqh = &rq->rq_queues[pri];
802732d9528SJulian Elischer 	CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
803732d9528SJulian Elischer 	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
804c20c691bSJulian Elischer 	if (flags & SRQ_PREEMPTED) {
805c20c691bSJulian Elischer 		TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
806c20c691bSJulian Elischer 	} else {
807b40ce416SJulian Elischer 		TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
808dba6c5a6SPeter Wemm 	}
809c20c691bSJulian Elischer }
810d5a08a60SJake Burkholder 
811d5a08a60SJake Burkholder /*
812d5a08a60SJake Burkholder  * Return true if there are runnable processes of any priority on the run
813d5a08a60SJake Burkholder  * queue, false otherwise.  Has no side effects, does not modify the run
814d5a08a60SJake Burkholder  * queue structure.
815d5a08a60SJake Burkholder  */
816d5a08a60SJake Burkholder int
817d5a08a60SJake Burkholder runq_check(struct runq *rq)
818d5a08a60SJake Burkholder {
819d5a08a60SJake Burkholder 	struct rqbits *rqb;
820d5a08a60SJake Burkholder 	int i;
821d5a08a60SJake Burkholder 
822d5a08a60SJake Burkholder 	rqb = &rq->rq_status;
823d5a08a60SJake Burkholder 	for (i = 0; i < RQB_LEN; i++)
824d5a08a60SJake Burkholder 		if (rqb->rqb_bits[i]) {
825d5a08a60SJake Burkholder 			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
826d5a08a60SJake Burkholder 			    rqb->rqb_bits[i], i);
827d5a08a60SJake Burkholder 			return (1);
828dba6c5a6SPeter Wemm 		}
829d5a08a60SJake Burkholder 	CTR0(KTR_RUNQ, "runq_check: empty");
830d5a08a60SJake Burkholder 
831d5a08a60SJake Burkholder 	return (0);
832dba6c5a6SPeter Wemm }
833d5a08a60SJake Burkholder 
8346804a3abSJulian Elischer #if defined(SMP) && defined(SCHED_4BSD)
8356804a3abSJulian Elischer int runq_fuzz = 1;
8366804a3abSJulian Elischer SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
8376804a3abSJulian Elischer #endif
8386804a3abSJulian Elischer 
839d5a08a60SJake Burkholder /*
840b43179fbSJeff Roberson  * Find the highest priority KSE on the run queue.
841d5a08a60SJake Burkholder  */
842b40ce416SJulian Elischer struct kse *
843d5a08a60SJake Burkholder runq_choose(struct runq *rq)
844d5a08a60SJake Burkholder {
845d5a08a60SJake Burkholder 	struct rqhead *rqh;
846b40ce416SJulian Elischer 	struct kse *ke;
847d5a08a60SJake Burkholder 	int pri;
848d5a08a60SJake Burkholder 
849d5a08a60SJake Burkholder 	mtx_assert(&sched_lock, MA_OWNED);
850e602ba25SJulian Elischer 	while ((pri = runq_findbit(rq)) != -1) {
851d5a08a60SJake Burkholder 		rqh = &rq->rq_queues[pri];
8526804a3abSJulian Elischer #if defined(SMP) && defined(SCHED_4BSD)
8536804a3abSJulian Elischer 		/* fuzz == 1 is normal.. 0 or less are ignored */
8546804a3abSJulian Elischer 		if (runq_fuzz > 1) {
8556804a3abSJulian Elischer 			/*
8566804a3abSJulian Elischer 			 * In the first couple of entries, check if
8576804a3abSJulian Elischer 			 * there is one for our CPU as a preference.
8586804a3abSJulian Elischer 			 */
8596804a3abSJulian Elischer 			int count = runq_fuzz;
8606804a3abSJulian Elischer 			int cpu = PCPU_GET(cpuid);
8616804a3abSJulian Elischer 			struct kse *ke2;
8626804a3abSJulian Elischer 			ke2 = ke = TAILQ_FIRST(rqh);
8636804a3abSJulian Elischer 
8646804a3abSJulian Elischer 			while (count-- && ke2) {
8656804a3abSJulian Elischer 				if (ke2->ke_thread->td_lastcpu == cpu) {
8666804a3abSJulian Elischer 					ke = ke2;
8676804a3abSJulian Elischer 					break;
8686804a3abSJulian Elischer 				}
8696804a3abSJulian Elischer 				ke2 = TAILQ_NEXT(ke2, ke_procq);
8706804a3abSJulian Elischer 			}
8716804a3abSJulian Elischer 		} else
8726804a3abSJulian Elischer #endif
873b40ce416SJulian Elischer 			ke = TAILQ_FIRST(rqh);
874b40ce416SJulian Elischer 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
875e602ba25SJulian Elischer 		CTR3(KTR_RUNQ,
876e602ba25SJulian Elischer 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
877b40ce416SJulian Elischer 		return (ke);
878d5a08a60SJake Burkholder 	}
879d5a08a60SJake Burkholder 	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
880d5a08a60SJake Burkholder 
881e602ba25SJulian Elischer 	return (NULL);
882d5a08a60SJake Burkholder }
883d5a08a60SJake Burkholder 
884d5a08a60SJake Burkholder /*
885e602ba25SJulian Elischer  * Remove the KSE from the queue specified by its priority, and clear the
886d5a08a60SJake Burkholder  * corresponding status bit if the queue becomes empty.
887e602ba25SJulian Elischer  * Caller must set ke->ke_state afterwards.
888d5a08a60SJake Burkholder  */
889d5a08a60SJake Burkholder void
890b40ce416SJulian Elischer runq_remove(struct runq *rq, struct kse *ke)
891d5a08a60SJake Burkholder {
892d5a08a60SJake Burkholder 	struct rqhead *rqh;
893d5a08a60SJake Burkholder 	int pri;
894d5a08a60SJake Burkholder 
8959eb881f8SSeigo Tanimura 	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
8969eb881f8SSeigo Tanimura 		("runq_remove: process swapped out"));
897b40ce416SJulian Elischer 	pri = ke->ke_rqindex;
898d5a08a60SJake Burkholder 	rqh = &rq->rq_queues[pri];
899732d9528SJulian Elischer 	CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p",
900732d9528SJulian Elischer 	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
901b40ce416SJulian Elischer 	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
902b40ce416SJulian Elischer 	TAILQ_REMOVE(rqh, ke, ke_procq);
903d5a08a60SJake Burkholder 	if (TAILQ_EMPTY(rqh)) {
904d5a08a60SJake Burkholder 		CTR0(KTR_RUNQ, "runq_remove: empty");
905d5a08a60SJake Burkholder 		runq_clrbit(rq, pri);
906d5a08a60SJake Burkholder 	}
907dba6c5a6SPeter Wemm }
908e602ba25SJulian Elischer 
909ed062c8dSJulian Elischer /****** functions that are temporarily here ***********/
910ed062c8dSJulian Elischer #include <vm/uma.h>
911ed062c8dSJulian Elischer extern struct mtx kse_zombie_lock;
912ed062c8dSJulian Elischer 
913ed062c8dSJulian Elischer /*
914ed062c8dSJulian Elischer  *  Allocate scheduler specific per-process resources.
915ed062c8dSJulian Elischer  * The thread and ksegrp have already been linked in.
916ed062c8dSJulian Elischer  * In this case just set the default concurrency value.
917ed062c8dSJulian Elischer  *
918ed062c8dSJulian Elischer  * Called from:
919ed062c8dSJulian Elischer  *  proc_init() (UMA init method)
920ed062c8dSJulian Elischer  */
921ed062c8dSJulian Elischer void
922ed062c8dSJulian Elischer sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
923ed062c8dSJulian Elischer {
924ed062c8dSJulian Elischer 
925ed062c8dSJulian Elischer 	/* This can go in sched_fork */
926ed062c8dSJulian Elischer 	sched_init_concurrency(kg);
927ed062c8dSJulian Elischer }
928ed062c8dSJulian Elischer 
929ed062c8dSJulian Elischer /*
930ed062c8dSJulian Elischer  * thread is being either created or recycled.
931ed062c8dSJulian Elischer  * Fix up the per-scheduler resources associated with it.
932ed062c8dSJulian Elischer  * Called from:
933ed062c8dSJulian Elischer  *  sched_fork_thread()
934ed062c8dSJulian Elischer  *  thread_dtor()  (*may go away)
935ed062c8dSJulian Elischer  *  thread_init()  (*may go away)
936ed062c8dSJulian Elischer  */
937ed062c8dSJulian Elischer void
938ed062c8dSJulian Elischer sched_newthread(struct thread *td)
939ed062c8dSJulian Elischer {
940ed062c8dSJulian Elischer 	struct td_sched *ke;
941ed062c8dSJulian Elischer 
942ed062c8dSJulian Elischer 	ke = (struct td_sched *) (td + 1);
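	/* The scheduler-private data is laid out directly after the thread. */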
943ed062c8dSJulian Elischer 	bzero(ke, sizeof(*ke));
944ed062c8dSJulian Elischer 	td->td_sched     = ke;
945ed062c8dSJulian Elischer 	ke->ke_thread	= td;
946ed062c8dSJulian Elischer 	ke->ke_state	= KES_THREAD;
947ed062c8dSJulian Elischer }
948ed062c8dSJulian Elischer 
949ed062c8dSJulian Elischer /*
950ed062c8dSJulian Elischer  * Set up an initial concurrency of 1
951ed062c8dSJulian Elischer  * and set the given thread (if given) to be using that
952ed062c8dSJulian Elischer  * concurrency slot.
953ed062c8dSJulian Elischer  * May be used "offline", i.e. before the ksegrp is attached to the world,
954ed062c8dSJulian Elischer  * and thus doesn't need schedlock in that case.
955ed062c8dSJulian Elischer  * Called from:
956ed062c8dSJulian Elischer  *  thr_create()
957ed062c8dSJulian Elischer  *  proc_init() (UMA) via sched_newproc()
958ed062c8dSJulian Elischer  */
959ed062c8dSJulian Elischer void
960ed062c8dSJulian Elischer sched_init_concurrency(struct ksegrp *kg)
961ed062c8dSJulian Elischer {
962ed062c8dSJulian Elischer 
963d39063f2SJulian Elischer 	CTR1(KTR_RUNQ,"kg %p init slots and concurrency to 1", kg);
964ed062c8dSJulian Elischer 	kg->kg_concurrency = 1;
965ed062c8dSJulian Elischer 	kg->kg_avail_opennings = 1;
966ed062c8dSJulian Elischer }
967ed062c8dSJulian Elischer 
968ed062c8dSJulian Elischer /*
969ed062c8dSJulian Elischer  * Change the concurrency of an existing ksegrp to N
970ed062c8dSJulian Elischer  * Called from:
971ed062c8dSJulian Elischer  *  kse_create()
972ed062c8dSJulian Elischer  *  kse_exit()
973ed062c8dSJulian Elischer  *  thread_exit()
974ed062c8dSJulian Elischer  *  thread_single()
975ed062c8dSJulian Elischer  */
976ed062c8dSJulian Elischer void
977ed062c8dSJulian Elischer sched_set_concurrency(struct ksegrp *kg, int concurrency)
978ed062c8dSJulian Elischer {
979ed062c8dSJulian Elischer 
980d39063f2SJulian Elischer 	CTR4(KTR_RUNQ,"kg %p set concurrency to %d, slots %d -> %d",
981d39063f2SJulian Elischer 	    kg,
982d39063f2SJulian Elischer 	    concurrency,
983d39063f2SJulian Elischer 	    kg->kg_avail_opennings,
984d39063f2SJulian Elischer 	    kg->kg_avail_opennings + (concurrency - kg->kg_concurrency));
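	/*
	 * Adjust the free-slot count by the change in concurrency rather than
	 * resetting it, so that slots currently in use stay accounted for.
	 */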
985ed062c8dSJulian Elischer 	kg->kg_avail_opennings += (concurrency - kg->kg_concurrency);
986ed062c8dSJulian Elischer 	kg->kg_concurrency = concurrency;
987ed062c8dSJulian Elischer }
988ed062c8dSJulian Elischer 
989ed062c8dSJulian Elischer /*
990ed062c8dSJulian Elischer  * Called from thread_exit() for all exiting threads.
991ed062c8dSJulian Elischer  *
992ed062c8dSJulian Elischer  * Not to be confused with sched_exit_thread(), which is only called from
993ed062c8dSJulian Elischer  * thread_exit() for threads exiting without the rest of the process,
994ed062c8dSJulian Elischer  * because that function is also called from sched_exit() and we wouldn't
995ed062c8dSJulian Elischer  * want to call it twice.
996ed062c8dSJulian Elischer  * XXX This can probably be fixed.
997ed062c8dSJulian Elischer  */
998ed062c8dSJulian Elischer void
999ed062c8dSJulian Elischer sched_thread_exit(struct thread *td)
1000ed062c8dSJulian Elischer {
1001ed062c8dSJulian Elischer 
1002d39063f2SJulian Elischer 	SLOT_RELEASE(td->td_ksegrp);
1003ed062c8dSJulian Elischer 	slot_fill(td->td_ksegrp);
1004ed062c8dSJulian Elischer }
1005ed062c8dSJulian Elischer 
1006ed062c8dSJulian Elischer #endif /* KERN_SWITCH_INCLUDE */
1007