xref: /freebsd/sys/kern/kern_switch.c (revision 5da2b58aeb4d5bc16b135ed4bea93e94dd900422)
1 /*
2  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /***
30 
31 Here is the logic:
32 
33 If there are N processors, then there are at most N KSEs (kernel
34 schedulable entities) working to process threads that belong to a
35 KSEGOUP (kg). If there are X of these KSEs actually running at the
36 moment in question, then there are at most M (N-X) of these KSEs on
37 the run queue, as running KSEs are not on the queue.
38 
39 Runnable threads are queued off the KSEGROUP in priority order.
40 If there are M or more threads runnable, the top M threads
41 (by priority) are 'preassigned' to the M KSEs not running. The KSEs take
42 their priority from those threads and are put on the run queue.
43 
44 The last thread that had a priority high enough to have a KSE associated
45 with it, AND IS ON THE RUN QUEUE, is pointed to by
46 kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
47 assigned, either because all the available KSEs are actively running or
48 because there are no threads queued, that pointer is NULL.
49 
50 When a KSE is removed from the run queue to be run, we know it was
51 associated with the highest priority thread in the queue (at the head
52 of the queue). If it is also the last assigned, we know M was 1 and must
53 now be 0. Since the thread is no longer queued, the pointer must no
54 longer refer to it. Since we know there were no more KSEs available
55 (M was 1 and is now 0), and since we are not FREEING our KSE
56 but using it, we know there are STILL no more KSEs available, so we can
57 prove that the next thread in the ksegrp list will not have a KSE to
58 assign to it; therefore the pointer must be made 'invalid' (NULL).
59 
60 The pointer exists so that when a new thread is made runnable, its
61 priority can be compared with that of the last assigned thread to see
62 whether it should 'steal' that thread's KSE or not, i.e. whether it is
63 'earlier' on the list than that thread or later. If it's earlier, the KSE is
64 removed from the last assigned thread (which is now not assigned a KSE)
65 and reassigned to the new thread, which is placed earlier in the list.
66 The pointer is then backed up to the previous thread (which may or may not
67 be the new thread).
68 
69 When a thread sleeps or is removed, the KSE becomes available and if there
70 are queued threads that are not assigned KSEs, the highest priority one of
71 them is assigned the KSE, which is then placed back on the run queue at
72 the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
73 to point to it.
74 
75 The following diagram shows 2 KSEs and 3 threads from a single process.
76 
77  RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
78               \    \____
79                \        \
80     KSEGROUP---thread--thread--thread    (queued in priority order)
81         \                 /
82          \_______________/
83           (last_assigned)
84 
85 The result of this scheme is that the M available KSEs are always
86 queued at the priorities they have inherited from the M highest priority
87 threads for that KSEGROUP. If this situation changes, the KSEs are
88 reassigned to keep this true.
89 
90 */
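
/*
 * A concrete example of the above: with N = 2 KSEs and X = 1 of them
 * currently running, M = N - X = 1 KSE sits on the run queue.  If the
 * KSEGROUP has 3 runnable threads queued in priority order, only the
 * highest priority one is assigned that spare KSE, and
 * kg->kg_last_assigned points at it.  When that KSE is taken off the
 * run queue to run, M drops to 0 and kg_last_assigned becomes NULL;
 * when a KSE later becomes available again, the next unassigned
 * thread (the second in the queue) receives it and kg_last_assigned
 * is moved down to point at that thread.
 */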
91 
92 #include <sys/param.h>
93 #include <sys/systm.h>
94 #include <sys/kernel.h>
95 #include <sys/ktr.h>
96 #include <sys/lock.h>
97 #include <sys/mutex.h>
98 #include <sys/proc.h>
99 #include <sys/queue.h>
100 #include <machine/critical.h>
101 
102 CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
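
/*
 * The run queue status bitmap has one bit per priority queue, packed
 * into RQB_LEN words of RQB_BPW bits each; the assertion above checks
 * that the bitmap exactly covers the RQ_NQS queues (for example, 64
 * queues with 32-bit words would give RQB_LEN == 2).
 */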
103 
104 /*
105  * Global run queue.
106  */
107 static struct runq runq;
108 SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)
109 
110 static void runq_readjust(struct runq *rq, struct kse *ke);
111 /************************************************************************
112  * Functions that manipulate runnability from a thread perspective.	*
113  ************************************************************************/
114 
115 /*
116  * Select the KSE that will be run next.  From that find the thread, and
117  * remove it from the KSEGRP's run queue.  If there is thread clustering,
118  * this will be what does it.
119  */
120 struct thread *
121 choosethread(void)
122 {
123 	struct kse *ke;
124 	struct thread *td;
125 	struct ksegrp *kg;
126 
127 retry:
128 	if ((ke = runq_choose(&runq))) {
129 		td = ke->ke_thread;
130 		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
131 		kg = ke->ke_ksegrp;
132 		if (td->td_flags & TDF_UNBOUND) {
133 			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
134 			if (kg->kg_last_assigned == td) {
135 				if (TAILQ_PREV(td, threadqueue, td_runq)
136 				    != NULL)
137 					printf("Yo MAMA!\n");
138 				kg->kg_last_assigned = TAILQ_PREV(td,
139 				    threadqueue, td_runq);
140 			}
141 			/*
142 			 * If we have started running an upcall,
143 			 * then TDF_UNBOUND WAS set because the thread was
144 			 * created without a KSE. Now that we have one,
145 			 * and it is our time to run, we make sure
146 			 * that BOUND semantics apply for the rest of
147 			 * the journey to userland, and into the UTS.
148 			 */
149 #ifdef	NOTYET
150 			if (td->td_flags & TDF_UPCALLING)
151 				td->td_flags &= ~TDF_UNBOUND;
152 #endif
153 		}
154 		kg->kg_runnable--;
155 		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
156 		    td, td->td_priority);
157 	} else {
158 		/* Simulate runq_choose() having returned the idle thread */
159 		td = PCPU_GET(idlethread);
160 		ke = td->td_kse;
161 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
162 	}
163 	ke->ke_flags |= KEF_DIDRUN;
164 	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
165 	    (td->td_flags & TDF_INPANIC) == 0))
166 		goto retry;
167 	TD_SET_RUNNING(td);
168 	return (td);
169 }
170 
171 /*
172  * Given a KSE (now surplus), either assign a new runnable thread to it
173  * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
174  * Assumes the KSE is no longer linked to any thread (it has been cleaned).
175  */
176 void
177 kse_reassign(struct kse *ke)
178 {
179 	struct ksegrp *kg;
180 	struct thread *td;
181 	struct thread *owner;
182 
183 	mtx_assert(&sched_lock, MA_OWNED);
184 	kg = ke->ke_ksegrp;
185 	owner = ke->ke_bound;
186 	KASSERT(!(owner && ((owner->td_kse != ke) ||
187 		    (owner->td_flags & TDF_UNBOUND))),
188 		("kse_reassign: bad thread bound state"));
189 	if (owner && (owner->td_inhibitors == TDI_LOAN)) {
190 		TD_CLR_LOAN(owner);
191 		ke->ke_bound = NULL;
192 		ke->ke_thread = owner;
193 		owner->td_kse = ke;
194 		setrunqueue(owner);
195 		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p (give back)",
196 			 ke, owner);
197 		return;
198 	}
199 
200 	/*
201 	 * Find the first unassigned thread.
202 	 * If there is a 'last assigned' then see what's next;
203 	 * otherwise look at what is first.
204 	 */
205 	if ((td = kg->kg_last_assigned)) {
206 		td = TAILQ_NEXT(td, td_runq);
207 	} else {
208 		td = TAILQ_FIRST(&kg->kg_runq);
209 	}
210 
211 	/*
212 	 * If we found one, assign it the KSE; otherwise idle the KSE.
213 	 */
214 	if (td) {
215 		kg->kg_last_assigned = td;
216 		td->td_kse = ke;
217 		ke->ke_thread = td;
218 		runq_add(&runq, ke);
219 		if (owner)
220 			TD_SET_LOAN(owner);
221 		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
222 	} else if (!owner) {
223 		ke->ke_state = KES_IDLE;
224 		ke->ke_thread = NULL;
225 		TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
226 		kg->kg_idle_kses++;
227 		CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
228 	} else {
229 		TD_CLR_LOAN(owner);
230 		ke->ke_state = KES_THREAD;
231 		ke->ke_thread = owner;
232 		owner->td_kse = ke;
233 		ke->ke_flags |= KEF_ONLOANQ;
234 		TAILQ_INSERT_HEAD(&kg->kg_lq, ke, ke_kgrlist);
235 		kg->kg_loan_kses++;
236 		CTR1(KTR_RUNQ, "kse_reassign: ke%p is on loan queue", ke);
237 	}
238 }
239 
240 int
241 kserunnable(void)
242 {
243 	return runq_check(&runq);
244 }
245 
246 /*
247  * Remove a thread from its KSEGRP's run queue.
248  * This in turn may remove it from a KSE if it was already assigned
249  * to one, possibly causing a new thread to be assigned to the KSE
250  * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
251  */
252 void
253 remrunqueue(struct thread *td)
254 {
255 	struct thread *td2, *td3, *owner;
256 	struct ksegrp *kg;
257 	struct kse *ke;
258 
259 	mtx_assert(&sched_lock, MA_OWNED);
260 	KASSERT ((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
261 	kg = td->td_ksegrp;
262 	ke = td->td_kse;
263 	/*
264 	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
265 	 * threads are BOUND.
266 	 */
267 	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
268 	kg->kg_runnable--;
269 	TD_SET_CAN_RUN(td);
270 	if ((td->td_flags & TDF_UNBOUND) == 0)  {
271 		/* Bring its kse with it, leave the thread attached */
272 		runq_remove(&runq, ke);
273 		ke->ke_state = KES_THREAD;
274 		return;
275 	}
276 	if (ke) {
277 		/*
278 		 * This thread has been assigned to a KSE.
279 		 * We need to dissociate it and try to assign the
280 		 * KSE to the next available thread. Then, we should
281 		 * see if we need to move the KSE in the run queues.
282 		 */
283 		td2 = kg->kg_last_assigned;
284 		KASSERT((td2 != NULL), ("last assigned has wrong value "));
285 		td->td_kse = NULL;
286 		if ((td3 = TAILQ_NEXT(td2, td_runq))) {
287 			KASSERT(td3 != td, ("td3 somehow matched td"));
288 			/*
289 			 * Give the next unassigned thread to the KSE
290 			 * so the number of runnable KSEs remains
291 			 * constant.
292 			 */
293 			td3->td_kse = ke;
294 			ke->ke_thread = td3;
295 			kg->kg_last_assigned = td3;
296 			runq_readjust(&runq, ke);
297 		} else {
298 			/*
299 			 * There is no unassigned thread.
300 			 * If we were the last assigned one,
301 			 * adjust the last assigned pointer back
302 			 * one, which may result in NULL.
303 			 */
304 			if (td == td2) {
305 				kg->kg_last_assigned =
306 				    TAILQ_PREV(td, threadqueue, td_runq);
307 			}
308 			runq_remove(&runq, ke);
309 			KASSERT((ke->ke_state != KES_IDLE),
310 			    ("kse already idle"));
311 			if (ke->ke_bound) {
312 				owner = ke->ke_bound;
313 				if (owner->td_inhibitors == TDI_LOAN) {
314 					TD_CLR_LOAN(owner);
315 					ke->ke_bound = NULL;
316 					ke->ke_thread = owner;
317 					owner->td_kse = ke;
318 					setrunqueue(owner);
319 					CTR2(KTR_RUNQ,
320 					"remrunqueue: ke%p -> td%p (give back)",
321 			 			ke, owner);
322 				} else {
323 					TD_CLR_LOAN(owner);
324 					ke->ke_state = KES_THREAD;
325 					ke->ke_thread = owner;
326 					owner->td_kse = ke;
327 					ke->ke_flags |= KEF_ONLOANQ;
328 					TAILQ_INSERT_HEAD(&kg->kg_lq, ke,
329 						ke_kgrlist);
330 					kg->kg_loan_kses++;
331 				}
332 			} else {
333 				ke->ke_state = KES_IDLE;
334 				ke->ke_thread = NULL;
335 				TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
336 				kg->kg_idle_kses++;
337 			}
338 		}
339 	}
340 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
341 }
342 
343 void
344 setrunqueue(struct thread *td)
345 {
346 	struct kse *ke;
347 	struct ksegrp *kg;
348 	struct thread *td2;
349 	struct thread *tda;
350 
351 	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
352 	mtx_assert(&sched_lock, MA_OWNED);
353 	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
354 	    ("setrunqueue: bad thread state"));
355 	TD_SET_RUNQ(td);
356 	kg = td->td_ksegrp;
357 	kg->kg_runnable++;
358 	if ((td->td_flags & TDF_UNBOUND) == 0) {
359 		KASSERT((td->td_kse != NULL),
360 		    ("queueing BAD thread to run queue"));
361 		ke = td->td_kse;
362 		ke->ke_bound = NULL;
363 		if (ke->ke_flags & KEF_ONLOANQ) {
364 			ke->ke_flags &= ~KEF_ONLOANQ;
365 			TAILQ_REMOVE(&kg->kg_lq, ke, ke_kgrlist);
366 			kg->kg_loan_kses--;
367 		}
368 		/*
369 		 * Common path optimisation: Only one of everything
370 		 * and the KSE is always already attached.
371 		 * Totally ignore the ksegrp run queue.
372 		 */
373 		runq_add(&runq, td->td_kse);
374 		return;
375 	}
376 	/*
377 	 * Ok, so we are threading with this thread.
378 	 * We may not have a KSE; see if we can get one.
379 	 */
380 	tda = kg->kg_last_assigned;
381 	if ((ke = td->td_kse) == NULL) {
382 		/*
383 		 * We will need a KSE: see if there is one.
384 		 * First look for a free one, before getting desperate.
385 		 * If we can't get one, our priority is not high enough;
386 		 * that's ok.
387 		 */
388 		if (kg->kg_idle_kses) {
389 			/*
390 			 * There is a free one so it's ours for the asking..
391 			 */
392 			ke = TAILQ_FIRST(&kg->kg_iq);
393 			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
394 			ke->ke_state = KES_THREAD;
395 			kg->kg_idle_kses--;
396 		} else if (kg->kg_loan_kses) {
397 			ke = TAILQ_FIRST(&kg->kg_lq);
398 			TAILQ_REMOVE(&kg->kg_lq, ke, ke_kgrlist);
399 			ke->ke_flags &= ~KEF_ONLOANQ;
400 			ke->ke_state = KES_THREAD;
401 			TD_SET_LOAN(ke->ke_bound);
402 			kg->kg_loan_kses--;
403 		} else if (tda && (tda->td_priority > td->td_priority)) {
404 			/*
405 			 * None free, but there is one we can commandeer.
406 			 */
407 			ke = tda->td_kse;
408 			tda->td_kse = NULL;
409 			ke->ke_thread = NULL;
410 			tda = kg->kg_last_assigned =
411 		    	    TAILQ_PREV(tda, threadqueue, td_runq);
412 			runq_remove(&runq, ke);
413 		}
414 	} else {
415 		/*
416 		 * Temporarily disassociate so it looks like the other cases.
417 		 */
418 		ke->ke_thread = NULL;
419 		td->td_kse = NULL;
420 	}
421 
422 	/*
423 	 * Add the thread to the ksegrp's run queue at
424 	 * the appropriate place.
425 	 */
426 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
427 		if (td2->td_priority > td->td_priority) {
428 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
429 			break;
430 		}
431 	}
432 	if (td2 == NULL) {
433 		/* We ran off the end of the TAILQ or it was empty. */
434 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
435 	}
436 
437 	/*
438 	 * If we have a ke to use, then put it on the run queue and,
439 	 * if needed, readjust the last_assigned pointer.
440 	 */
441 	if (ke) {
442 		if (tda == NULL) {
443 			/*
444 			 * No pre-existing last assigned so whoever is first
445 			 * gets the KSE we brought in (maybe us).
446 			 */
447 			td2 = TAILQ_FIRST(&kg->kg_runq);
448 			KASSERT((td2->td_kse == NULL),
449 			    ("unexpected ke present"));
450 			td2->td_kse = ke;
451 			ke->ke_thread = td2;
452 			kg->kg_last_assigned = td2;
453 		} else if (tda->td_priority > td->td_priority) {
454 			/*
455 			 * It's ours, grab it, but last_assigned is past us
456 			 * so don't change it.
457 			 */
458 			td->td_kse = ke;
459 			ke->ke_thread = td;
460 		} else {
461 			/*
462 			 * We are past last_assigned, so
463 			 * put the new kse on whatever is next,
464 			 * which may or may not be us.
465 			 */
466 			td2 = TAILQ_NEXT(tda, td_runq);
467 			kg->kg_last_assigned = td2;
468 			td2->td_kse = ke;
469 			ke->ke_thread = td2;
470 		}
471 		runq_add(&runq, ke);
472 	}
473 }
474 
475 /************************************************************************
476  * Critical section marker functions					*
477  ************************************************************************/
478 /* Critical sections that prevent preemption. */
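/*
 * td_critnest counts nested critical sections; the machine-dependent
 * hooks cpu_critical_enter() and cpu_critical_exit() are only invoked
 * when entering the outermost section and when leaving the last one.
 */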
479 void
480 critical_enter(void)
481 {
482 	struct thread *td;
483 
484 	td = curthread;
485 	if (td->td_critnest == 0)
486 		cpu_critical_enter();
487 	td->td_critnest++;
488 }
489 
490 void
491 critical_exit(void)
492 {
493 	struct thread *td;
494 
495 	td = curthread;
496 	if (td->td_critnest == 1) {
497 		td->td_critnest = 0;
498 		cpu_critical_exit();
499 	} else {
500 		td->td_critnest--;
501 	}
502 }
503 
504 
505 /************************************************************************
506  * SYSTEM RUN QUEUE manipulations and tests				*
507  ************************************************************************/
508 /*
509  * Initialize a run structure.
510  */
511 void
512 runq_init(struct runq *rq)
513 {
514 	int i;
515 
516 	bzero(rq, sizeof *rq);
517 	for (i = 0; i < RQ_NQS; i++)
518 		TAILQ_INIT(&rq->rq_queues[i]);
519 }
520 
521 /*
522  * Clear the status bit of the queue corresponding to priority level pri,
523  * indicating that it is empty.
524  */
525 static __inline void
526 runq_clrbit(struct runq *rq, int pri)
527 {
528 	struct rqbits *rqb;
529 
530 	rqb = &rq->rq_status;
531 	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
532 	    rqb->rqb_bits[RQB_WORD(pri)],
533 	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
534 	    RQB_BIT(pri), RQB_WORD(pri));
535 	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
536 }
537 
538 /*
539  * Find the index of the first non-empty run queue.  This is done by
540  * scanning the status bits, a set bit indicates a non-empty queue.
541  */
542 static __inline int
543 runq_findbit(struct runq *rq)
544 {
545 	struct rqbits *rqb;
546 	int pri;
547 	int i;
548 
549 	rqb = &rq->rq_status;
550 	for (i = 0; i < RQB_LEN; i++)
551 		if (rqb->rqb_bits[i]) {
552 			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
553 			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
554 			    rqb->rqb_bits[i], i, pri);
555 			return (pri);
556 		}
557 
558 	return (-1);
559 }
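
/*
 * For example (assuming 32-bit status words, so RQB_L2BPW is 5, and
 * RQB_FFS returning the zero-based index of the lowest set bit): if
 * word 0 is empty and word 1 has bit 8 as its lowest set bit, the
 * first non-empty queue found above is pri = 8 + (1 << 5) = 40.
 */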
560 
561 /*
562  * Set the status bit of the queue corresponding to priority level pri,
563  * indicating that it is non-empty.
564  */
565 static __inline void
566 runq_setbit(struct runq *rq, int pri)
567 {
568 	struct rqbits *rqb;
569 
570 	rqb = &rq->rq_status;
571 	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
572 	    rqb->rqb_bits[RQB_WORD(pri)],
573 	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
574 	    RQB_BIT(pri), RQB_WORD(pri));
575 	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
576 }
577 
578 /*
579  * Add the KSE to the queue specified by its priority, and set the
580  * corresponding status bit.
581  */
582 void
583 runq_add(struct runq *rq, struct kse *ke)
584 {
585 	struct rqhead *rqh;
586 	int pri;
587 
588 	mtx_assert(&sched_lock, MA_OWNED);
589 	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
590 	KASSERT((ke->ke_thread->td_kse != NULL),
591 	    ("runq_add: No KSE on thread"));
592 	KASSERT(ke->ke_state != KES_ONRUNQ,
593 	    ("runq_add: kse %p (%s) already in run queue", ke,
594 	    ke->ke_proc->p_comm));
595 	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
596 		("runq_add: process swapped out"));
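	/*
	 * Map the thread's priority to a run queue index; each queue
	 * covers RQ_PPQ adjacent priority levels (for example, with
	 * RQ_PPQ == 4 a priority of 130 maps to queue index 32).
	 */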
597 	pri = ke->ke_thread->td_priority / RQ_PPQ;
598 	ke->ke_rqindex = pri;
599 	runq_setbit(rq, pri);
600 	rqh = &rq->rq_queues[pri];
601 	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
602 	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
603 	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
604 	ke->ke_ksegrp->kg_runq_kses++;
605 	ke->ke_state = KES_ONRUNQ;
606 }
607 
608 /*
609  * Return true if there are runnable processes of any priority on the run
610  * queue, false otherwise.  Has no side effects, does not modify the run
611  * queue structure.
612  */
613 int
614 runq_check(struct runq *rq)
615 {
616 	struct rqbits *rqb;
617 	int i;
618 
619 	rqb = &rq->rq_status;
620 	for (i = 0; i < RQB_LEN; i++)
621 		if (rqb->rqb_bits[i]) {
622 			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
623 			    rqb->rqb_bits[i], i);
624 			return (1);
625 		}
626 	CTR0(KTR_RUNQ, "runq_check: empty");
627 
628 	return (0);
629 }
630 
631 /*
632  * Find and remove the highest priority process from the run queue.
633  * If there are no runnable processes, the per-cpu idle process is
634  * returned.  Will not return NULL under any circumstances.
635  */
636 struct kse *
637 runq_choose(struct runq *rq)
638 {
639 	struct rqhead *rqh;
640 	struct kse *ke;
641 	int pri;
642 
643 	mtx_assert(&sched_lock, MA_OWNED);
644 	while ((pri = runq_findbit(rq)) != -1) {
645 		rqh = &rq->rq_queues[pri];
646 		ke = TAILQ_FIRST(rqh);
647 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
648 		CTR3(KTR_RUNQ,
649 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
650 		TAILQ_REMOVE(rqh, ke, ke_procq);
651 		ke->ke_ksegrp->kg_runq_kses--;
652 		if (TAILQ_EMPTY(rqh)) {
653 			CTR0(KTR_RUNQ, "runq_choose: empty");
654 			runq_clrbit(rq, pri);
655 		}
656 
657 		ke->ke_state = KES_THREAD;
658 		KASSERT((ke->ke_thread != NULL),
659 		    ("runq_choose: No thread on KSE"));
660 		KASSERT((ke->ke_thread->td_kse != NULL),
661 		    ("runq_choose: No KSE on thread"));
662 		KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
663 			("runq_choose: process swapped out"));
664 		return (ke);
665 	}
666 	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
667 
668 	return (NULL);
669 }
670 
671 /*
672  * Remove the KSE from the queue specified by its priority, and clear the
673  * corresponding status bit if the queue becomes empty.
674  * Caller must set ke->ke_state afterwards.
675  */
676 void
677 runq_remove(struct runq *rq, struct kse *ke)
678 {
679 	struct rqhead *rqh;
680 	int pri;
681 
682 	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
683 	mtx_assert(&sched_lock, MA_OWNED);
684 	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
685 		("runq_remove: process swapped out"));
686 	pri = ke->ke_rqindex;
687 	rqh = &rq->rq_queues[pri];
688 	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
689 	    ke, ke->ke_thread->td_priority, pri, rqh);
690 	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
691 	TAILQ_REMOVE(rqh, ke, ke_procq);
692 	if (TAILQ_EMPTY(rqh)) {
693 		CTR0(KTR_RUNQ, "runq_remove: empty");
694 		runq_clrbit(rq, pri);
695 	}
696 	ke->ke_state = KES_THREAD;
697 	ke->ke_ksegrp->kg_runq_kses--;
698 }
699 
700 static void
701 runq_readjust(struct runq *rq, struct kse *ke)
702 {
703 
704 	if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
705 		runq_remove(rq, ke);
706 		runq_add(rq, ke);
707 	}
708 }
709 
710 #if 0
711 void
712 thread_sanity_check(struct thread *td)
713 {
714 	struct proc *p;
715 	struct ksegrp *kg;
716 	struct kse *ke;
717 	struct thread *td2;
718 	unsigned int prevpri;
719 	int	saw_lastassigned;
720 	int unassigned;
721 	int assigned;
722 
723 	p = td->td_proc;
724 	kg = td->td_ksegrp;
725 	ke = td->td_kse;
726 
727 
728 	if (ke) {
729 		if (p != ke->ke_proc) {
730 			panic("wrong proc");
731 		}
732 		if (ke->ke_thread != td) {
733 			panic("wrong thread");
734 		}
735 	}
736 
737 	if ((p->p_flag & P_KSES) == 0) {
738 		if (ke == NULL) {
739 			panic("non KSE thread lost kse");
740 		}
741 	} else {
742 		prevpri = 0;
743 		saw_lastassigned = 0;
744 		unassigned = 0;
745 		assigned = 0;
746 		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
747 			if (td2->td_priority < prevpri) {
748 				panic("thread runqueue unsorted");
749 			}
750 			prevpri = td2->td_priority;
751 			if (td2->td_kse) {
752 				assigned++;
753 				if (unassigned) {
754 					panic("unassigned before assigned");
755 				}
756 				if (kg->kg_last_assigned == NULL) {
757 					panic("lastassigned corrupt");
758 				}
759 				if (saw_lastassigned) {
760 					panic("last assigned not last");
761 				}
762 				if (td2->td_kse->ke_thread != td2) {
763 					panic("mismatched kse/thread");
764 				}
765 			} else {
766 				unassigned++;
767 			}
768 			if (td2 == kg->kg_last_assigned) {
769 				saw_lastassigned = 1;
770 				if (td2->td_kse == NULL) {
771 					panic("last assigned not assigned");
772 				}
773 			}
774 		}
775 		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
776 			panic("where on earth does lastassigned point?");
777 		}
778 		FOREACH_THREAD_IN_GROUP(kg, td2) {
779 			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
780 			    (TD_ON_RUNQ(td2))) {
781 				assigned++;
782 				if (td2->td_kse == NULL) {
783 					panic ("BOUND thread with no KSE");
784 				}
785 			}
786 		}
787 #if 0
788 		if ((unassigned + assigned) != kg->kg_runnable) {
789 			panic("wrong number in runnable");
790 		}
791 #endif
792 	}
793 }
794 #endif
795 
796