/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/***
Here is the logic:

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg).  If X of these KSEs are actually running at the
moment in question, then at most M = N - X of these KSEs are on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running.  The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned.  If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to be run, we know it was
associated with the highest priority thread in the queue (at the head
of the queue).  If it is also the last assigned, we know M was 1 and must
now be 0.  Since the thread is no longer queued, the pointer must no
longer refer to it.  Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available, so we can
prove that the next thread in the ksegrp list will not have a KSE to
assign to it, which shows that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, its
priority can be compared with that of the last assigned thread to see
whether it should 'steal' its KSE, i.e. whether it belongs 'earlier'
on the list than that thread or later.  If it is earlier, the KSE is
removed from the last assigned thread (which is then no longer assigned
a KSE) and reassigned to the new thread, which is placed earlier in the
list.  The pointer is then backed up to the previous thread (which may
or may not be the new thread).

When a thread sleeps or is removed, the KSE becomes available, and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP.  If this situation changes, the KSEs are
reassigned to keep this true.
***/
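
/*
 * A small sketch of the 'steal' test described above, kept in this
 * file's usual #if 0 style; the helper name is hypothetical and the
 * test mirrors the one setrunqueue() performs below.  A newly runnable
 * thread commandeers the KSE of kg_last_assigned only if it would
 * queue earlier, i.e. at a numerically lower (better) priority.
 */
#if 0
static int
would_steal_kse(struct ksegrp *kg, struct thread *td)
{
	struct thread *tda;

	tda = kg->kg_last_assigned;
	/* Steal only if the last assigned thread has a worse priority. */
	return (tda != NULL && tda->td_priority > td->td_priority);
}
#endif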

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_full_preemption.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#include <machine/critical.h>

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

void panc(char *string1, char *string2);

#if 0
static void runq_readjust(struct runq *rq, struct kse *ke);
#endif
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
		/* Shutting down, run idlethread on APs */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
		ke->ke_flags |= KEF_DIDRUN;
		TD_SET_RUNNING(td);
		return (td);
	}
#endif

retry:
	ke = sched_choose();
	if (ke) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_proc->p_flag & P_SA) {
			if (kg->kg_last_assigned == td) {
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
		}
		kg->kg_runnable--;
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	ke->ke_flags |= KEF_DIDRUN;

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Given a surplus KSE, either assign a new runnable thread to it
 * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
 * Assumes that the original thread is not runnable.
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;
	struct thread *original;

	mtx_assert(&sched_lock, MA_OWNED);
	original = ke->ke_thread;
	KASSERT(original == NULL || TD_IS_INHIBITED(original),
	    ("reassigning KSE with runnable thread"));
	kg = ke->ke_ksegrp;
	if (original)
		original->td_kse = NULL;

	/*
	 * Find the first unassigned thread.
	 */
	if ((td = kg->kg_last_assigned) != NULL)
		td = TAILQ_NEXT(td, td_runq);
	else
		td = TAILQ_FIRST(&kg->kg_runq);

	/*
	 * If we found one, assign it the kse, otherwise idle the kse.
	 */
	if (td) {
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		sched_add(td);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
		return;
	}

	ke->ke_state = KES_IDLE;
	ke->ke_thread = NULL;
	TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist);
	kg->kg_idle_kses++;
	CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke);
	return;
}

#if 0
/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority.
 */
static void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/* Bring its kse with it, leave the thread attached */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		return;
	}
	td3 = TAILQ_PREV(td, threadqueue, td_runq);
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread.  Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		if (td2 == td)
			kg->kg_last_assigned = td3;
		kse_reassign(ke);
	}
}
#endif

/*
 * Change the priority of a thread that is on the run queue.
 */
void
adjustrunqueue(struct thread *td, int newpri)
{
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));

	ke = td->td_kse;
	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/* We only care about the kse in the run queue. */
		td->td_priority = newpri;
		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
			sched_rem(td);
			sched_add(td);
		}
		return;
	}

	/* It is a threaded process */
	kg = td->td_ksegrp;
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	if (ke) {
		if (kg->kg_last_assigned == td) {
			kg->kg_last_assigned =
			    TAILQ_PREV(td, threadqueue, td_runq);
		}
		sched_rem(td);
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	td->td_priority = newpri;
	setrunqueue(td);
}
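
/*
 * Make the thread runnable.  Unthreaded processes go straight onto the
 * system run queue.  For threaded (P_SA) processes, the thread is queued
 * on its ksegrp in priority order and, if a KSE is available (the
 * thread's own, one from the idle list, or one commandeered from
 * kg_last_assigned), that KSE is assigned to the appropriate queued
 * thread and placed on the system run queue.
 */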
void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/*
		 * Common path optimisation: only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		sched_add(td);
		return;
	}

	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking.
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_THREAD;
			kg->kg_idle_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			sched_rem(tda);
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
		}
	} else {
		/*
		 * Temporarily disassociate so it looks like the other cases.
		 */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a ke to use, then put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned, so whoever is first
			 * gets the KSE we brought in (maybe us).
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		sched_add(ke->ke_thread);
	}
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter();
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));
	if (td->td_critnest == 1) {
#ifdef PREEMPTION
		if (td->td_flags & TDF_OWEPREEMPT) {
			mtx_lock_spin(&sched_lock);
			mi_switch(SW_INVOL, NULL);
			mtx_unlock_spin(&sched_lock);
		}
#endif
		td->td_critnest = 0;
		cpu_critical_exit();
	} else {
		td->td_critnest--;
	}
}
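
/*
 * A minimal usage sketch in this file's #if 0 style: code that must not
 * be preempted on the local CPU brackets the region with
 * critical_enter()/critical_exit().  The function name and the work done
 * inside the section are hypothetical.
 */
#if 0
static void
example_critical_region(void)
{
	critical_enter();	/* preemption now deferred */
	/* ... touch per-CPU state safely here ... */
	critical_exit();	/* may switch here if a preemption was owed */
}
#endif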

/*
 * This function is called when a thread is about to be put on a run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines whether the new thread should preempt the current thread
 * immediately.  If so, it switches to it and eventually returns true.  If
 * not, it returns false so that the caller may place the thread on an
 * appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
	struct thread *ctd;
	int cpri, pri;
#endif

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef PREEMPTION
	/*
	 * The new thread should not preempt the current thread if any of the
	 * following conditions are true:
	 *
	 *  - The current thread has a higher (numerically lower) priority.
	 *  - It is too early in the boot for context switches (cold is set).
	 *  - The current thread has an inhibitor set or is in the process of
	 *    exiting.  In this case, the current thread is about to switch
	 *    out anyway, so there's no point in preempting.  If we did,
	 *    the current thread would not be properly resumed as well, so
	 *    just avoid that whole landmine.
	 *  - The new thread's priority is not a realtime priority, the
	 *    current thread's priority is not an idle priority, and
	 *    FULL_PREEMPTION is disabled.
	 *
	 * If all of these conditions are false, but the current thread is in
	 * a nested critical section, then we have to defer the preemption
	 * until we exit the critical section.  Otherwise, switch immediately
	 * to the new thread.
	 */
	ctd = curthread;
	pri = td->td_priority;
	cpri = ctd->td_priority;
	if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) ||
	    td->td_kse->ke_state != KES_THREAD)
		return (0);
#ifndef FULL_PREEMPTION
	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
	    !(cpri >= PRI_MIN_IDLE))
		return (0);
#endif
	if (ctd->td_critnest > 1) {
		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
		    ctd->td_critnest);
		ctd->td_flags |= TDF_OWEPREEMPT;
		return (0);
	}

	/*
	 * Our thread state says that we are already on a run queue, so
	 * update our state as if we had been dequeued by choosethread().
	 */
	MPASS(TD_ON_RUNQ(td));
	TD_SET_RUNNING(td);
	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mi_switch(SW_INVOL, td);
	return (1);
#else
	return (0);
#endif
}

#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}
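
/*
 * An equivalent plain-C sketch of the scan above, assuming the usual
 * runq.h definitions (RQB_FFS(x) == ffs(x) - 1, and RQB_L2BPW is the
 * log2 of the word size so that i << RQB_L2BPW == i * RQB_BPW); those
 * definitions are assumptions here and the function is illustration
 * only.
 */
#if 0
static int
runq_findbit_sketch(struct runq *rq)
{
	int i;

	for (i = 0; i < RQB_LEN; i++)
		if (rq->rq_status.rqb_bits[i] != 0)
			return (ffs(rq->rq_status.rqb_bits[i]) - 1 +
			    i * RQB_BPW);
	return (-1);
}
#endif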

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
}
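
/*
 * Worked example of the index math above, assuming the stock
 * RQ_PPQ == 4 from runq.h (an assumption, not defined in this file):
 * four adjacent priority levels share one queue, so thread priorities
 * 0-3 all map to rq_queues[0], and priority 70 maps to rq_queues[17].
 */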

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects and does not modify the
 * run queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find the highest priority process on the run queue.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
		("runq_remove: process swapped out"));
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
	    ke, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
}

#if 0
void
panc(char *string1, char *string2)
{
	printf("%s", string1);
	Debugger(string2);
}

void
thread_sanity_check(struct thread *td, char *string)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2 = NULL;
	unsigned int prevpri;
	int	saw_lastassigned = 0;
	int unassigned = 0;
	int assigned = 0;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (ke) {
		if (p != ke->ke_proc) {
			panc(string, "wrong proc");
		}
		if (ke->ke_thread != td) {
			panc(string, "wrong thread");
		}
	}

	if ((p->p_flag & P_SA) == 0) {
		if (ke == NULL) {
			panc(string, "non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panc(string, "thread runqueue unsorted");
			}
			if ((td2->td_state == TDS_RUNQ) &&
			    td2->td_kse &&
			    (td2->td_kse->ke_state != KES_ONRUNQ)) {
				panc(string, "KSE wrong state");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panc(string, "unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panc(string, "lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panc(string, "last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panc(string, "mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panc(string, "last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panc(string, "where on earth does lastassigned point?");
		}
#if 0
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (TD_ON_RUNQ(td2))) {
				assigned++;
				if (td2->td_kse == NULL) {
					panc(string, "BOUND thread with no KSE");
				}
			}
		}
#endif
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panc(string, "wrong number in runnable");
		}
#endif
	}
	if (assigned == 12345) {
		printf("%p %p %p %p %p %d, %d",
		    td, td2, ke, kg, p, assigned, saw_lastassigned);
	}
}
#endif