xref: /freebsd/sys/kern/kern_switch.c (revision 6780ab54325a71e7e70112b11657973edde8655e)
1 /*
2  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /***
30 
31 Here is the logic:
32 
33 If there are N processors, then there are at most N KSEs (kernel
34 schedulable entities) working to process threads that belong to a
35 KSEGROUP (kg). If there are X of these KSEs actually running at the
36 moment in question, then there are at most M (= N - X) of these KSEs on
37 the run queue, as running KSEs are not on the queue.
38 
39 Runnable threads are queued off the KSEGROUP in priority order.
40 If there are M or more threads runnable, the top M threads
41 (by priority) are 'preassigned' to the M KSEs not running. The KSEs take
42 their priority from those threads and are put on the run queue.
43 
44 The last thread that had a priority high enough to have a KSE associated
45 with it, AND IS ON THE RUN QUEUE, is pointed to by
46 kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
47 assigned, either because all the available KSEs are actively running or
48 because there are no threads queued, that pointer is NULL.
49 
50 When a KSE is removed from the run queue to become runnable, we know
51 it was associated with the highest priority thread in the queue (at the head
52 of the queue). If it is also the last assigned, we know M was 1 and must
53 now be 0. Since the thread is no longer queued, that pointer must be
54 removed from it. Since we know there were no more KSEs available
55 (M was 1 and is now 0), and since we are not FREEING our KSE
56 but using it, we know there are STILL no more KSEs available. Thus the
57 next thread in the ksegrp list will not have a KSE to assign to
58 it, so the pointer must be made 'invalid' (NULL).
59 
60 The pointer exists so that when a new thread is made runnable, it can
61 have its priority compared with the last assigned thread to see if
62 it should 'steal' its KSE or not, i.e. whether it is 'earlier'
63 on the list than that thread or later. If it's earlier, then the KSE is
64 removed from the last assigned thread (which is now not assigned a KSE)
65 and reassigned to the new thread, which is placed earlier in the list.
66 The pointer is then backed up to the previous thread (which may or may not
67 be the new thread). See the sketch following this comment block.
68 
69 When a thread sleeps or is removed, the KSE becomes available and, if there
70 are queued threads that are not assigned KSEs, the highest priority one of
71 them is assigned the KSE, which is then placed back on the run queue at
72 the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
73 to point to it.
74 
75 The following diagram shows 2 KSEs and 3 threads from a single process.
76 
77  RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
78               \    \____
79                \        \
80     KSEGROUP---thread--thread--thread    (queued in priority order)
81         \                 /
82          \_______________/
83           (last_assigned)
84 
85 The result of this scheme is that the M available KSEs are always
86 queued at the priorities they have inherited from the M highest priority
87 threads for that KSEGROUP. If this situation changes, the KSEs are
88 reassigned to keep this true.
89 
90 */
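
/*
 * Illustrative sketch only, never compiled: the 'steal or queue' decision
 * described above, written as a hypothetical helper.  The name
 * example_maybe_steal_kse() is invented for illustration; the real logic
 * lives in setrunqueue() below, which also handles borrowing a KSE from
 * the ksegrp's loan queue.
 */
#if 0
static void
example_maybe_steal_kse(struct ksegrp *kg, struct thread *td)
{
	struct thread *tda;
	struct kse *ke;

	tda = kg->kg_last_assigned;
	if (tda != NULL && tda->td_priority > td->td_priority) {
		/*
		 * The new thread sorts 'earlier' (higher priority) than the
		 * last thread holding a KSE, so take that KSE for ourselves
		 * and back the last_assigned pointer up by one thread.
		 */
		ke = tda->td_kse;
		tda->td_kse = NULL;
		ke->ke_thread = td;
		td->td_kse = ke;
		kg->kg_last_assigned = TAILQ_PREV(tda, threadqueue, td_runq);
	}
	/* Either way, td is then inserted into kg_runq in priority order. */
}
#endif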
91 
92 #include <sys/param.h>
93 #include <sys/systm.h>
94 #include <sys/kernel.h>
95 #include <sys/ktr.h>
96 #include <sys/lock.h>
97 #include <sys/mutex.h>
98 #include <sys/proc.h>
99 #include <sys/queue.h>
100 #include <sys/sched.h>
101 #include <machine/critical.h>
102 
103 CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
104 
105 void panc(char *string1, char *string2);
106 
107 #if 0
108 static void runq_readjust(struct runq *rq, struct kse *ke);
109 #endif
110 /************************************************************************
111  * Functions that manipulate runnability from a thread perspective.	*
112  ************************************************************************/
113 /*
114  * Select the KSE that will be run next.  From that find the thread, and
115  * remove it from the KSEGRP's run queue.  If there is thread clustering,
116  * this will be what does it.
117  */
118 struct thread *
119 choosethread(void)
120 {
121 	struct kse *ke;
122 	struct thread *td;
123 	struct ksegrp *kg;
124 
125 retry:
126 	if ((ke = sched_choose())) {
127 		td = ke->ke_thread;
128 		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
129 		kg = ke->ke_ksegrp;
130 		if (TD_IS_UNBOUND(td)) {
131 			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
132 			if (kg->kg_last_assigned == td) {
133 				kg->kg_last_assigned = TAILQ_PREV(td,
134 				    threadqueue, td_runq);
135 			}
136 		}
137 		kg->kg_runnable--;
138 		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
139 		    td, td->td_priority);
140 	} else {
141 		/* Simulate runq_choose() having returned the idle thread */
142 		td = PCPU_GET(idlethread);
143 		ke = td->td_kse;
144 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
145 	}
146 	ke->ke_flags |= KEF_DIDRUN;
147 
148 	/*
149 	 * Only allow non-system threads to run during a panic
150 	 * if they are the one we are tracing.  (I think.. [JRE])
151 	 */
152 	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
153 	    (td->td_flags & TDF_INPANIC) == 0))
154 		goto retry;
155 
156 	TD_SET_RUNNING(td);
157 	return (td);
158 }
159 
160 /*
161  * Given a KSE (now surplus or at least loanable), either assign a new
162  * runnable thread to it (and put it in the run queue) or put it in
163  * the ksegrp's idle KSE list.
164  * Or maybe give it back to its owner if it's been loaned.
165  * Assumes that the original thread is either not runnable or
166  * already on the run queue.
167  */
168 void
169 kse_reassign(struct kse *ke)
170 {
171 	struct ksegrp *kg;
172 	struct thread *td;
173 	struct thread *owner;
174 	struct thread *original;
175 	int loaned;
176 
177 	KASSERT((ke->ke_owner), ("reassigning KSE with no owner"));
178 	KASSERT((ke->ke_thread && TD_IS_INHIBITED(ke->ke_thread)),
179 	    ("reassigning KSE with no or runnable thread"));
180 	mtx_assert(&sched_lock, MA_OWNED);
181 	kg = ke->ke_ksegrp;
182 	owner = ke->ke_owner;
183 	loaned = TD_LENDER(owner);
184 	original = ke->ke_thread;
185 
186 	if (TD_CAN_UNBIND(original) && (original->td_standin)) {
187 		KASSERT((owner == original),
188 		    ("Early thread borrowing?"));
189 		/*
190 		 * The outgoing thread is "threaded" and has never
191 		 * scheduled an upcall.
192 		 * Decide whether this is a short or long term event
193 		 * and thus whether or not to schedule an upcall.
194 		 * If it is a short term event, just suspend it in
195 		 * a way that takes its KSE with it.
196 		 * Select the events for which we want to schedule upcalls.
197 		 * For now it's just sleep.
198 		 * Other threads that still have not fired an upcall
199 		 * are held to their KSE using the temporary binding.
200 		 */
201 		if (TD_ON_SLEEPQ(original)) {
202 			/*
203 			 * A bound thread that can still unbind itself
204 			 * has been scheduled out.
205 			 * If it is sleeping, then we need to schedule an
206 			 * upcall.
207 			 * XXXKSE eventually almost any inhibition could do.
208 			 */
209 			original->td_flags &= ~TDF_CAN_UNBIND;
210 			original->td_flags |= TDF_UNBOUND;
211 			thread_schedule_upcall(original, ke);
212 			owner = ke->ke_owner;
213 			loaned = 1;
214 		}
215 	}
216 
217 	/*
218 	 * If the current thread was borrowing, then make things consistent
219 	 * by giving it back to the owner for the moment. The original thread
220 	 * must be unbound and have already used its chance for
221 	 * firing off an upcall. Threads that have not yet made an upcall
222 	 * can not borrow KSEs.
223 	 */
224 	if (loaned) {
225 		TD_CLR_LOAN(owner);
226 		ke->ke_thread = owner;
227 		original->td_kse = NULL; /* give it amnesia */
228 		/*
229 		 * Upcalling threads have lower priority than all
230 		 * in-kernel threads; however, threads that have loaned out
231 		 * their KSE and are NOT upcalling keep their own priority.
232 		 * In other words, only look for other work if
233 		 * the owner is not runnable, OR is upcalling.
234 		 */
235 		if (TD_CAN_RUN(owner) &&
236 		    ((owner->td_flags & TDF_UPCALLING) == 0)) {
237 			setrunnable(owner);
238 			CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p (give back)",
239 			    ke, owner);
240 			return;
241 		}
242 	}
243 
244 	/*
245 	 * Either the owner is not runnable, or it is upcalling.
246 	 * Find the first unassigned thread:
247 	 * if there is a 'last assigned' then see what's next,
248 	 * otherwise look at what is first.
249 	 */
250 	if ((td = kg->kg_last_assigned)) {
251 		td = TAILQ_NEXT(td, td_runq);
252 	} else {
253 		td = TAILQ_FIRST(&kg->kg_runq);
254 	}
255 
256 	/*
257 	 * If we found one, assign it the KSE; otherwise idle the KSE.
258 	 */
259 	if (td) {
260 		/*
261 		 * Assign the new thread to the KSE
262 		 * and make the KSE runnable again.
263 		 */
264 		if (TD_IS_BOUND(owner)) {
265 			/*
266 			 * If there is a reason to keep the previous
267 			 * owner, do so.
268 			 */
269 			TD_SET_LOAN(owner);
270 		} else {
271 			/* otherwise, cut it free */
272 			ke->ke_owner = td;
273 			owner->td_kse = NULL;
274 		}
275 		kg->kg_last_assigned = td;
276 		td->td_kse = ke;
277 		ke->ke_thread = td;
278 		sched_add(ke);
279 		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
280 		return;
281 	}
282 
283 	/*
284 	 * Now handle any waiting upcall,
285 	 * since we didn't make it runnable before.
286 	 */
287 	if (TD_CAN_RUN(owner)) {
288 		setrunnable(owner);
289 		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p (give back)",
290 		    ke, owner);
291 		return;
292 	}
293 
294 	/*
295 	 * It is possible that this is the last thread in the group
296 	 * because the KSE is being shut down or the process
297 	 * is exiting.
298 	 */
299 	if (TD_IS_EXITING(owner) || (ke->ke_flags & KEF_EXIT)) {
300 		ke->ke_thread = NULL;
301 		owner->td_kse = NULL;
302 		kse_unlink(ke);
303 		return;
304 	}
305 
306 	/*
307 	 * At this stage all we know is that the owner
308 	 * is the same as the 'active' thread in the KSE
309 	 * and that it is
310 	 * presently NOT loaned out.
311 	 * Put it on the loanable queue. Make it FIFO
312 	 * so that long term sleepers donate their KSEs first.
313 	 */
314 	KASSERT((TD_IS_BOUND(owner)), ("kse_reassign: UNBOUND lender"));
315 	ke->ke_state = KES_THREAD;
316 	ke->ke_flags |= KEF_ONLOANQ;
317 	TAILQ_INSERT_TAIL(&kg->kg_lq, ke, ke_kgrlist);
318 	kg->kg_loan_kses++;
319 	CTR1(KTR_RUNQ, "kse_reassign: ke%p on loan queue", ke);
320 	return;
321 }
322 
323 #if 0
324 /*
325  * Remove a thread from its KSEGRP's run queue.
326  * This in turn may remove it from a KSE if it was already assigned
327  * to one, possibly causing a new thread to be assigned to the KSE
328  * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
329  */
330 static void
331 remrunqueue(struct thread *td)
332 {
333 	struct thread *td2, *td3;
334 	struct ksegrp *kg;
335 	struct kse *ke;
336 
337 	mtx_assert(&sched_lock, MA_OWNED);
338 	KASSERT ((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
339 	kg = td->td_ksegrp;
340 	ke = td->td_kse;
341 	/*
342 	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
343 	 * threads are BOUND.
344 	 */
345 	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
346 	kg->kg_runnable--;
347 	TD_SET_CAN_RUN(td);
348 	if (TD_IS_BOUND(td))  {
349 		/* Bring its kse with it, leave the thread attached */
350 		sched_rem(ke);
351 		ke->ke_state = KES_THREAD;
352 		return;
353 	}
354    	td3 = TAILQ_PREV(td, threadqueue, td_runq);
355 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
356 	if (ke) {
357 		/*
358 		 * This thread has been assigned to a KSE.
359 		 * We need to dissociate it and try assign the
360 		 * KSE to the next available thread. Then, we should
361 		 * see if we need to move the KSE in the run queues.
362 		 */
363 		sched_rem(ke);
364 		ke->ke_state = KES_THREAD;
365 		td2 = kg->kg_last_assigned;
366 		KASSERT((td2 != NULL), ("last assigned has wrong value "));
367 		if (td2 == td)
368 			kg->kg_last_assigned = td3;
369 		kse_reassign(ke);
370 	}
371 }
372 #endif
373 
374 /*
375  * Change the priority of a thread that is on the run queue.
376  */
377 void
378 adjustrunqueue(struct thread *td, int newpri)
379 {
380 	struct ksegrp *kg;
381 	struct kse *ke;
382 
383 	mtx_assert(&sched_lock, MA_OWNED);
384 	KASSERT ((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
385 	/*
386 	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
387 	 * threads are BOUND.
388 	 */
389 	ke = td->td_kse;
390 	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
391 	if (TD_IS_BOUND(td))  {
392 		/* We only care about the kse in the run queue. */
393 		td->td_priority = newpri;
394 		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
395 			sched_rem(ke);
396 			sched_add(ke);
397 		}
398 		return;
399 	}
400 	/*
401 	 * An unbound thread. This is not optimised yet.
402 	 */
403 	kg = td->td_ksegrp;
404 	kg->kg_runnable--;
405 	TD_SET_CAN_RUN(td);
406 	if (ke) {
407 		if (kg->kg_last_assigned == td) {
408 			kg->kg_last_assigned =
409 			    TAILQ_PREV(td, threadqueue, td_runq);
410 		}
411 		sched_rem(ke);
412 	}
413 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
414 	td->td_priority = newpri;
415 	setrunqueue(td);
416 }
417 
418 void
419 setrunqueue(struct thread *td)
420 {
421 	struct kse *ke;
422 	struct ksegrp *kg;
423 	struct thread *td2;
424 	struct thread *tda;
425 
426 	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
427 	mtx_assert(&sched_lock, MA_OWNED);
428 	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
429 	    ("setrunqueue: bad thread state"));
430 	TD_SET_RUNQ(td);
431 	kg = td->td_ksegrp;
432 	kg->kg_runnable++;
433 	if ((td->td_proc->p_flag & P_KSES) == 0) {
434 		/*
435 		 * Common path optimisation: Only one of everything
436 		 * and the KSE is always already attached.
437 		 * Totally ignore the ksegrp run queue.
438 		 */
439 		sched_add(td->td_kse);
440 		return;
441 	}
442 	/*
443 	 * If the process is threaded but the thread is bound then
444 	 * there is still a little extra to do regarding KSE loaning.
445 	 */
446 	if (TD_IS_BOUND(td)) {
447 		KASSERT((td->td_kse != NULL),
448 		    ("queueing BAD thread to run queue"));
449 		ke = td->td_kse;
450 		KASSERT((ke->ke_owner == ke->ke_thread),
451 		    ("setrunqueue: Hey KSE loaned out"));
452 		if (ke->ke_flags & KEF_ONLOANQ) {
453 			ke->ke_flags &= ~KEF_ONLOANQ;
454 			TAILQ_REMOVE(&kg->kg_lq, ke, ke_kgrlist);
455 			kg->kg_loan_kses--;
456 		}
457 		sched_add(td->td_kse);
458 		return;
459 	}
460 
461 	/*
462 	 * OK, so we are threading with this thread.
463 	 * We don't have a KSE; see if we can get one.
464 	 */
465 	tda = kg->kg_last_assigned;
466 	if ((ke = td->td_kse) == NULL) {
467 		/*
468 		 * We will need a KSE; see if there is one.
469 		 * First look for a free one, before getting desperate.
470 		 * If we can't get one, our priority is not high enough;
471 		 * that's OK.
472 		 */
473 		if (kg->kg_loan_kses) {
474 			/*
475 			 * Failing that, see if we can borrow one.
476 			 */
477 			ke = TAILQ_FIRST(&kg->kg_lq);
478 			TAILQ_REMOVE(&kg->kg_lq, ke, ke_kgrlist);
479 			ke->ke_flags &= ~KEF_ONLOANQ;
480 			ke->ke_state = KES_THREAD;
481 			TD_SET_LOAN(ke->ke_owner);
482 			ke->ke_thread  = NULL;
483 			kg->kg_loan_kses--;
484 		} else if (tda && (tda->td_priority > td->td_priority)) {
485 			/*
486 			 * None free, but there is one we can commandeer.
487 			 */
488 			ke = tda->td_kse;
489 			tda->td_kse = NULL;
490 			ke->ke_thread = NULL;
491 			tda = kg->kg_last_assigned =
492 		    	    TAILQ_PREV(tda, threadqueue, td_runq);
493 			sched_rem(ke);
494 		}
495 	} else {
496 		/*
497 		 * Temporarily disassociate so it looks like the other cases.
498 		 * If the owner wasn't lending before, then it is now.
499 		 */
500 		if (!TD_LENDER(ke->ke_owner)) {
501 			TD_SET_LOAN(ke->ke_owner);
502 		}
503 		ke->ke_thread = NULL;
504 		td->td_kse = NULL;
505 	}
506 
507 	/*
508 	 * Add the thread to the ksegrp's run queue at
509 	 * the appropriate place.
510 	 */
511 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
512 		if (td2->td_priority > td->td_priority) {
513 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
514 			break;
515 		}
516 	}
517 	if (td2 == NULL) {
518 		/* We ran off the end of the TAILQ or it was empty. */
519 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
520 	}
521 
522 	/*
523 	 * If we have a KSE to use, then put it on the run queue and,
524 	 * if needed, readjust the last_assigned pointer.
525 	 */
526 	if (ke) {
527 		if (tda == NULL) {
528 			/*
529 			 * No pre-existing last assigned so whoever is first
530 			 * gets the KSE we brought in (maybe us).
531 			 */
532 			td2 = TAILQ_FIRST(&kg->kg_runq);
533 			KASSERT((td2->td_kse == NULL),
534 			    ("unexpected ke present"));
535 			td2->td_kse = ke;
536 			ke->ke_thread = td2;
537 			kg->kg_last_assigned = td2;
538 		} else if (tda->td_priority > td->td_priority) {
539 			/*
540 			 * It's ours, grab it, but last_assigned is past us
541 			 * so don't change it.
542 			 */
543 			td->td_kse = ke;
544 			ke->ke_thread = td;
545 		} else {
546 			/*
547 			 * We are past last_assigned, so
548 			 * put the new kse on whatever is next,
549 			 * which may or may not be us.
550 			 */
551 			td2 = TAILQ_NEXT(tda, td_runq);
552 			kg->kg_last_assigned = td2;
553 			td2->td_kse = ke;
554 			ke->ke_thread = td2;
555 		}
556 		sched_add(ke);
557 	}
558 }
559 
560 /************************************************************************
561  * Critical section marker functions					*
562  ************************************************************************/
563 /* Critical sections that prevent preemption. */
564 void
565 critical_enter(void)
566 {
567 	struct thread *td;
568 
569 	td = curthread;
570 	if (td->td_critnest == 0)
571 		cpu_critical_enter();
572 	td->td_critnest++;
573 }
574 
575 void
576 critical_exit(void)
577 {
578 	struct thread *td;
579 
580 	td = curthread;
581 	if (td->td_critnest == 1) {
582 		td->td_critnest = 0;
583 		cpu_critical_exit();
584 	} else {
585 		td->td_critnest--;
586 	}
587 }
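
/*
 * Usage note (illustrative only): a typical caller brackets a short region
 * that must not be preempted on the local CPU, e.g.:
 *
 *	critical_enter();
 *	... touch per-CPU state that must not be preempted or migrate ...
 *	critical_exit();
 *
 * Sections may nest; td_critnest counts the nesting depth and the
 * cpu_critical_enter()/cpu_critical_exit() hooks are only invoked at the
 * outermost level.
 */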
588 
589 
590 /************************************************************************
591  * SYSTEM RUN QUEUE manipulations and tests				*
592  ************************************************************************/
593 /*
594  * Initialize a run structure.
595  */
596 void
597 runq_init(struct runq *rq)
598 {
599 	int i;
600 
601 	bzero(rq, sizeof *rq);
602 	for (i = 0; i < RQ_NQS; i++)
603 		TAILQ_INIT(&rq->rq_queues[i]);
604 }
605 
606 /*
607  * Clear the status bit of the queue corresponding to priority level pri,
608  * indicating that it is empty.
609  */
610 static __inline void
611 runq_clrbit(struct runq *rq, int pri)
612 {
613 	struct rqbits *rqb;
614 
615 	rqb = &rq->rq_status;
616 	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
617 	    rqb->rqb_bits[RQB_WORD(pri)],
618 	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
619 	    RQB_BIT(pri), RQB_WORD(pri));
620 	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
621 }
622 
623 /*
624  * Find the index of the first non-empty run queue.  This is done by
625  * scanning the status bits; a set bit indicates a non-empty queue.
626  */
627 static __inline int
628 runq_findbit(struct runq *rq)
629 {
630 	struct rqbits *rqb;
631 	int pri;
632 	int i;
633 
634 	rqb = &rq->rq_status;
635 	for (i = 0; i < RQB_LEN; i++)
636 		if (rqb->rqb_bits[i]) {
637 			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
638 			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
639 			    rqb->rqb_bits[i], i, pri);
640 			return (pri);
641 		}
642 
643 	return (-1);
644 }
645 
646 /*
647  * Set the status bit of the queue corresponding to priority level pri,
648  * indicating that it is non-empty.
649  */
650 static __inline void
651 runq_setbit(struct runq *rq, int pri)
652 {
653 	struct rqbits *rqb;
654 
655 	rqb = &rq->rq_status;
656 	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
657 	    rqb->rqb_bits[RQB_WORD(pri)],
658 	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
659 	    RQB_BIT(pri), RQB_WORD(pri));
660 	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
661 }
662 
663 /*
664  * Add the KSE to the queue specified by its priority, and set the
665  * corresponding status bit.
666  */
667 void
668 runq_add(struct runq *rq, struct kse *ke)
669 {
670 	struct rqhead *rqh;
671 	int pri;
672 
673 	pri = ke->ke_thread->td_priority / RQ_PPQ;
674 	ke->ke_rqindex = pri;
675 	runq_setbit(rq, pri);
676 	rqh = &rq->rq_queues[pri];
677 	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
678 	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
679 	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
680 }
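
/*
 * Worked example, assuming the usual RQ_PPQ of 4 and 32-bit rqb words
 * (both are properties of <sys/runq.h>, not of this file): a thread at
 * priority 74 maps to queue index 74 / 4 = 18, so runq_setbit() sets bit
 * RQB_BIT(18) == (1 << 18) in word RQB_WORD(18) == 0, and the KSE is
 * appended to rq_queues[18].  runq_findbit() later recovers index 18 as
 * the first set bit when no higher-priority (lower-numbered) queue is
 * occupied.
 */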
681 
682 /*
683  * Return true if there are runnable processes of any priority on the run
684  * queue, false otherwise.  Has no side effects and does not modify the run
685  * queue structure.
686  */
687 int
688 runq_check(struct runq *rq)
689 {
690 	struct rqbits *rqb;
691 	int i;
692 
693 	rqb = &rq->rq_status;
694 	for (i = 0; i < RQB_LEN; i++)
695 		if (rqb->rqb_bits[i]) {
696 			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
697 			    rqb->rqb_bits[i], i);
698 			return (1);
699 		}
700 	CTR0(KTR_RUNQ, "runq_check: empty");
701 
702 	return (0);
703 }
704 
705 /*
706  * Find the highest priority process on the run queue.
707  */
708 struct kse *
709 runq_choose(struct runq *rq)
710 {
711 	struct rqhead *rqh;
712 	struct kse *ke;
713 	int pri;
714 
715 	mtx_assert(&sched_lock, MA_OWNED);
716 	while ((pri = runq_findbit(rq)) != -1) {
717 		rqh = &rq->rq_queues[pri];
718 		ke = TAILQ_FIRST(rqh);
719 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
720 		CTR3(KTR_RUNQ,
721 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
722 		return (ke);
723 	}
724 	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
725 
726 	return (NULL);
727 }
728 
729 /*
730  * Remove the KSE from the queue specified by its priority, and clear the
731  * corresponding status bit if the queue becomes empty.
732  * Caller must set ke->ke_state afterwards.
733  */
734 void
735 runq_remove(struct runq *rq, struct kse *ke)
736 {
737 	struct rqhead *rqh;
738 	int pri;
739 
740 	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
741 		("runq_remove: process swapped out"));
742 	pri = ke->ke_rqindex;
743 	rqh = &rq->rq_queues[pri];
744 	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
745 	    ke, ke->ke_thread->td_priority, pri, rqh);
746 	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
747 	TAILQ_REMOVE(rqh, ke, ke_procq);
748 	if (TAILQ_EMPTY(rqh)) {
749 		CTR0(KTR_RUNQ, "runq_remove: empty");
750 		runq_clrbit(rq, pri);
751 	}
752 }
753 
754 #if 0
755 void
756 panc(char *string1, char *string2)
757 {
758 	printf("%s", string1);
759 	Debugger(string2);
760 }
761 
762 void
763 thread_sanity_check(struct thread *td, char *string)
764 {
765 	struct proc *p;
766 	struct ksegrp *kg;
767 	struct kse *ke;
768 	struct thread *td2 = NULL;
769 	unsigned int prevpri;
770 	int	saw_lastassigned = 0;
771 	int unassigned = 0;
772 	int assigned = 0;
773 
774 	p = td->td_proc;
775 	kg = td->td_ksegrp;
776 	ke = td->td_kse;
777 
778 
779 	if (ke) {
780 		if (p != ke->ke_proc) {
781 			panc(string, "wrong proc");
782 		}
783 		if (ke->ke_thread != td) {
784 			panc(string, "wrong thread");
785 		}
786 	}
787 
788 	if ((p->p_flag & P_KSES) == 0) {
789 		if (ke == NULL) {
790 			panc(string, "non KSE thread lost kse");
791 		}
792 	} else {
793 		prevpri = 0;
794 		saw_lastassigned = 0;
795 		unassigned = 0;
796 		assigned = 0;
797 		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
798 			if (td2->td_priority < prevpri) {
799 				panc(string, "thread runqueue unsorted");
800 			}
801 			if ((td2->td_state == TDS_RUNQ) &&
802 			    td2->td_kse &&
803 			    (td2->td_kse->ke_state != KES_ONRUNQ)) {
804 				panc(string, "KSE wrong state");
805 			}
806 			prevpri = td2->td_priority;
807 			if (td2->td_kse) {
808 				assigned++;
809 				if (unassigned) {
810 					panc(string, "unassigned before assigned");
811 				}
812  				if  (kg->kg_last_assigned == NULL) {
813 					panc(string, "lastassigned corrupt");
814 				}
815 				if (saw_lastassigned) {
816 					panc(string, "last assigned not last");
817 				}
818 				if (td2->td_kse->ke_thread != td2) {
819 					panc(string, "mismatched kse/thread");
820 				}
821 			} else {
822 				unassigned++;
823 			}
824 			if (td2 == kg->kg_last_assigned) {
825 				saw_lastassigned = 1;
826 				if (td2->td_kse == NULL) {
827 					panc(string, "last assigned not assigned");
828 				}
829 			}
830 		}
831 		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
832 			panc(string, "where on earth does lastassigned point?");
833 		}
834 		FOREACH_THREAD_IN_GROUP(kg, td2) {
835 			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
836 			    (TD_ON_RUNQ(td2))) {
837 				assigned++;
838 				if (td2->td_kse == NULL) {
839 					panc(string, "BOUND thread with no KSE");
840 				}
841 			}
842 		}
843 #if 0
844 		if ((unassigned + assigned) != kg->kg_runnable) {
845 			panc(string, "wrong number in runnable");
846 		}
847 #endif
848 	}
849 	if (assigned == 12345) {
850 		printf("%p %p %p %p %p %d, %d",
851 		    td, td2, ke, kg, p, assigned, saw_lastassigned);
852 	}
853 }
854 #endif
855 
856