xref: /freebsd/sys/kern/kern_switch.c (revision 33d7ad1abe96b8aca43d5a3986f10fc79bd72022)
1 /*
2  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /***
30 
31 Here is the logic:
32 
33 If there are N processors, then there are at most N KSEs (kernel
34 schedulable entities) working to process threads that belong to a
35 KSEGROUP (kg). If X of these KSEs are actually running at the
36 moment in question, then at most M = N - X of these KSEs are on
37 the run queue, as running KSEs are not on the queue.
38 
39 Runnable threads are queued off the KSEGROUP in priority order.
40 If there are M or more threads runnable, the top M threads
41 (by priority) are 'preassigned' to the M KSEs not running. The KSEs take
42 their priority from those threads and are put on the run queue.
43 
44 The last thread that had a priority high enough to have a KSE associated
45 with it, AND IS ON THE RUN QUEUE, is pointed to by
46 kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
47 assigned, either because all the available KSEs are actively running or
48 because there are no threads queued, that pointer is NULL.
49 
50 When a KSE is removed from the run queue to be run, we know it was
51 associated with the highest priority thread in the queue (at the head
52 of the queue). If it is also the last assigned, we know M was 1 and must
53 now be 0. Since the thread is no longer queued, the last-assigned pointer
54 must no longer refer to it. Since we know there were no more KSEs
55 available (M was 1 and is now 0), and since we are not FREEING our KSE
56 but using it, we know there are STILL no more KSEs available, so we can
57 prove that the next thread in the ksegrp list will not have a KSE to
58 assign to it, and hence that the pointer must be made 'invalid' (NULL).
59 
60 The pointer exists so that when a new thread is made runnable, it can
61 have its priority compared with the last assigned thread to see if
62 it should 'steal' its KSE or not, i.e. whether it is 'earlier'
63 on the list than that thread or later. If it is earlier, then the KSE is
64 removed from the last assigned thread (which is now not assigned a KSE)
65 and reassigned to the new thread, which is placed earlier in the list.
66 The pointer is then backed up to the previous thread (which may or may not
67 be the new thread).
68 
69 When a thread sleeps or is removed, the KSE becomes available and, if there
70 are queued threads that are not assigned KSEs, the highest priority one of
71 them is assigned the KSE, which is then placed back on the run queue at
72 the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
73 to point to it.
74 
75 The following diagram shows 2 KSEs and 3 threads from a single process.
76 
77  RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
78               \    \____
79                \        \
80     KSEGROUP---thread--thread--thread    (queued in priority order)
81         \                 /
82          \_______________/
83           (last_assigned)
84 
85 The result of this scheme is that the M available KSEs are always
86 queued at the priorities they have inherited from the M highest priority
87 threads for that KSEGROUP. If this situation changes, the KSEs are
88 reassigned to keep this true. A worked example follows this comment.
89 
90 */
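
/*
 * A worked example of the scheme above (the priority numbers are invented
 * for illustration): with N = 2 KSEs, neither currently running (X = 0,
 * so M = 2), and three runnable threads at priorities 10, 20 and 30, the
 * threads at priorities 10 and 20 are assigned the two KSEs, those KSEs
 * sit on the system run queue at priorities 10 and 20, and
 * kg_last_assigned points at the priority-20 thread.  The priority-30
 * thread waits, unassigned, on the KSEGROUP run queue.  If a thread at
 * priority 15 now becomes runnable, it is 'earlier' on the list than the
 * last assigned thread, so it steals that thread's KSE and
 * kg_last_assigned backs up to point at the priority-15 thread.
 */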
91 
92 #include <sys/param.h>
93 #include <sys/systm.h>
94 #include <sys/kernel.h>
95 #include <sys/ktr.h>
96 #include <sys/lock.h>
97 #include <sys/mutex.h>
98 #include <sys/proc.h>
99 #include <sys/queue.h>
100 #include <machine/critical.h>
101 
102 CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
103 
104 /*
105  * Global run queue.
106  */
107 static struct runq runq;
108 SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)
109 
110 static void runq_readjust(struct runq *rq, struct kse *ke);
111 /************************************************************************
112  * Functions that manipulate runnability from a thread perspective.	*
113  ************************************************************************/
114 
115 /*
116  * Select the KSE that will be run next.  From that find the thread, and
117  * remove it from the KSEGRP's run queue.  If there is thread clustering,
118  * this will be what does it.
119  */
120 struct thread *
121 choosethread(void)
122 {
123 	struct kse *ke;
124 	struct thread *td;
125 	struct ksegrp *kg;
126 
127 	if ((ke = runq_choose(&runq))) {
128 		td = ke->ke_thread;
129 		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
130 		kg = ke->ke_ksegrp;
131 		if (td->td_flags & TDF_UNBOUND) {
132 			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
133 			if (kg->kg_last_assigned == td) {
134 				if (TAILQ_PREV(td, threadqueue, td_runq) != NULL)
135 					printf("Yo MAMA!\n");
136 				kg->kg_last_assigned =
137 				    TAILQ_PREV(td, threadqueue, td_runq);
138 			}
139 			/*
140 			 * If we have started running an upcall,
141 			 * then TDF_UNBOUND was set because the thread was
142 			 * created without a KSE. Now that we have one,
143 			 * and it is our time to run, we make sure
144 			 * that BOUND semantics apply for the rest of
145 			 * the journey to userland, and into the UTS.
146 			 */
147 #ifdef	NOTYET
148 			if (td->td_flags & TDF_UPCALLING)
149 				td->td_flags &= ~TDF_UNBOUND;
150 #endif
151 		}
152 		kg->kg_runnable--;
153 		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
154 		    td, td->td_priority);
155 	} else {
156 		/* Pretend the idle thread was on the run queue. */
157 		td = PCPU_GET(idlethread);
158 		td->td_kse->ke_state = KES_UNQUEUED;
159 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
160 	}
161 	td->td_state = TDS_RUNNING;
162 	return (td);
163 }
164 
165 /*
166  * Given a KSE (now surplus), either assign a new runnable thread to it
167  * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
168  * Assumes the KSE is no longer linked to any thread (it has been cleaned).
169  */
170 void
171 kse_reassign(struct kse *ke)
172 {
173 	struct ksegrp *kg;
174 	struct thread *td;
175 
176 	kg = ke->ke_ksegrp;
177 
178 	/*
179 	 * Find the first unassigned thread.
180 	 * If there is a 'last assigned' thread, see what comes next;
181 	 * otherwise look at what is first.
182 	 */
183 	if ((td = kg->kg_last_assigned)) {
184 		td = TAILQ_NEXT(td, td_runq);
185 	} else {
186 		td = TAILQ_FIRST(&kg->kg_runq);
187 	}
188 
189 	/*
190 	 * If we found one, assign it the KSE; otherwise idle the KSE.
191 	 */
192 	if (td) {
193 		kg->kg_last_assigned = td;
194 		td->td_kse = ke;
195 		ke->ke_thread = td;
196 		runq_add(&runq, ke);
197 		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
198 	} else {
199 		KASSERT((ke->ke_state != KES_IDLE), ("kse already idle"));
200 		ke->ke_state = KES_IDLE;
201 		ke->ke_thread = NULL;
202 		TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
203 		kg->kg_idle_kses++;
204 		CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
205 	}
206 }
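
#if 0
/*
 * Illustrative only (not compiled; example_thread_blocks() is a made-up
 * name): a sketch of how an unbound thread that blocks might give up its
 * KSE so that kse_reassign() can hand it to the next runnable thread in
 * the ksegrp, or idle it.  The real sleep/exit paths do more bookkeeping.
 */
static void
example_thread_blocks(struct thread *td)
{
	struct kse *ke = td->td_kse;

	td->td_kse = NULL;		/* dissociate the pair ... */
	ke->ke_thread = NULL;		/* ... so the KSE is "clean" */
	kse_reassign(ke);		/* next unassigned thread gets it, or it idles */
}
#endif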
207 
208 int
209 kserunnable(void)
210 {
211 	return runq_check(&runq);
212 }
213 
214 /*
215  * Remove a thread from its KSEGRP's run queue.
216  * This may in turn dissociate it from a KSE if it was already assigned
217  * to one, possibly causing a new thread to be assigned to that KSE
218  * and the KSE to get a new priority (unless it is a BOUND thread/KSE pair).
219  */
220 void
221 remrunqueue(struct thread *td)
222 {
223 	struct thread *td2, *td3;
224 	struct ksegrp *kg;
225 	struct kse *ke;
226 
227 	mtx_assert(&sched_lock, MA_OWNED);
228 	KASSERT ((td->td_state == TDS_RUNQ),
229 		("remrunqueue: Bad state on run queue"));
230 	kg = td->td_ksegrp;
231 	ke = td->td_kse;
232 	/*
233 	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
234 	 * threads are BOUND.
235 	 */
236 	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
237 	td->td_state = TDS_UNQUEUED;
238 	kg->kg_runnable--;
239 	if ((td->td_flags & TDF_UNBOUND) == 0)  {
240 		/* Bring its kse with it, leave the thread attached */
241 		runq_remove(&runq, ke);
242 		ke->ke_state = KES_UNQUEUED;
243 		return;
244 	}
245 	if (ke) {
246 		/*
247 		 * This thread has been assigned to a KSE.
248 		 * We need to dissociate it and try to assign the
249 		 * KSE to the next available thread. Then, we should
250 		 * see if we need to move the KSE in the run queues.
251 		 */
252 		td2 = kg->kg_last_assigned;
253 		KASSERT((td2 != NULL), ("last assigned has wrong value "));
254 		td->td_kse = NULL;
255 		if ((td3 = TAILQ_NEXT(td2, td_runq))) {
256 			KASSERT(td3 != td, ("td3 somehow matched td"));
257 			/*
258 			 * Give the next unassigned thread to the KSE
259 			 * so the number of runnable KSEs remains
260 			 * constant.
261 			 */
262 			td3->td_kse = ke;
263 			ke->ke_thread = td3;
264 			kg->kg_last_assigned = td3;
265 			runq_readjust(&runq, ke);
266 		} else {
267 			/*
268 			 * There is no unassigned thread.
269 			 * If we were the last assigned one,
270 			 * adjust the last assigned pointer back
271 			 * one, which may result in NULL.
272 			 */
273 			if (td == td2) {
274 				kg->kg_last_assigned =
275 				    TAILQ_PREV(td, threadqueue, td_runq);
276 			}
277 			runq_remove(&runq, ke);
278 			KASSERT((ke->ke_state != KES_IDLE),
279 			    ("kse already idle"));
280 			ke->ke_state = KES_IDLE;
281 			ke->ke_thread = NULL;
282 			TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
283 			kg->kg_idle_kses++;
284 		}
285 	}
286 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
287 }
288 
289 #if 1 /* use the first version */
290 
291 void
292 setrunqueue(struct thread *td)
293 {
294 	struct kse *ke;
295 	struct ksegrp *kg;
296 	struct thread *td2;
297 	struct thread *tda;
298 
299 	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
300 	mtx_assert(&sched_lock, MA_OWNED);
301 	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
302 	td->td_state = TDS_RUNQ;
303 	kg = td->td_ksegrp;
304 	kg->kg_runnable++;
305 	if ((td->td_flags & TDF_UNBOUND) == 0) {
306 		KASSERT((td->td_kse != NULL),
307 		    ("queueing BAD thread to run queue"));
308 		/*
309 		 * Common path optimisation: Only one of everything
310 		 * and the KSE is always already attached.
311 		 * Totally ignore the ksegrp run queue.
312 		 */
313 		runq_add(&runq, td->td_kse);
314 		return;
315 	}
316 	/*
317 	 * OK, so we are threading with this thread.
318 	 * We may not have a KSE yet; see if we can get one.
319 	 */
320 	tda = kg->kg_last_assigned;
321 	if ((ke = td->td_kse) == NULL) {
322 		/*
323 		 * We will need a KSE; see if there is one.
324 		 * First look for a free one, before getting desperate.
325 		 * If we can't get one, our priority is not high enough;
326 		 * that's OK.
327 		 */
328 		if (kg->kg_idle_kses) {
329 			/*
330 			 * There is a free one so it's ours for the asking..
331 			 */
332 			ke = TAILQ_FIRST(&kg->kg_iq);
333 			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
334 			ke->ke_state = KES_UNQUEUED;
335 			kg->kg_idle_kses--;
336 		} else if (tda && (tda->td_priority > td->td_priority)) {
337 			/*
338 			 * None free, but there is one we can commandeer.
339 			 */
340 			ke = tda->td_kse;
341 			tda->td_kse = NULL;
342 			ke->ke_thread = NULL;
343 			tda = kg->kg_last_assigned =
344 		    	    TAILQ_PREV(tda, threadqueue, td_runq);
345 			runq_remove(&runq, ke);
346 		}
347 	} else {
348 		KASSERT(ke->ke_thread == td, ("KSE/thread mismatch"));
349 		KASSERT(ke->ke_state != KES_IDLE, ("KSE unexpectedly idle"));
350 		ke->ke_thread = NULL;
351 		td->td_kse = NULL;
352 	}
353 
354 	/*
355 	 * Add the thread to the ksegrp's run queue at
356 	 * the appropriate place.
357 	 */
358 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
359 		if (td2->td_priority > td->td_priority) {
360 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
361 			break;
362 		}
363 	}
364 	if (td2 == NULL) {
365 		/* We ran off the end of the TAILQ or it was empty. */
366 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
367 	}
368 
369 	/*
370 	 * If we have a KSE to use, then put it on the run queue and,
371 	 * if needed, readjust the last_assigned pointer.
372 	 */
373 	if (ke) {
374 		if (tda == NULL) {
375 			/*
376 			 * No pre-existing last assigned, so whoever is first
377 			 * gets the KSE we brought in (may be us).
378 			 */
379 			td2 = TAILQ_FIRST(&kg->kg_runq);
380 			KASSERT((td2->td_kse == NULL),
381 			    ("unexpected ke present"));
382 			td2->td_kse = ke;
383 			ke->ke_thread = td2;
384 			kg->kg_last_assigned = td2;
385 		} else if (tda->td_priority > td->td_priority) {
386 			/*
387 			 * It's ours, grab it, but last_assigned is past us
388 			 * so don't change it.
389 			 */
390 			td->td_kse = ke;
391 			ke->ke_thread = td;
392 		} else {
393 			/*
394 			 * We are past last_assigned, so
395 			 * put the new kse on whatever is next,
396 			 * which may or may not be us.
397 			 */
398 			td2 = TAILQ_NEXT(tda, td_runq);
399 			kg->kg_last_assigned = td2;
400 			td2->td_kse = ke;
401 			ke->ke_thread = td2;
402 		}
403 		runq_add(&runq, ke);
404 	}
405 }
406 
407 #else
408 
409 void
410 setrunqueue(struct thread *td)
411 {
412 	struct kse *ke;
413 	struct ksegrp *kg;
414 	struct thread *td2;
415 
416 	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
417 	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
418 	td->td_state = TDS_RUNQ;
419 	kg = td->td_ksegrp;
420 	kg->kg_runnable++;
421 	if ((td->td_flags & TDF_UNBOUND) == 0) {
422 		/*
423 		 * Common path optimisation: Only one of everything
424 		 * and the KSE is always already attached.
425 		 * Totally ignore the ksegrp run queue.
426 		 */
427 		runq_add(&runq, td->td_kse);
428 		return;
429 	}
430 	/*
431 	 * First add the thread to the ksegrp's run queue at
432 	 * the appropriate place.
433 	 */
434 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
435 		if (td2->td_priority > td->td_priority) {
436 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
437 			break;
438 		}
439 	}
440 	if (td2 == NULL) {
441 		/* We ran off the end of the TAILQ or it was empty. */
442 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
443 	}
444 
445 	/*
446 	 * The following could be achieved by simply doing:
447 	 * td->td_kse = NULL; kse_reassign(ke);
448 	 * but I felt that I'd try to do it inline here.
449 	 * All this work may not be worth it.
450 	 */
451 	if ((ke = td->td_kse)) { /* XXXKSE */
452 		/*
453 		 * We have a KSE already. See whether we can keep it
454 		 * or if we need to give it to someone else.
455 		 * Either way it will need to be inserted into
456 		 * the runq. kse_reassign() will do this as will runq_add().
457 		 */
458 		if ((kg->kg_last_assigned) &&
459 		   (kg->kg_last_assigned->td_priority > td->td_priority)) {
460 			/*
461 			 * We can definitely keep the KSE
462 			 * as the "last assigned thread" has
463 			 * lower priority than we do.
464 			 * The "last assigned" pointer stays the same.
465 			 */
466 			runq_add(&runq, ke);
467 			return;
468 
469 		}
470 		/*
471 		 * Give it to the correct thread,
472 		 * which may be (often is) us, but may not be.
473 		 */
474 		td->td_kse = NULL;
475 		kse_reassign(ke);
476 		return;
477 	}
478 	/*
479 	 * There are two cases where KSE adjustment is needed.
480 	 * Usurpation of an already assigned KSE, and assignment
481 	 * of a previously IDLE KSE.
482 	 */
483 	if (kg->kg_idle_kses) {
484 		/*
485 		 * If there are unassigned KSEs then we definitely
486 		 * will be assigned one from the idle KSE list.
487 		 * If we are the last, we should get the "last
488 		 * assigned" pointer set to us as well.
489 		 */
490 		ke = TAILQ_FIRST(&kg->kg_iq);
491 		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
492 		ke->ke_state = KES_UNQUEUED;
493 		kg->kg_idle_kses--;
494 		ke->ke_thread = td;
495 		td->td_kse = ke;
496 		runq_add(&runq, ke);
497 		if (TAILQ_NEXT(td, td_runq) == NULL) {
498 			kg->kg_last_assigned = td;
499 		}
500 	} else if (kg->kg_last_assigned &&
501 		(kg->kg_last_assigned->td_priority > td->td_priority)) {
502 		/*
503 		 * If there were none last-assigned, all KSEs
504 		 * are actually out running as we speak.
505 		 * If there was a last assigned, but we didn't see it,
506 		 * we must be inserting before it, so take the KSE from
507 		 * the last assigned, and back it up one entry. Then,
508 		 * assign the KSE to the new thread and adjust its priority.
509 		 */
510 		td2 = kg->kg_last_assigned;
511 		ke = td2->td_kse;
512 		kg->kg_last_assigned =
513 		    TAILQ_PREV(td2, threadqueue, td_runq);
514 		td2->td_kse = NULL;
515 		td->td_kse = ke;
516 		ke->ke_thread = td;
517 		runq_readjust(&runq, ke);
518 	}
519 }
520 #endif
521 
522 /************************************************************************
523  * Critical section marker functions					*
524  ************************************************************************/
525 /* Critical sections that prevent preemption. */
526 void
527 critical_enter(void)
528 {
529 	struct thread *td;
530 
531 	td = curthread;
532 	if (td->td_critnest == 0)
533 		cpu_critical_enter();
534 	td->td_critnest++;
535 }
536 
537 void
538 critical_exit(void)
539 {
540 	struct thread *td;
541 
542 	td = curthread;
543 	if (td->td_critnest == 1) {
544 		td->td_critnest = 0;
545 		cpu_critical_exit();
546 	} else {
547 		td->td_critnest--;
548 	}
549 }
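
#if 0
/*
 * Minimal usage sketch (not compiled; example_nested_critical() is a
 * made-up name).  Critical sections nest via td_critnest, so only the
 * outermost enter/exit pair touches the machine state through
 * cpu_critical_enter()/cpu_critical_exit().
 */
static void
example_nested_critical(void)
{

	critical_enter();		/* td_critnest 0 -> 1, cpu_critical_enter() */
	critical_enter();		/* td_critnest 1 -> 2, no machine-level change */
	/* ... work that must not be preempted ... */
	critical_exit();		/* td_critnest 2 -> 1 */
	critical_exit();		/* td_critnest 1 -> 0, cpu_critical_exit() */
}
#endif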
550 
551 
552 /************************************************************************
553  * SYSTEM RUN QUEUE manipulations and tests				*
554  ************************************************************************/
555 /*
556  * Initialize a run structure.
557  */
558 void
559 runq_init(struct runq *rq)
560 {
561 	int i;
562 
563 	bzero(rq, sizeof *rq);
564 	for (i = 0; i < RQ_NQS; i++)
565 		TAILQ_INIT(&rq->rq_queues[i]);
566 }
567 
568 /*
569  * Clear the status bit of the queue corresponding to priority level pri,
570  * indicating that it is empty.
571  */
572 static __inline void
573 runq_clrbit(struct runq *rq, int pri)
574 {
575 	struct rqbits *rqb;
576 
577 	rqb = &rq->rq_status;
578 	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
579 	    rqb->rqb_bits[RQB_WORD(pri)],
580 	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
581 	    RQB_BIT(pri), RQB_WORD(pri));
582 	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
583 }
584 
585 /*
586  * Find the index of the first non-empty run queue.  This is done by
587  * scanning the status bits; a set bit indicates a non-empty queue.
588  */
589 static __inline int
590 runq_findbit(struct runq *rq)
591 {
592 	struct rqbits *rqb;
593 	int pri;
594 	int i;
595 
596 	rqb = &rq->rq_status;
597 	for (i = 0; i < RQB_LEN; i++)
598 		if (rqb->rqb_bits[i]) {
599 			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
600 			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
601 			    rqb->rqb_bits[i], i, pri);
602 			return (pri);
603 		}
604 
605 	return (-1);
606 }
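
/*
 * Worked example (illustrative; this assumes 32-bit status words, i.e.
 * RQB_L2BPW == 5, and that RQB_FFS() yields the zero-based index of the
 * lowest set bit): if word 0 is empty and the lowest set bit of word 1
 * is bit 5, then the first non-empty queue is pri = 5 + (1 << 5) = 37.
 */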
607 
608 /*
609  * Set the status bit of the queue corresponding to priority level pri,
610  * indicating that it is non-empty.
611  */
612 static __inline void
613 runq_setbit(struct runq *rq, int pri)
614 {
615 	struct rqbits *rqb;
616 
617 	rqb = &rq->rq_status;
618 	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
619 	    rqb->rqb_bits[RQB_WORD(pri)],
620 	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
621 	    RQB_BIT(pri), RQB_WORD(pri));
622 	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
623 }
624 
625 /*
626  * Add the KSE to the queue specified by its priority, and set the
627  * corresponding status bit.
628  */
629 void
630 runq_add(struct runq *rq, struct kse *ke)
631 {
632 	struct rqhead *rqh;
633 	int pri;
634 
635 	mtx_assert(&sched_lock, MA_OWNED);
636 	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
637 	KASSERT((ke->ke_thread->td_kse != NULL), ("runq_add: No KSE on thread"));
638 	if (ke->ke_state == KES_ONRUNQ)
639 		return;
640 #if defined(INVARIANTS) && defined(DIAGNOSTIC)
641 	KASSERT(ke->ke_state != KES_ONRUNQ,
642 	    ("runq_add: kse %p (%s) already in run queue", ke,
643 	    ke->ke_proc->p_comm));
644 #endif
645 	pri = ke->ke_thread->td_priority / RQ_PPQ;
646 	ke->ke_rqindex = pri;
647 	runq_setbit(rq, pri);
648 	rqh = &rq->rq_queues[pri];
649 	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
650 	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
651 	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
652 	ke->ke_ksegrp->kg_runq_kses++;
653 	ke->ke_state = KES_ONRUNQ;
654 }
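
/*
 * Illustrative mapping (assuming RQ_PPQ is 4): a thread at priority 130
 * indexes run queue 130 / 4 = 32, so priorities 128 through 131 all share
 * that queue, and entries within it are serviced FIFO by runq_choose().
 */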
655 
656 /*
657  * Return true if there are runnable processes of any priority on the run
658  * queue, false otherwise.  Has no side effects, does not modify the run
659  * queue structure.
660  */
661 int
662 runq_check(struct runq *rq)
663 {
664 	struct rqbits *rqb;
665 	int i;
666 
667 	rqb = &rq->rq_status;
668 	for (i = 0; i < RQB_LEN; i++)
669 		if (rqb->rqb_bits[i]) {
670 			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
671 			    rqb->rqb_bits[i], i);
672 			return (1);
673 		}
674 	CTR0(KTR_RUNQ, "runq_check: empty");
675 
676 	return (0);
677 }
678 
679 /*
680  * Find and remove the highest priority KSE from the run queue.
681  * If there are no runnable KSEs, NULL is returned; the caller
682  * (choosethread()) then falls back to the per-cpu idle thread.
683  */
684 struct kse *
685 runq_choose(struct runq *rq)
686 {
687 	struct rqhead *rqh;
688 	struct kse *ke;
689 	int pri;
690 
691 	mtx_assert(&sched_lock, MA_OWNED);
692 	while ((pri = runq_findbit(rq)) != -1) {
693 		rqh = &rq->rq_queues[pri];
694 		ke = TAILQ_FIRST(rqh);
695 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
696 		CTR3(KTR_RUNQ,
697 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
698 		TAILQ_REMOVE(rqh, ke, ke_procq);
699 		ke->ke_ksegrp->kg_runq_kses--;
700 		if (TAILQ_EMPTY(rqh)) {
701 			CTR0(KTR_RUNQ, "runq_choose: empty");
702 			runq_clrbit(rq, pri);
703 		}
704 
705 		ke->ke_state = KES_RUNNING;
706 		KASSERT((ke->ke_thread != NULL),
707 		    ("runq_choose: No thread on KSE"));
708 		KASSERT((ke->ke_thread->td_kse != NULL),
709 		    ("runq_choose: No KSE on thread"));
710 		return (ke);
711 	}
712 	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
713 
714 	return (NULL);
715 }
716 
717 /*
718  * Remove the KSE from the queue specified by its priority, and clear the
719  * corresponding status bit if the queue becomes empty.
720  * Caller must set ke->ke_state afterwards.
721  */
722 void
723 runq_remove(struct runq *rq, struct kse *ke)
724 {
725 	struct rqhead *rqh;
726 	int pri;
727 
728 	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
729 	mtx_assert(&sched_lock, MA_OWNED);
730 	pri = ke->ke_rqindex;
731 	rqh = &rq->rq_queues[pri];
732 	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
733 	    ke, ke->ke_thread->td_priority, pri, rqh);
734 	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
735 	TAILQ_REMOVE(rqh, ke, ke_procq);
736 	if (TAILQ_EMPTY(rqh)) {
737 		CTR0(KTR_RUNQ, "runq_remove: empty");
738 		runq_clrbit(rq, pri);
739 	}
740 	ke->ke_state = KES_UNQUEUED;
741 	ke->ke_ksegrp->kg_runq_kses--;
742 }
743 
744 static void
745 runq_readjust(struct runq *rq, struct kse *ke)
746 {
747 
748 	if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
749 		runq_remove(rq, ke);
750 		runq_add(rq, ke);
751 	}
752 }
753 
754 #if 0
755 void
756 thread_sanity_check(struct thread *td)
757 {
758 	struct proc *p;
759 	struct ksegrp *kg;
760 	struct kse *ke;
761 	struct thread *td2;
762 	unsigned int prevpri;
763 	int	saw_lastassigned;
764 	int unassigned;
765 	int assigned;
766 
767 	p = td->td_proc;
768 	kg = td->td_ksegrp;
769 	ke = td->td_kse;
770 
771 	if (kg != &p->p_ksegrp) {
772 		panic ("wrong ksegrp");
773 	}
774 
775 	if (ke) {
776 		if (ke != &p->p_kse) {
777 			panic("wrong kse");
778 		}
779 		if (ke->ke_thread != td) {
780 			panic("wrong thread");
781 		}
782 	}
783 
784 	if ((p->p_flag & P_KSES) == 0) {
785 		if (ke == NULL) {
786 			panic("non KSE thread lost kse");
787 		}
788 	} else {
789 		prevpri = 0;
790 		saw_lastassigned = 0;
791 		unassigned = 0;
792 		assigned = 0;
793 		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
794 			if (td2->td_priority < prevpri) {
795 				panic("thread runqueue unsorted");
796 			}
797 			prevpri = td2->td_priority;
798 			if (td2->td_kse) {
799 				assigned++;
800 				if (unassigned) {
801 					panic("unassigned before assigned");
802 				}
803  				if  (kg->kg_last_assigned == NULL) {
804 					panic("lastassigned corrupt");
805 				}
806 				if (saw_lastassigned) {
807 					panic("last assigned not last");
808 				}
809 				if (td2->td_kse->ke_thread != td2) {
810 					panic("mismatched kse/thread");
811 				}
812 			} else {
813 				unassigned++;
814 			}
815 			if (td2 == kg->kg_last_assigned) {
816 				saw_lastassigned = 1;
817 				if (td2->td_kse == NULL) {
818 					panic("last assigned not assigned");
819 				}
820 			}
821 		}
822 		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
823 			panic("where on earth does lastassigned point?");
824 		}
825 		FOREACH_THREAD_IN_GROUP(kg, td2) {
826 			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
827 			    (td2->td_state == TDS_RUNQ)) {
828 				assigned++;
829 				if (td2->td_kse == NULL) {
830 					panic ("BOUND thread with no KSE");
831 				}
832 			}
833 		}
834 #if 0
835 		if ((unassigned + assigned) != kg->kg_runnable) {
836 			panic("wrong number in runnable");
837 		}
838 #endif
839 	}
840 }
841 #endif
842 
843