xref: /freebsd/sys/kern/kern_switch.c (revision 5e3da64ee9dd7ccca2aff401ebd8604ddc848417)
1 /*
2  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /***
30 
31 Here is the logic:
32 
33 If there are N processors, then there are at most N KSEs (kernel
34 schedulable entities) working to process threads that belong to a
35 KSEGROUP (kg). If there are X of these KSEs actually running at the
36 moment in question, then at most M = (N - X) of these KSEs are on
37 the run queue, as running KSEs are not on the queue.
38 
39 Runnable threads are queued off the KSEGROUP in priority order.
40 If there are M or more threads runnable, the top M threads
41 (by priority) are 'preassigned' to the M KSEs not running. The KSEs take
42 their priority from those threads and are put on the run queue.
43 
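For example, with N = 2 processors and X = 1 KSE from this KSEGROUP already
running, M = 1: only the single highest priority queued thread is
preassigned the remaining KSE, and any other runnable threads wait on the
KSEGROUP queue, unassigned, until a KSE frees up.
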
44 The last thread that had a priority high enough to have a KSE associated
45 with it, AND IS ON THE RUN QUEUE, is pointed to by
46 kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
47 assigned, either because all the available KSEs are actively running or
48 because there are no threads queued, that pointer is NULL.
49 
50 When a KSE is removed from the run queue to start running, we know it
51 was associated with the highest priority thread in the queue (at the head
52 of the queue). If it is also the last assigned, we know M was 1 and must
53 now be 0. Since the thread is no longer queued, the pointer must no
54 longer refer to it. Because there were no more KSEs available
55 (M was 1 and is now 0), and because we are not FREEING our KSE
56 but using it, there are STILL no more KSEs available. Therefore the
57 next thread in the ksegrp list will not have a KSE to assign to it,
58 so the pointer must be made 'invalid' (NULL).
59 
60 The pointer exists so that when a new thread is made runnable, its
61 priority can be compared with that of the last assigned thread to see
62 whether it should 'steal' that thread's KSE, i.e. whether it sorts
63 'earlier' on the list than that thread or later. If it sorts earlier,
64 the KSE is removed from the last assigned thread (which is now left
65 without a KSE) and reassigned to the new thread, which is placed
66 earlier in the list. The pointer is then backed up to the previous
67 thread (which may or may not be the new thread).
68 
69 When a thread sleeps or is removed, the KSE becomes available and if there
70 are queued threads that are not assigned KSEs, the highest priority one of
71 them is assigned the KSE, which is then placed back on the run queue at
72 the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
73 to point to it.
74 
75 The following diagram shows 2 KSEs and 3 threads from a single process.
76 
77  RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
78               \    \____
79                \        \
80     KSEGROUP---thread--thread--thread    (queued in priority order)
81         \                 /
82          \_______________/
83           (last_assigned)
84 
85 The result of this scheme is that the M available KSEs are always
86 queued at the priorities they have inherited from the M highest priority
87 threads for that KSEGROUP. If this situation changes, the KSEs are
88 reassigned to keep this true.
89 
90 */
91 
92 #include <sys/param.h>
93 #include <sys/systm.h>
94 #include <sys/kernel.h>
95 #include <sys/ktr.h>
96 #include <sys/lock.h>
97 #include <sys/mutex.h>
98 #include <sys/proc.h>
99 #include <sys/queue.h>
100 #include <machine/critical.h>
101 
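/* The run queue status bit map must provide exactly one bit per queue. */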
102 CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
103 
104 /*
105  * Global run queue.
106  */
107 static struct runq runq;
108 SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)
109 
110 static void runq_readjust(struct runq *rq, struct kse *ke);
111 /************************************************************************
112  * Functions that manipulate runnability from a thread perspective.	*
113  ************************************************************************/
114 
115 /*
116  * Select the KSE that will be run next.  From that find the thread, and
117  * remove it from the KSEGRP's run queue.  If there is thread clustering,
118  * this will be what does it.
119  */
120 struct thread *
121 choosethread(void)
122 {
123 	struct kse *ke;
124 	struct thread *td;
125 	struct ksegrp *kg;
126 
127 	if ((ke = runq_choose(&runq))) {
128 		td = ke->ke_thread;
129 		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
130 		kg = ke->ke_ksegrp;
131 		if (td->td_flags & TDF_UNBOUND) {
132 			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
133 			if (kg->kg_last_assigned == td) {
134 				KASSERT((TAILQ_PREV(td, threadqueue, td_runq)
135 				    == NULL), ("choosethread: td not at head"));
136 				kg->kg_last_assigned = TAILQ_PREV(td,
137 				    threadqueue, td_runq);
138 			}
139 			/*
140 			 * If we have started running an upcall,
141 			 * then TDF_UNBOUND was set because the thread was
142 			 * created without a KSE. Now that we have one,
143 			 * and it is our time to run, we make sure
144 			 * that BOUND semantics apply for the rest of
145 			 * the journey to userland, and into the UTS.
146 			 */
147 #ifdef	NOTYET
148 			if (td->td_flags & TDF_UPCALLING)
149 				td->td_flags &= ~TDF_UNBOUND;
150 #endif
151 		}
152 		kg->kg_runnable--;
153 		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
154 		    td, td->td_priority);
155 	} else {
156 		/* Pretend the idle thread was on the run queue. */
157 		td = PCPU_GET(idlethread);
158 		/* Simulate that it was on the run queue */
159 		td->td_state = TDS_RUNQ;
160 		td->td_kse->ke_state = KES_UNQUEUED;
161 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
162 	}
163 	return (td);
164 }
165 
166 /*
167  * Given a KSE (now surplus), either assign a new runnable thread to it
168  * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
169  * Assumes the KSE is no longer linked to any thread (it has been cleaned).
170  */
171 void
172 kse_reassign(struct kse *ke)
173 {
174 	struct ksegrp *kg;
175 	struct thread *td;
176 
177 	kg = ke->ke_ksegrp;
178 
179 	/*
180 	 * Find the first unassigned thread.
181 	 * If there is a 'last assigned' thread then see what is next;
182 	 * otherwise look at what is first.
183 	 */
184 	if ((td = kg->kg_last_assigned)) {
185 		td = TAILQ_NEXT(td, td_runq);
186 	} else {
187 		td = TAILQ_FIRST(&kg->kg_runq);
188 	}
189 
190 	/*
191 	 * If we found one, assign it the KSE; otherwise idle the KSE.
192 	 */
193 	if (td) {
194 		kg->kg_last_assigned = td;
195 		td->td_kse = ke;
196 		ke->ke_thread = td;
197 		runq_add(&runq, ke);
198 		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
199 	} else {
200 		KASSERT((ke->ke_state != KES_IDLE), ("kse already idle"));
201 		ke->ke_state = KES_IDLE;
202 		ke->ke_thread = NULL;
203 		TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
204 		kg->kg_idle_kses++;
205 		CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
206 	}
207 }
208 
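/*
 * Return nonzero if there is any KSE on the system run queue.
 */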
209 int
210 kserunnable(void)
211 {
212 	return runq_check(&runq);
213 }
214 
215 /*
216  * Remove a thread from its KSEGRP's run queue.
217  * This in turn may remove it from a KSE if it was already assigned
218  * to one, possibly causing a new thread to be assigned to the KSE
219  * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
220  */
221 void
222 remrunqueue(struct thread *td)
223 {
224 	struct thread *td2, *td3;
225 	struct ksegrp *kg;
226 	struct kse *ke;
227 
228 	mtx_assert(&sched_lock, MA_OWNED);
229 	KASSERT ((td->td_state == TDS_RUNQ),
230 		("remrunqueue: Bad state on run queue"));
231 	kg = td->td_ksegrp;
232 	ke = td->td_kse;
233 	/*
234 	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
235 	 * threads are BOUND.
236 	 */
237 	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
238 	td->td_state = TDS_UNQUEUED;
239 	kg->kg_runnable--;
240 	if ((td->td_flags & TDF_UNBOUND) == 0)  {
241 		/* Bring its kse with it, leave the thread attached */
242 		runq_remove(&runq, ke);
243 		ke->ke_state = KES_UNQUEUED;
244 		return;
245 	}
246 	if (ke) {
247 		/*
248 		 * This thread has been assigned to a KSE.
249 		 * We need to dissociate it and try to assign the
250 		 * KSE to the next available thread. Then, we should
251 		 * see if we need to move the KSE in the run queues.
252 		 */
253 		td2 = kg->kg_last_assigned;
254 		KASSERT((td2 != NULL), ("last assigned has wrong value "));
255 		td->td_kse = NULL;
256 		if ((td3 = TAILQ_NEXT(td2, td_runq))) {
257 			KASSERT(td3 != td, ("td3 somehow matched td"));
258 			/*
259 			 * Give the next unassigned thread to the KSE
260 			 * so the number of runnable KSEs remains
261 			 * constant.
262 			 */
263 			td3->td_kse = ke;
264 			ke->ke_thread = td3;
265 			kg->kg_last_assigned = td3;
266 			runq_readjust(&runq, ke);
267 		} else {
268 			/*
269 			 * There is no unassigned thread.
270 			 * If we were the last assigned one,
271 			 * adjust the last assigned pointer back
272 			 * one, which may result in NULL.
273 			 */
274 			if (td == td2) {
275 				kg->kg_last_assigned =
276 				    TAILQ_PREV(td, threadqueue, td_runq);
277 			}
278 			runq_remove(&runq, ke);
279 			KASSERT((ke->ke_state != KES_IDLE),
280 			    ("kse already idle"));
281 			ke->ke_state = KES_IDLE;
282 			ke->ke_thread = NULL;
283 			TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
284 			kg->kg_idle_kses++;
285 		}
286 	}
287 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
288 }
289 
290 #if 1 /* use the first version */
291 
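/*
 * Make a thread runnable.  The thread is placed on its ksegrp's run
 * queue in priority order.  If a KSE can be found for it (a free one,
 * one commandeered from a lower priority thread, or one it already
 * owns), that KSE is given to the appropriate thread and added to the
 * system run queue.  Bound threads skip the ksegrp queue entirely.
 */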
292 void
293 setrunqueue(struct thread *td)
294 {
295 	struct kse *ke;
296 	struct ksegrp *kg;
297 	struct thread *td2;
298 	struct thread *tda;
299 
300 	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
301 	mtx_assert(&sched_lock, MA_OWNED);
302 	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
303 	td->td_state = TDS_RUNQ;
304 	kg = td->td_ksegrp;
305 	kg->kg_runnable++;
306 	if ((td->td_flags & TDF_UNBOUND) == 0) {
307 		KASSERT((td->td_kse != NULL),
308 		    ("queueing BAD thread to run queue"));
309 		/*
310 		 * Common path optimisation: Only one of everything
311 		 * and the KSE is always already attached.
312 		 * Totally ignore the ksegrp run queue.
313 		 */
314 		runq_add(&runq, td->td_kse);
315 		return;
316 	}
317 	/*
318 	 * Ok, so we are threading with this thread.
319 	 * If we don't have a KSE yet, see if we can get one..
320 	 */
321 	tda = kg->kg_last_assigned;
322 	if ((ke = td->td_kse) == NULL) {
323 		/*
324 		 * We will need a KSE; see if there is one.
325 		 * First look for a free one, before getting desperate.
326 		 * If we can't get one, our priority is not high enough;
327 		 * that's OK.
328 		 */
329 		if (kg->kg_idle_kses) {
330 			/*
331 			 * There is a free one so it's ours for the asking..
332 			 */
333 			ke = TAILQ_FIRST(&kg->kg_iq);
334 			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
335 			ke->ke_state = KES_UNQUEUED;
336 			kg->kg_idle_kses--;
337 		} else if (tda && (tda->td_priority > td->td_priority)) {
338 			/*
339 			 * None free, but there is one we can commandeer.
340 			 */
341 			ke = tda->td_kse;
342 			tda->td_kse = NULL;
343 			ke->ke_thread = NULL;
344 			tda = kg->kg_last_assigned =
345 		    	    TAILQ_PREV(tda, threadqueue, td_runq);
346 			runq_remove(&runq, ke);
347 		}
348 	} else {
349 		KASSERT(ke->ke_thread == td, ("KSE/thread mismatch"));
350 		KASSERT(ke->ke_state != KES_IDLE, ("KSE unexpectedly idle"));
351 		ke->ke_thread = NULL;
352 		td->td_kse = NULL;
353 	}
354 
355 	/*
356 	 * Add the thread to the ksegrp's run queue at
357 	 * the appropriate place.
358 	 */
359 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
360 		if (td2->td_priority > td->td_priority) {
361 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
362 			break;
363 		}
364 	}
365 	if (td2 == NULL) {
366 		/* We ran off the end of the TAILQ or it was empty. */
367 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
368 	}
369 
370 	/*
371 	 * If we have a KSE to use, then put it on the run queue and,
372 	 * if needed, readjust the last_assigned pointer.
373 	 */
374 	if (ke) {
375 		if (tda == NULL) {
376 			/*
377 			 * No pre-existing last assigned so whoever is first
378 			 * gets the KSE we brought in.. (may be us)
379 			 */
380 			td2 = TAILQ_FIRST(&kg->kg_runq);
381 			KASSERT((td2->td_kse == NULL),
382 			    ("unexpected ke present"));
383 			td2->td_kse = ke;
384 			ke->ke_thread = td2;
385 			kg->kg_last_assigned = td2;
386 		} else if (tda->td_priority > td->td_priority) {
387 			/*
388 			 * It's ours, grab it, but last_assigned is past us
389 			 * so don't change it.
390 			 */
391 			td->td_kse = ke;
392 			ke->ke_thread = td;
393 		} else {
394 			/*
395 			 * We are past last_assigned, so
396 			 * put the new kse on whatever is next,
397 			 * which may or may not be us.
398 			 */
399 			td2 = TAILQ_NEXT(tda, td_runq);
400 			kg->kg_last_assigned = td2;
401 			td2->td_kse = ke;
402 			ke->ke_thread = td2;
403 		}
404 		runq_add(&runq, ke);
405 	}
406 }
407 
408 #else
409 
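/*
 * Alternative implementation of setrunqueue(), currently compiled out
 * by the "#if 1" above and kept for reference.
 */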
410 void
411 setrunqueue(struct thread *td)
412 {
413 	struct kse *ke;
414 	struct ksegrp *kg;
415 	struct thread *td2;
416 
417 	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
418 	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
419 	td->td_state = TDS_RUNQ;
420 	kg = td->td_ksegrp;
421 	kg->kg_runnable++;
422 	if ((td->td_flags & TDF_UNBOUND) == 0) {
423 		/*
424 		 * Common path optimisation: Only one of everything
425 		 * and the KSE is always already attached.
426 		 * Totally ignore the ksegrp run queue.
427 		 */
428 		runq_add(&runq, td->td_kse);
429 		return;
430 	}
431 	/*
432 	 * First add the thread to the ksegrp's run queue at
433 	 * the appropriate place.
434 	 */
435 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
436 		if (td2->td_priority > td->td_priority) {
437 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
438 			break;
439 		}
440 	}
441 	if (td2 == NULL) {
442 		/* We ran off the end of the TAILQ or it was empty. */
443 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
444 	}
445 
446 	/*
447 	 * The following could be achieved by simply doing:
448 	 * td->td_kse = NULL; kse_reassign(ke);
449 	 * but I felt that I'd try to do it inline here.
450 	 * All this work may not be worth it.
451 	 */
452 	if ((ke = td->td_kse)) { /* XXXKSE */
453 		/*
454 		 * We have a KSE already. See whether we can keep it
455 		 * or if we need to give it to someone else.
456 		 * Either way it will need to be inserted into
457 		 * the runq. kse_reassign() will do this as will runq_add().
458 		 */
459 		if ((kg->kg_last_assigned) &&
460 		   (kg->kg_last_assigned->td_priority > td->td_priority)) {
461 			/*
462 			 * We can definitely keep the KSE
463 			 * as the "last assigned thread" has
464 			 * lower priority than we do.
465 			 * The "last assigned" pointer stays the same.
466 			 */
467 			runq_add(&runq, ke);
468 			return;
469 
470 		}
471 		/*
472 		 * Give it to the correct thread,
473 		 * which may be (often is) us, but may not be.
474 		 */
475 		td->td_kse = NULL;
476 		kse_reassign(ke);
477 		return;
478 	}
479 	/*
480 	 * There are two cases where KSE adjustment is needed:
481 	 * usurpation of an already assigned KSE, and assignment
482 	 * of a previously IDLE KSE.
483 	 */
484 	if (kg->kg_idle_kses) {
485 		/*
486 		 * If there are unassigned KSEs then we definitely
487 		 * will be assigned one from the idle KSE list.
488 		 * If we are the last, we should get the "last
489 		 * assigned" pointer set to us as well.
490 		 */
491 		ke = TAILQ_FIRST(&kg->kg_iq);
492 		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
493 		ke->ke_state = KES_UNQUEUED;
494 		kg->kg_idle_kses--;
495 		ke->ke_thread = td;
496 		td->td_kse = ke;
497 		runq_add(&runq, ke);
498 		if (TAILQ_NEXT(td, td_runq) == NULL) {
499 			kg->kg_last_assigned = td;
500 		}
501 	} else if (kg->kg_last_assigned &&
502 		(kg->kg_last_assigned->td_priority > td->td_priority)) {
503 		/*
504 		 * If there were none last-assigned, all KSEs
505 		 * are actually out running as we speak.
506 		 * If there was a last assigned, but we didn't see it,
507 		 * we must be inserting before it, so take the KSE from
508 		 * the last assigned, and back it up one entry. Then,
509 		 * assign the KSE to the new thread and adjust its priority.
510 		 */
511 		td2 = kg->kg_last_assigned;
512 		ke = td2->td_kse;
513 		kg->kg_last_assigned =
514 		    TAILQ_PREV(td2, threadqueue, td_runq);
515 		td2->td_kse = NULL;
516 		td->td_kse = ke;
517 		ke->ke_thread = td;
518 		runq_readjust(&runq, ke);
519 	}
520 }
521 #endif
522 
523 /************************************************************************
524  * Critical section marker functions					*
525  ************************************************************************/
526 /* Critical sections that prevent preemption. */
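/*
 * Critical sections nest: only the outermost critical_enter() and the
 * matching outermost critical_exit() call into cpu_critical_enter()
 * and cpu_critical_exit().  Typical usage:
 *
 *	critical_enter();
 *	... code that must not be preempted ...
 *	critical_exit();
 */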
527 void
528 critical_enter(void)
529 {
530 	struct thread *td;
531 
532 	td = curthread;
533 	if (td->td_critnest == 0)
534 		cpu_critical_enter();
535 	td->td_critnest++;
536 }
537 
538 void
539 critical_exit(void)
540 {
541 	struct thread *td;
542 
543 	td = curthread;
544 	if (td->td_critnest == 1) {
545 		td->td_critnest = 0;
546 		cpu_critical_exit();
547 	} else {
548 		td->td_critnest--;
549 	}
550 }
551 
552 
553 /************************************************************************
554  * SYSTEM RUN QUEUE manipulations and tests				*
555  ************************************************************************/
556 /*
557  * Initialize a run structure.
558  */
559 void
560 runq_init(struct runq *rq)
561 {
562 	int i;
563 
564 	bzero(rq, sizeof *rq);
565 	for (i = 0; i < RQ_NQS; i++)
566 		TAILQ_INIT(&rq->rq_queues[i]);
567 }
568 
569 /*
570  * Clear the status bit of the queue corresponding to priority level pri,
571  * indicating that it is empty.
572  */
573 static __inline void
574 runq_clrbit(struct runq *rq, int pri)
575 {
576 	struct rqbits *rqb;
577 
578 	rqb = &rq->rq_status;
579 	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
580 	    rqb->rqb_bits[RQB_WORD(pri)],
581 	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
582 	    RQB_BIT(pri), RQB_WORD(pri));
583 	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
584 }
585 
586 /*
587  * Find the index of the first non-empty run queue.  This is done by
588  * scanning the status bits; a set bit indicates a non-empty queue.
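 * Returns -1 if every queue is empty.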
589  */
590 static __inline int
591 runq_findbit(struct runq *rq)
592 {
593 	struct rqbits *rqb;
594 	int pri;
595 	int i;
596 
597 	rqb = &rq->rq_status;
598 	for (i = 0; i < RQB_LEN; i++)
599 		if (rqb->rqb_bits[i]) {
600 			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
601 			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
602 			    rqb->rqb_bits[i], i, pri);
603 			return (pri);
604 		}
605 
606 	return (-1);
607 }
608 
609 /*
610  * Set the status bit of the queue corresponding to priority level pri,
611  * indicating that it is non-empty.
612  */
613 static __inline void
614 runq_setbit(struct runq *rq, int pri)
615 {
616 	struct rqbits *rqb;
617 
618 	rqb = &rq->rq_status;
619 	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
620 	    rqb->rqb_bits[RQB_WORD(pri)],
621 	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
622 	    RQB_BIT(pri), RQB_WORD(pri));
623 	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
624 }
625 
626 /*
627  * Add the KSE to the queue specified by its priority, and set the
628  * corresponding status bit.
629  */
630 void
631 runq_add(struct runq *rq, struct kse *ke)
632 {
633 	struct rqhead *rqh;
634 	int pri;
635 
636 	mtx_assert(&sched_lock, MA_OWNED);
637 	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
638 	KASSERT((ke->ke_thread->td_kse != NULL), ("runq_add: No KSE on thread"));
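	/* A KSE that is already on the run queue is left where it is. */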
639 	if (ke->ke_state == KES_ONRUNQ)
640 		return;
641 #if defined(INVARIANTS) && defined(DIAGNOSTIC)
642 	KASSERT(ke->ke_state != KES_ONRUNQ,
643 	    ("runq_add: kse %p (%s) already in run queue", ke,
644 	    ke->ke_proc->p_comm));
645 #endif
646 	pri = ke->ke_thread->td_priority / RQ_PPQ;
647 	ke->ke_rqindex = pri;
648 	runq_setbit(rq, pri);
649 	rqh = &rq->rq_queues[pri];
650 	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
651 	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
652 	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
653 	ke->ke_ksegrp->kg_runq_kses++;
654 	ke->ke_state = KES_ONRUNQ;
655 }
656 
657 /*
658  * Return true if there are runnable processes of any priority on the run
659  * queue, false otherwise.  Has no side effects, does not modify the run
660  * queue structure.
661  */
662 int
663 runq_check(struct runq *rq)
664 {
665 	struct rqbits *rqb;
666 	int i;
667 
668 	rqb = &rq->rq_status;
669 	for (i = 0; i < RQB_LEN; i++)
670 		if (rqb->rqb_bits[i]) {
671 			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
672 			    rqb->rqb_bits[i], i);
673 			return (1);
674 		}
675 	CTR0(KTR_RUNQ, "runq_check: empty");
676 
677 	return (0);
678 }
679 
680 /*
681  * Find and remove the highest priority KSE from the run queue.
682  * If there are no runnable KSEs, NULL is returned; the caller
683  * (choosethread()) then falls back to the per-cpu idle thread.
684  */
685 struct kse *
686 runq_choose(struct runq *rq)
687 {
688 	struct rqhead *rqh;
689 	struct kse *ke;
690 	int pri;
691 
692 	mtx_assert(&sched_lock, MA_OWNED);
693 	while ((pri = runq_findbit(rq)) != -1) {
694 		rqh = &rq->rq_queues[pri];
695 		ke = TAILQ_FIRST(rqh);
696 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
697 		CTR3(KTR_RUNQ,
698 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
699 		TAILQ_REMOVE(rqh, ke, ke_procq);
700 		ke->ke_ksegrp->kg_runq_kses--;
701 		if (TAILQ_EMPTY(rqh)) {
702 			CTR0(KTR_RUNQ, "runq_choose: empty");
703 			runq_clrbit(rq, pri);
704 		}
705 
706 		ke->ke_state = KES_RUNNING;
707 		KASSERT((ke->ke_thread != NULL),
708 		    ("runq_choose: No thread on KSE"));
709 		KASSERT((ke->ke_thread->td_kse != NULL),
710 		    ("runq_choose: No KSE on thread"));
711 		return (ke);
712 	}
713 	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
714 
715 	return (NULL);
716 }
717 
718 /*
719  * Remove the KSE from the queue specified by its priority, and clear the
720  * corresponding status bit if the queue becomes empty.
721  * The KSE is left in the KES_UNQUEUED state; the caller may set a new state.
722  */
723 void
724 runq_remove(struct runq *rq, struct kse *ke)
725 {
726 	struct rqhead *rqh;
727 	int pri;
728 
729 	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
730 	mtx_assert(&sched_lock, MA_OWNED);
731 	pri = ke->ke_rqindex;
732 	rqh = &rq->rq_queues[pri];
733 	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
734 	    ke, ke->ke_thread->td_priority, pri, rqh);
735 	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
736 	TAILQ_REMOVE(rqh, ke, ke_procq);
737 	if (TAILQ_EMPTY(rqh)) {
738 		CTR0(KTR_RUNQ, "runq_remove: empty");
739 		runq_clrbit(rq, pri);
740 	}
741 	ke->ke_state = KES_UNQUEUED;
742 	ke->ke_ksegrp->kg_runq_kses--;
743 }
744 
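/*
 * Requeue a KSE whose queue index no longer matches its thread's
 * priority (for example after the KSE has been handed to a different
 * thread), so that it sits on the correct priority queue.
 */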
745 static void
746 runq_readjust(struct runq *rq, struct kse *ke)
747 {
748 
749 	if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
750 		runq_remove(rq, ke);
751 		runq_add(rq, ke);
752 	}
753 }
754 
755 #if 0
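/*
 * Debugging aid, currently compiled out: verify that the ksegrp run
 * queue ordering, the KSE assignments and kg_last_assigned are all
 * consistent with each other.
 */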
756 void
757 thread_sanity_check(struct thread *td)
758 {
759 	struct proc *p;
760 	struct ksegrp *kg;
761 	struct kse *ke;
762 	struct thread *td2;
763 	unsigned int prevpri;
764 	int	saw_lastassigned;
765 	int unassigned;
766 	int assigned;
767 
768 	p = td->td_proc;
769 	kg = td->td_ksegrp;
770 	ke = td->td_kse;
771 
772 	if (kg != &p->p_ksegrp) {
773 		panic ("wrong ksegrp");
774 	}
775 
776 	if (ke) {
777 		if (ke != &p->p_kse) {
778 			panic("wrong kse");
779 		}
780 		if (ke->ke_thread != td) {
781 			panic("wrong thread");
782 		}
783 	}
784 
785 	if ((p->p_flag & P_KSES) == 0) {
786 		if (ke == NULL) {
787 			panic("non KSE thread lost kse");
788 		}
789 	} else {
790 		prevpri = 0;
791 		saw_lastassigned = 0;
792 		unassigned = 0;
793 		assigned = 0;
794 		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
795 			if (td2->td_priority < prevpri) {
796 				panic("thread runqueue unsorted");
797 			}
798 			prevpri = td2->td_priority;
799 			if (td2->td_kse) {
800 				assigned++;
801 				if (unassigned) {
802 					panic("unassigned before assigned");
803 				}
804 				if (kg->kg_last_assigned == NULL) {
805 					panic("lastassigned corrupt");
806 				}
807 				if (saw_lastassigned) {
808 					panic("last assigned not last");
809 				}
810 				if (td2->td_kse->ke_thread != td2) {
811 					panic("mismatched kse/thread");
812 				}
813 			} else {
814 				unassigned++;
815 			}
816 			if (td2 == kg->kg_last_assigned) {
817 				saw_lastassigned = 1;
818 				if (td2->td_kse == NULL) {
819 					panic("last assigned not assigned");
820 				}
821 			}
822 		}
823 		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
824 			panic("where on earth does lastassigned point?");
825 		}
826 		FOREACH_THREAD_IN_GROUP(kg, td2) {
827 			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
828 			    (td2->td_state == TDS_RUNQ)) {
829 				assigned++;
830 				if (td2->td_kse == NULL) {
831 					panic ("BOUND thread with no KSE");
832 				}
833 			}
834 		}
835 #if 0
836 		if ((unassigned + assigned) != kg->kg_runnable) {
837 			panic("wrong number in runnable");
838 		}
839 #endif
840 	}
841 }
842 #endif
843 
844