xref: /freebsd/sys/kern/kern_switch.c (revision e602ba25fd1f9a7ea2215c01f470c08f140de809)
1 /*
2  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /***
30 
31 Here is the logic..
32 
33 If there are N processors, then there are at most N KSEs (kernel
34 schedulable entities) working to process threads that belong to a
35 KSEGOUP (kg). If there are X of these KSEs actually running at the
36 moment in question, then there are at most M (N-X) of these KSEs on
37 the run queue, as running KSEs are not on the queue.
38 
39 Runnable threads are queued off the KSEGROUP in priority order.
40 If there are M or more threads runnable, the top M threads
41 (by priority) are 'preassigned' to the M KSEs not running. The KSEs take
42 their priority from those threads and are put on the run queue.
43 
44 The last thread that had a priority high enough to have a KSE associated
45 with it, AND IS ON THE RUN QUEUE is pointed to by
46 kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
47 assigned as all the available KSEs are activly running, or because there
48 are no threads queued, that pointer is NULL.
49 
50 When a KSE is removed from the run queue to become runnable, we know
51 it was associated with the highest priority thread in the queue (at the head
52 of the queue). If it is also the last assigned we know M was 1 and must
53 now be 0. Since the thread is no longer queued that pointer must be
54 removed from it. Since we know there were no more KSEs available,
55 (M was 1 and is now 0) and since we are not FREEING our KSE
56 but using it, we know there are STILL no more KSEs available, we can prove
57 that the next thread in the ksegrp list will not have a KSE to assign to
58 it, so we can show that the pointer must be made 'invalid' (NULL).
59 
60 The pointer exists so that when a new thread is made runnable, it can
61 have its priority compared with the last assigned thread to see if
62 it should 'steal' its KSE or not.. i.e. is it 'earlier'
63 on the list than that thread or later.. If it's earlier, then the KSE is
64 removed from the last assigned (which is now not assigned a KSE)
65 and reassigned to the new thread, which is placed earlier in the list.
66 The pointer is then backed up to the previous thread (which may or may not
67 be the new thread).
68 
69 When a thread sleeps or is removed, the KSE becomes available and if there
70 are queued threads that are not assigned KSEs, the highest priority one of
71 them is assigned the KSE, which is then placed back on the run queue at
72 the approipriate place, and the kg->kg_last_assigned pointer is adjusted down
73 to point to it.
74 
75 The following diagram shows 2 KSEs and 3 threads from a single process.
76 
77  RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
78               \    \____
79                \        \
80     KSEGROUP---thread--thread--thread    (queued in priority order)
81         \                 /
82          \_______________/
83           (last_assigned)
84 
85 The result of this scheme is that the M available KSEs are always
86 queued at the priorities they have inherrited from the M highest priority
87 threads for that KSEGROUP. If this situation changes, the KSEs are
88 reassigned to keep this true.
89 
90 */
91 
92 #include <sys/param.h>
93 #include <sys/systm.h>
94 #include <sys/kernel.h>
95 #include <sys/ktr.h>
96 #include <sys/lock.h>
97 #include <sys/mutex.h>
98 #include <sys/proc.h>
99 #include <sys/queue.h>
100 #include <machine/critical.h>
101 
102 CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
103 
104 /*
105  * Global run queue.
106  */
107 static struct runq runq;
108 SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)
109 
110 static void runq_readjust(struct runq *rq, struct kse *ke);
111 /************************************************************************
112  * Functions that manipulate runnability from a thread perspective.	*
113  ************************************************************************/
114 
115 /*
116  * Select the KSE that will be run next.  From that find the thread, and x
117  * remove it from the KSEGRP's run queue.  If there is thread clustering,
118  * this will be what does it.
119  */
120 struct thread *
121 choosethread(void)
122 {
123 	struct kse *ke;
124 	struct thread *td;
125 	struct ksegrp *kg;
126 
127 	if ((ke = runq_choose(&runq))) {
128 		td = ke->ke_thread;
129 		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
130 		kg = ke->ke_ksegrp;
131 		if (td->td_flags & TDF_UNBOUND) {
132 			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
133 			if (kg->kg_last_assigned == td)
134 				if (TAILQ_PREV(td, threadqueue, td_runq)
135 				    != NULL)
136 					printf("Yo MAMA!\n");
137 				kg->kg_last_assigned = TAILQ_PREV(td,
138 				    threadqueue, td_runq);
139 			/*
140 			 *  If we have started running an upcall,
141 			 * Then TDF_UNBOUND WAS set because the thread was
142 			 * created without a KSE. Now that we have one,
143 			 * and it is our time to run, we make sure
144 			 * that BOUND semantics apply for the rest of
145 			 * the journey to userland, and into the UTS.
146 			 */
147 #ifdef	NOTYET
148 			if (td->td_flags & TDF_UPCALLING)
149 				tdf->td_flags &= ~TDF_UNBOUND;
150 #endif
151 		}
152 		kg->kg_runnable--;
153 		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
154 		    td, td->td_priority);
155 	} else {
156 		/* Pretend the idle thread was on the run queue. */
157 		td = PCPU_GET(idlethread);
158 		/* Simulate that it was on the run queue */
159 		td->td_state = TDS_RUNQ;
160 		td->td_kse->ke_state = KES_UNQUEUED;
161 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
162 	}
163 	thread_sanity_check(td);
164 	return (td);
165 }
166 
167 /*
168  * Given a KSE (now surplus), either assign a new runable thread to it
169  * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
170  * Assumes the kse is not linked to any threads any more. (has been cleaned).
171  */
172 void
173 kse_reassign(struct kse *ke)
174 {
175 	struct ksegrp *kg;
176 	struct thread *td;
177 
178 	kg = ke->ke_ksegrp;
179 
180 KASSERT((ke->ke_state != KES_ONRUNQ), ("kse_reassigning non-free kse"));
181 	/*
182 	 * Find the first unassigned thread
183 	 * If there is a 'last assigned' then see what's next.
184 	 * otherwise look at what is first.
185 	 */
186 	if ((td = kg->kg_last_assigned)) {
187 		td = TAILQ_NEXT(td, td_runq);
188 	} else {
189 		td = TAILQ_FIRST(&kg->kg_runq);
190 	}
191 
192 	/*
193 	 * If we found one assign it the kse, otherwise idle the kse.
194 	 */
195 	if (td) {
196 		thread_sanity_check(td);
197 		kg->kg_last_assigned = td;
198 		td->td_kse = ke;
199 		ke->ke_thread = td;
200 		runq_add(&runq, ke);
201 		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
202 	} else {
203 		KASSERT((ke->ke_state != KES_IDLE), ("kse already idle"));
204 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
205 		ke->ke_state = KES_IDLE;
206 		ke->ke_thread = NULL;
207 		TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
208 		kg->kg_idle_kses++;
209 		CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
210 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self2!"));
211 	}
212 }
213 
214 int
215 kserunnable(void)
216 {
217 	return runq_check(&runq);
218 }
219 
220 /*
221  * Remove a thread from its KSEGRP's run queue.
222  * This in turn may remove it from a KSE if it was already assigned
223  * to one, possibly causing a new thread to be assigned to the KSE
224  * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
225  */
226 void
227 remrunqueue(struct thread *td)
228 {
229 	struct thread *td2, *td3;
230 	struct ksegrp *kg;
231 	struct kse *ke;
232 
233 	mtx_assert(&sched_lock, MA_OWNED);
234 	thread_sanity_check(td);
235 	KASSERT ((td->td_state == TDS_RUNQ),
236 		("remrunqueue: Bad state on run queue"));
237 	kg = td->td_ksegrp;
238 	ke = td->td_kse;
239 	/*
240 	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
241 	 * threads are BOUND.
242 	 */
243 	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
244 	td->td_state = TDS_UNQUEUED;
245 	kg->kg_runnable--;
246 	if ((td->td_flags & TDF_UNBOUND) == 0)  {
247 		/* Bring its kse with it, leave the thread attached */
248 		runq_remove(&runq, ke);
249 		ke->ke_state = KES_UNQUEUED;
250 		return;
251 	}
252 	if (ke) {
253 		/*
254 		 * This thread has been assigned to a KSE.
255 		 * We need to dissociate it and try assign the
256 		 * KSE to the next available thread. Then, we should
257 		 * see if we need to move the KSE in the run queues.
258 		 */
259 		td2 = kg->kg_last_assigned;
260 		KASSERT((td2 != NULL), ("last assigned has wrong value "));
261 		td->td_kse = NULL;
262 		if ((td3 = TAILQ_NEXT(td2, td_runq))) {
263 			KASSERT(td3 != td, ("td3 somehow matched td"));
264 			/*
265 			 * Give the next unassigned thread to the KSE
266 			 * so the number of runnable KSEs remains
267 			 * constant.
268 			 */
269 			td3->td_kse = ke;
270 			ke->ke_thread = td3;
271 			kg->kg_last_assigned = td3;
272 			runq_readjust(&runq, ke);
273 		} else {
274 			/*
275 			 * There is no unassigned thread.
276 			 * If we were the last assigned one,
277 			 * adjust the last assigned pointer back
278 			 * one, which may result in NULL.
279 			 */
280 			if (td == td2) {
281 				kg->kg_last_assigned =
282 				    TAILQ_PREV(td, threadqueue, td_runq);
283 			}
284 			runq_remove(&runq, ke);
285 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
286 			KASSERT((ke->ke_state != KES_IDLE),
287 			    ("kse already idle"));
288 			ke->ke_state = KES_IDLE;
289 			ke->ke_thread = NULL;
290 KASSERT((TAILQ_FIRST(&kg->kg_iq) != ke), ("really bad screwup"));
291 			TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
292 			kg->kg_idle_kses++;
293 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self2!"));
294 		}
295 	}
296 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
297 	thread_sanity_check(td);
298 }
299 
300 #if 1 /* use the first version */
301 
302 void
303 setrunqueue(struct thread *td)
304 {
305 	struct kse *ke;
306 	struct ksegrp *kg;
307 	struct thread *td2;
308 	struct thread *tda;
309 
310 	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
311 	mtx_assert(&sched_lock, MA_OWNED);
312 	thread_sanity_check(td);
313 	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
314 	td->td_state = TDS_RUNQ;
315 	kg = td->td_ksegrp;
316 	kg->kg_runnable++;
317 	if ((td->td_flags & TDF_UNBOUND) == 0) {
318 		KASSERT((td->td_kse != NULL),
319 		    ("queueing BAD thread to run queue"));
320 		/*
321 		 * Common path optimisation: Only one of everything
322 		 * and the KSE is always already attached.
323 		 * Totally ignore the ksegrp run queue.
324 		 */
325 		runq_add(&runq, td->td_kse);
326 		return;
327 	}
328 	/*
329 	 * Ok, so we are threading with this thread.
330 	 * We don't have a KSE, see if we can get one..
331 	 */
332 	tda = kg->kg_last_assigned;
333 	if ((ke = td->td_kse) == NULL) {
334 		/*
335 		 * We will need a KSE, see if there is one..
336 		 * First look for a free one, before getting desperate.
337 		 * If we can't get one, our priority is not high enough..
338 		 * that's ok..
339 		 */
340 		if (kg->kg_idle_kses) {
341 			/*
342 			 * There is a free one so it's ours for the asking..
343 			 */
344 			ke = TAILQ_FIRST(&kg->kg_iq);
345 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self3!"));
346 			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
347 			ke->ke_state = KES_UNQUEUED;
348 			kg->kg_idle_kses--;
349 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self4!"));
350 		} else if (tda && (tda->td_priority > td->td_priority)) {
351 			/*
352 			 * None free, but there is one we can commandeer.
353 			 */
354 			ke = tda->td_kse;
355 			tda->td_kse = NULL;
356 			ke->ke_thread = NULL;
357 			tda = kg->kg_last_assigned =
358 		    	    TAILQ_PREV(tda, threadqueue, td_runq);
359 			runq_remove(&runq, ke);
360 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self5!"));
361 		}
362 	} else {
363 		KASSERT(ke->ke_thread == td, ("KSE/thread mismatch"));
364 		KASSERT(ke->ke_state != KES_IDLE, ("KSE unexpectedly idle"));
365 		ke->ke_thread = NULL;
366 		td->td_kse = NULL;
367 	}
368 
369 	/*
370 	 * Add the thread to the ksegrp's run queue at
371 	 * the appropriate place.
372 	 */
373 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
374 		if (td2->td_priority > td->td_priority) {
375 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
376 			break;
377 		}
378 	}
379 	if (td2 == NULL) {
380 		/* We ran off the end of the TAILQ or it was empty. */
381 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
382 	}
383 
384 	/*
385 	 * If we have a ke to use, then put it on the run queue and
386 	 * If needed, readjust the last_assigned pointer.
387 	 */
388 	if (ke) {
389 		if (tda == NULL) {
390 			/*
391 			 * No pre-existing last assigned so whoever is first
392 			 * gets the KSE we borught in.. (may be us)
393 			 */
394 			td2 = TAILQ_FIRST(&kg->kg_runq);
395 			KASSERT((td2->td_kse == NULL),
396 			    ("unexpected ke present"));
397 			td2->td_kse = ke;
398 			ke->ke_thread = td2;
399 			kg->kg_last_assigned = td2;
400 		} else if (tda->td_priority > td->td_priority) {
401 			/*
402 			 * It's ours, grab it, but last_assigned is past us
403 			 * so don't change it.
404 			 */
405 			td->td_kse = ke;
406 			ke->ke_thread = td;
407 		} else {
408 			/*
409 			 * We are past last_assigned, so
410 			 * put the new kse on whatever is next,
411 			 * which may or may not be us.
412 			 */
413 			td2 = TAILQ_NEXT(tda, td_runq);
414 			kg->kg_last_assigned = td2;
415 			td2->td_kse = ke;
416 			ke->ke_thread = td2;
417 		}
418 		runq_add(&runq, ke);
419 	}
420 	thread_sanity_check(td);
421 }
422 
423 #else
424 
425 void
426 setrunqueue(struct thread *td)
427 {
428 	struct kse *ke;
429 	struct ksegrp *kg;
430 	struct thread *td2;
431 
432 	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
433 	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
434 	td->td_state = TDS_RUNQ;
435 	kg = td->td_ksegrp;
436 	kg->kg_runnable++;
437 	if ((td->td_flags & TDF_UNBOUND) == 0) {
438 		/*
439 		 * Common path optimisation: Only one of everything
440 		 * and the KSE is always already attached.
441 		 * Totally ignore the ksegrp run queue.
442 		 */
443 		runq_add(&runq, td->td_kse);
444 		return;
445 	}
446 	/*
447 	 * First add the thread to the ksegrp's run queue at
448 	 * the appropriate place.
449 	 */
450 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
451 		if (td2->td_priority > td->td_priority) {
452 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
453 			break;
454 		}
455 	}
456 	if (td2 == NULL) {
457 		/* We ran off the end of the TAILQ or it was empty. */
458 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
459 	}
460 
461 	/*
462 	 * The following could be achieved by simply doing:
463 	 * td->td_kse = NULL; kse_reassign(ke);
464 	 * but I felt that I'd try do it inline here.
465 	 * All this work may not be worth it.
466 	 */
467 	if ((ke = td->td_kse)) { /* XXXKSE */
468 		/*
469 		 * We have a KSE already. See whether we can keep it
470 		 * or if we need to give it to someone else.
471 		 * Either way it will need to be inserted into
472 		 * the runq. kse_reassign() will do this as will runq_add().
473 		 */
474 		if ((kg->kg_last_assigned) &&
475 		   (kg->kg_last_assigned->td_priority > td->td_priority)) {
476 			/*
477 			 * We can definitly keep the KSE
478 			 * as the "last assignead thread" has
479 			 * less priority than we do.
480 			 * The "last assigned" pointer stays the same.
481 			 */
482 			runq_add(&runq, ke);
483 			return;
484 
485 		}
486 		/*
487 		 * Give it to the correct thread,
488 		 * which may be (often is) us, but may not be.
489 		 */
490 		td->td_kse = NULL;
491 		kse_reassign(ke);
492 		return;
493 	}
494 	/*
495 	 * There are two cases where KSE adjustment is needed.
496 	 * Usurpation of an already assigned KSE, and assignment
497 	 * of a previously IDLE KSE.
498 	 */
499 	if (kg->kg_idle_kses) {
500 		/*
501 		 * If there are unassigned KSEs then we definitly
502 		 * will be assigned one from the idle KSE list.
503 		 * If we are the last, we should get the "last
504 		 * assigned" pointer set to us as well.
505 		 */
506 		ke = TAILQ_FIRST(&kg->kg_iq);
507 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
508 		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
509 		ke->ke_state = KES_UNQUEUED;
510 		kg->kg_idle_kses--;
511 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
512 		ke->ke_thread = td;
513 		td->td_kse = ke;
514 		runq_add(&runq, ke);
515 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
516 		if (TAILQ_NEXT(td, td_runq) == NULL) {
517 			kg->kg_last_assigned = td;
518 		}
519 	} else if (kg->kg_last_assigned &&
520 		(kg->kg_last_assigned->td_priority > td->td_priority)) {
521 		/*
522 		 * If there were none last-assigned, all KSEs
523 		 * are actually out running as we speak.
524 		 * If there was a last assigned, but we didn't see it,
525 		 * we must be inserting before it, so take the KSE from
526 		 * the last assigned, and back it up one entry. Then,
527 		 * assign the KSE to the new thread and adjust its priority.
528 		 */
529 		td2 = kg->kg_last_assigned;
530 		ke = td2->td_kse;
531 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
532 		kg->kg_last_assigned =
533 		    TAILQ_PREV(td2, threadqueue, td_runq);
534 		td2->td_kse = NULL;
535 		td->td_kse = ke;
536 		ke->ke_thread = td;
537 		runq_readjust(&runq, ke);
538 KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
539 	}
540 }
541 #endif
542 
543 /************************************************************************
544  * Critical section marker functions					*
545  ************************************************************************/
546 /* Critical sections that prevent preemption. */
547 void
548 critical_enter(void)
549 {
550 	struct thread *td;
551 
552 	td = curthread;
553 	if (td->td_critnest == 0)
554 		cpu_critical_enter();
555 	td->td_critnest++;
556 }
557 
558 void
559 critical_exit(void)
560 {
561 	struct thread *td;
562 
563 	td = curthread;
564 	if (td->td_critnest == 1) {
565 		td->td_critnest = 0;
566 		cpu_critical_exit();
567 	} else {
568 		td->td_critnest--;
569 	}
570 }
571 
572 
573 /************************************************************************
574  * SYSTEM RUN QUEUE manipulations and tests				*
575  ************************************************************************/
576 /*
577  * Initialize a run structure.
578  */
579 void
580 runq_init(struct runq *rq)
581 {
582 	int i;
583 
584 	bzero(rq, sizeof *rq);
585 	for (i = 0; i < RQ_NQS; i++)
586 		TAILQ_INIT(&rq->rq_queues[i]);
587 }
588 
589 /*
590  * Clear the status bit of the queue corresponding to priority level pri,
591  * indicating that it is empty.
592  */
593 static __inline void
594 runq_clrbit(struct runq *rq, int pri)
595 {
596 	struct rqbits *rqb;
597 
598 	rqb = &rq->rq_status;
599 	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
600 	    rqb->rqb_bits[RQB_WORD(pri)],
601 	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
602 	    RQB_BIT(pri), RQB_WORD(pri));
603 	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
604 }
605 
606 /*
607  * Find the index of the first non-empty run queue.  This is done by
608  * scanning the status bits, a set bit indicates a non-empty queue.
609  */
610 static __inline int
611 runq_findbit(struct runq *rq)
612 {
613 	struct rqbits *rqb;
614 	int pri;
615 	int i;
616 
617 	rqb = &rq->rq_status;
618 	for (i = 0; i < RQB_LEN; i++)
619 		if (rqb->rqb_bits[i]) {
620 			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
621 			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
622 			    rqb->rqb_bits[i], i, pri);
623 			return (pri);
624 		}
625 
626 	return (-1);
627 }
628 
629 /*
630  * Set the status bit of the queue corresponding to priority level pri,
631  * indicating that it is non-empty.
632  */
633 static __inline void
634 runq_setbit(struct runq *rq, int pri)
635 {
636 	struct rqbits *rqb;
637 
638 	rqb = &rq->rq_status;
639 	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
640 	    rqb->rqb_bits[RQB_WORD(pri)],
641 	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
642 	    RQB_BIT(pri), RQB_WORD(pri));
643 	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
644 }
645 
646 /*
647  * Add the KSE to the queue specified by its priority, and set the
648  * corresponding status bit.
649  */
650 void
651 runq_add(struct runq *rq, struct kse *ke)
652 {
653 	struct rqhead *rqh;
654 	int pri;
655 
656 	mtx_assert(&sched_lock, MA_OWNED);
657 	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
658 	KASSERT((ke->ke_thread->td_kse != NULL), ("runq_add: No KSE on thread"));
659 	if (ke->ke_state == KES_ONRUNQ)
660 		return;
661 #if defined(INVARIANTS) && defined(DIAGNOSTIC)
662 	KASSERT(ke->ke_state != KES_ONRUNQ,
663 	    ("runq_add: kse %p (%s) already in run queue", ke,
664 	    ke->ke_proc->p_comm));
665 #endif
666 	pri = ke->ke_thread->td_priority / RQ_PPQ;
667 	ke->ke_rqindex = pri;
668 	runq_setbit(rq, pri);
669 	rqh = &rq->rq_queues[pri];
670 	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
671 	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
672 	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
673 	ke->ke_ksegrp->kg_runq_kses++;
674 	ke->ke_state = KES_ONRUNQ;
675 }
676 
677 /*
678  * Return true if there are runnable processes of any priority on the run
679  * queue, false otherwise.  Has no side effects, does not modify the run
680  * queue structure.
681  */
682 int
683 runq_check(struct runq *rq)
684 {
685 	struct rqbits *rqb;
686 	int i;
687 
688 	rqb = &rq->rq_status;
689 	for (i = 0; i < RQB_LEN; i++)
690 		if (rqb->rqb_bits[i]) {
691 			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
692 			    rqb->rqb_bits[i], i);
693 			return (1);
694 		}
695 	CTR0(KTR_RUNQ, "runq_check: empty");
696 
697 	return (0);
698 }
699 
700 /*
701  * Find and remove the highest priority process from the run queue.
702  * If there are no runnable processes, the per-cpu idle process is
703  * returned.  Will not return NULL under any circumstances.
704  */
705 struct kse *
706 runq_choose(struct runq *rq)
707 {
708 	struct rqhead *rqh;
709 	struct kse *ke;
710 	int pri;
711 
712 	mtx_assert(&sched_lock, MA_OWNED);
713 	while ((pri = runq_findbit(rq)) != -1) {
714 		rqh = &rq->rq_queues[pri];
715 		ke = TAILQ_FIRST(rqh);
716 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
717 		CTR3(KTR_RUNQ,
718 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
719 KASSERT(ke->ke_procq.tqe_prev != NULL, ("no prev"));
720 if (ke->ke_procq.tqe_next)
721 	KASSERT(ke->ke_procq.tqe_next->ke_procq.tqe_prev != NULL, ("no next"));
722 		TAILQ_REMOVE(rqh, ke, ke_procq);
723 		ke->ke_ksegrp->kg_runq_kses--;
724 		if (TAILQ_EMPTY(rqh)) {
725 			CTR0(KTR_RUNQ, "runq_choose: empty");
726 			runq_clrbit(rq, pri);
727 		}
728 
729 		ke->ke_state = KES_RUNNING;
730 		KASSERT((ke->ke_thread != NULL),
731 		    ("runq_choose: No thread on KSE"));
732 		KASSERT((ke->ke_thread->td_kse != NULL),
733 		    ("runq_choose: No KSE on thread"));
734 		return (ke);
735 	}
736 	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
737 
738 	return (NULL);
739 }
740 
741 /*
742  * Remove the KSE from the queue specified by its priority, and clear the
743  * corresponding status bit if the queue becomes empty.
744  * Caller must set ke->ke_state afterwards.
745  */
746 void
747 runq_remove(struct runq *rq, struct kse *ke)
748 {
749 	struct rqhead *rqh;
750 	int pri;
751 
752 	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
753 	mtx_assert(&sched_lock, MA_OWNED);
754 	pri = ke->ke_rqindex;
755 	rqh = &rq->rq_queues[pri];
756 	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
757 	    ke, ke->ke_thread->td_priority, pri, rqh);
758 	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
759 	TAILQ_REMOVE(rqh, ke, ke_procq);
760 	if (TAILQ_EMPTY(rqh)) {
761 		CTR0(KTR_RUNQ, "runq_remove: empty");
762 		runq_clrbit(rq, pri);
763 	}
764 	ke->ke_state = KES_UNQUEUED;
765 	ke->ke_ksegrp->kg_runq_kses--;
766 }
767 
768 static void
769 runq_readjust(struct runq *rq, struct kse *ke)
770 {
771 
772 	if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
773 		runq_remove(rq, ke);
774 		runq_add(rq, ke);
775 	}
776 }
777 
778 void
779 thread_sanity_check(struct thread *td)
780 {
781 	struct proc *p;
782 	struct ksegrp *kg;
783 	struct kse *ke;
784 	struct thread *td2;
785 	unsigned int prevpri;
786 	int	saw_lastassigned;
787 	int unassigned;
788 	int assigned;
789 
790 	p = td->td_proc;
791 	kg = td->td_ksegrp;
792 	ke = td->td_kse;
793 
794 	if (kg != &p->p_ksegrp) {
795 		panic ("wrong ksegrp");
796 	}
797 
798 	if (ke) {
799 		if (ke != &p->p_kse) {
800 			panic("wrong kse");
801 		}
802 		if (ke->ke_thread != td) {
803 			panic("wrong thread");
804 		}
805 	}
806 
807 	if ((p->p_flag & P_KSES) == 0) {
808 		if (ke == NULL) {
809 			panic("non KSE thread lost kse");
810 		}
811 	} else {
812 		prevpri = 0;
813 		saw_lastassigned = 0;
814 		unassigned = 0;
815 		assigned = 0;
816 		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
817 			if (td2->td_priority < prevpri) {
818 				panic("thread runqueue unosorted");
819 			}
820 			prevpri = td2->td_priority;
821 			if (td2->td_kse) {
822 				assigned++;
823 				if (unassigned) {
824 					panic("unassigned before assigned");
825 				}
826  				if  (kg->kg_last_assigned == NULL) {
827 					panic("lastassigned corrupt");
828 				}
829 				if (saw_lastassigned) {
830 					panic("last assigned not last");
831 				}
832 				if (td2->td_kse->ke_thread != td2) {
833 					panic("mismatched kse/thread");
834 				}
835 			} else {
836 				unassigned++;
837 			}
838 			if (td2 == kg->kg_last_assigned) {
839 				saw_lastassigned = 1;
840 				if (td2->td_kse == NULL) {
841 					panic("last assigned not assigned");
842 				}
843 			}
844 		}
845 		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
846 			panic("where on earth does lastassigned point?");
847 		}
848 		FOREACH_THREAD_IN_GROUP(kg, td2) {
849 			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
850 			    (td2->td_state == TDS_RUNQ)) {
851 				assigned++;
852 				if (td2->td_kse == NULL) {
853 					panic ("BOUND thread with no KSE");
854 				}
855 			}
856 		}
857 #if 0
858 		if ((unassigned + assigned) != kg->kg_runnable) {
859 			panic("wrong number in runnable");
860 		}
861 #endif
862 	}
863 }
864 
865