xref: /freebsd/sys/kern/kern_switch.c (revision a90f3f2547a012ead16d3f10b13d2868bff1f25f)
/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#ifndef KERN_SWITCH_INCLUDE
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#else  /* KERN_SWITCH_INCLUDE */
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif

#include <machine/cpu.h>
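
/*
 * Editorial note (not part of the original source): this file is compiled
 * twice.  Built on its own, only the headers above the #else are pulled in
 * and the rest of the file is skipped.  The scheduler implementations in
 * this era of the tree (sched_4bsd.c and sched_ule.c, as far as we can
 * tell) define KERN_SWITCH_INCLUDE and then #include this file, so that
 * the run queue code below is compiled with the scheduler's own definition
 * of struct td_sched, roughly:
 *
 *	#define	KERN_SWITCH_INCLUDE 1
 *	#include "kern_switch.c"
 */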

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define	KTR_CRITICAL	KTR_SCHED
#else
#define	KTR_CRITICAL	0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * kern.sched.preemption lets user space determine whether preemption
 * support is compiled into the kernel.  It is read-only; it is not
 * currently a boot-time or run-time tunable.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");
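
/*
 * Illustration (editorial, not in the original source): user space can
 * read this flag with sysctl(3), e.g.:
 *
 *	int preempt;
 *	size_t len = sizeof(preempt);
 *
 *	if (sysctlbyname("kern.sched.preemption", &preempt, &len,
 *	    NULL, 0) == 0)
 *		printf("kernel preemption: %s\n", preempt ? "yes" : "no");
 */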

#ifdef SCHED_STATS
long switch_preempt;
long switch_owepreempt;
long switch_turnstile;
long switch_sleepq;
long switch_sleepqtimo;
long switch_relinquish;
long switch_needresched;
static SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, preempt, CTLFLAG_RD, &switch_preempt, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, owepreempt, CTLFLAG_RD, &switch_owepreempt, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, turnstile, CTLFLAG_RD, &switch_turnstile, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepq, CTLFLAG_RD, &switch_sleepq, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepqtimo, CTLFLAG_RD, &switch_sleepqtimo, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, relinquish, CTLFLAG_RD, &switch_relinquish, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, needresched, CTLFLAG_RD, &switch_needresched, 0, "");
static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
	int error;
	int val;

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val == 0)
		return (0);
	switch_preempt = 0;
	switch_owepreempt = 0;
	switch_turnstile = 0;
	switch_sleepq = 0;
	switch_sleepqtimo = 0;
	switch_relinquish = 0;
	switch_needresched = 0;

	return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif
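
/*
 * Illustration (editorial, not in the original source): with SCHED_STATS
 * compiled in, the counters above can be cleared from user space by
 * writing any non-zero value to the reset node, e.g.:
 *
 *	int one = 1;
 *
 *	sysctlbyname("kern.sched.stats.reset", NULL, NULL, &one, sizeof(one));
 */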

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
	struct thread *td;

retry:
	td = sched_choose();

	/*
	 * If we are panicking, only allow system threads and the thread
	 * that panicked to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}
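
/*
 * Editorial note (not in the original source): when panicstr is set, any
 * unsuitable thread chosen above has already been taken off the run queue
 * by sched_choose(), so it is simply marked "can run" and the loop retries.
 * Eventually sched_choose() falls back to the idle thread, which is a
 * system thread, so the loop terminates.
 */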

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	td->td_critnest++;
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));

	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		if (td->td_owepreempt) {
			td->td_critnest = 1;
			thread_lock(td);
			td->td_critnest--;
			SCHED_STAT_INC(switch_owepreempt);
			mi_switch(SW_INVOL|SW_PREEMPT, NULL);
			thread_unlock(td);
		}
	} else
		td->td_critnest--;

	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}
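
/*
 * Illustration (editorial, not in the original source): critical sections
 * nest by bumping td_critnest, and a preemption that arrives while the
 * count is non-zero is recorded in td_owepreempt and performed by the
 * outermost critical_exit().  A typical caller looks roughly like:
 *
 *	critical_enter();
 *	... access per-CPU data that must not be preempted away ...
 *	critical_exit();
 */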

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run queue structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}
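
/*
 * Worked example (editorial, not in the original source; values assume a
 * 32-bit rqb_word_t, i.e. RQB_BPW == 32 and RQB_L2BPW == 5, so that the
 * CTASSERT above gives RQB_LEN == 2 for RQ_NQS == 64): a thread queued at
 * priority level 40 sets bit 8 of status word 1.  RQB_FFS() finds bit 8
 * and i << RQB_L2BPW adds the 32 levels covered by word 0:
 *
 *	pri = 8 + (1 << 5) = 40
 */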

static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
	struct rqbits *rqb;
	rqb_word_t mask;
	int i;

	/*
	 * Set the mask for the first word so we ignore priorities before 'pri'.
	 */
	mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
	rqb = &rq->rq_status;
again:
	for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
		mask = rqb->rqb_bits[i] & mask;
		if (mask == 0)
			continue;
		pri = RQB_FFS(mask) + (i << RQB_L2BPW);
		CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
		    mask, i, pri);
		return (pri);
	}
	if (pri == 0)
		return (-1);
	/*
	 * Wrap back around to the beginning of the list just once so we
	 * scan the whole thing.
	 */
	pri = 0;
	goto again;
}
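
/*
 * Worked example (editorial, not in the original source; again assuming
 * RQB_BPW == 32): starting the search at pri == 37, the initial mask is
 * ~0 << (37 & 31) == ~0 << 5, which hides bits 0-4 of word 1 so that
 * levels 32-36 are skipped on the first pass.  If nothing at or above the
 * starting level is runnable, pri is reset to 0 and the bitmap is scanned
 * once more from the beginning, giving a circular search.
 */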

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct td_sched *ts, int flags)
{
	struct rqhead *rqh;
	int pri;

	pri = ts->ts_thread->td_priority / RQ_PPQ;
	ts->ts_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add: td=%p ts=%p pri=%d %d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
	} else {
		TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
	}
}
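
/*
 * Worked example (editorial, not in the original source): RQ_PPQ is the
 * number of thread priority levels that share one run queue; with the
 * usual RQ_PPQ == 4, a thread at td_priority 130 lands in queue
 * 130 / 4 == 32.  Threads re-queued with SRQ_PREEMPTED are inserted at
 * the head of their queue so they resume ahead of equal-priority peers.
 */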

void
runq_add_pri(struct runq *rq, struct td_sched *ts, u_char pri, int flags)
{
	struct rqhead *rqh;

	KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
	ts->ts_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add_pri: td=%p ts=%p pri=%d idx=%d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
	} else {
		TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
	}
}

/*
 * Return true if there are runnable threads of any priority on the run
 * queue, false otherwise.  This has no side effects and does not modify
 * the run queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find the highest priority thread on the run queue.
 */
struct td_sched *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;

	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		/* fuzz == 1 is normal; 0 or less is ignored. */
		if (fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = fuzz;
			int cpu = PCPU_GET(cpuid);
			struct td_sched *ts2;
			ts2 = ts = TAILQ_FIRST(rqh);

			while (count-- && ts2) {
				if (ts2->ts_thread->td_lastcpu == cpu) {
					ts = ts2;
					break;
				}
				ts2 = TAILQ_NEXT(ts2, ts_procq);
			}
		} else
			ts = TAILQ_FIRST(rqh);
		KASSERT(ts != NULL, ("runq_choose_fuzz: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose_fuzz: pri=%d td_sched=%p rqh=%p", pri, ts, rqh);
		return (ts);
	}
	CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

	return (NULL);
}
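
/*
 * Editorial note (not in the original source): the fuzz trades strict FIFO
 * order within a priority level for CPU affinity by peeking at up to
 * 'fuzz' entries at the head of the queue and preferring one that last ran
 * on the current CPU.  As far as we can tell, the 4BSD scheduler drives
 * this from its SMP path with a small, sysctl-tunable value
 * (kern.sched.runq_fuzz), roughly:
 *
 *	ts = runq_choose_fuzz(&runq, runq_fuzz);
 */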

/*
 * Find the highest priority thread on the run queue.
 */
struct td_sched *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;

	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ts = TAILQ_FIRST(rqh);
		KASSERT(ts != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d td_sched=%p rqh=%p", pri, ts, rqh);
		return (ts);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

struct td_sched *
runq_choose_from(struct runq *rq, u_char idx)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;

	if ((pri = runq_findbit_from(rq, idx)) != -1) {
		rqh = &rq->rq_queues[pri];
		ts = TAILQ_FIRST(rqh);
		KASSERT(ts != NULL, ("runq_choose_from: no proc on busy queue"));
		CTR4(KTR_RUNQ,
		    "runq_choose_from: pri=%d td_sched=%p idx=%d rqh=%p",
		    pri, ts, ts->ts_rqindex, rqh);
		return (ts);
	}
	CTR1(KTR_RUNQ, "runq_choose_from: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * The caller must set the thread's state afterwards.
 */
void
runq_remove(struct runq *rq, struct td_sched *ts)
{

	runq_remove_idx(rq, ts, NULL);
}

void
runq_remove_idx(struct runq *rq, struct td_sched *ts, u_char *idx)
{
	struct rqhead *rqh;
	u_char pri;

	KASSERT(ts->ts_thread->td_flags & TDF_INMEM,
		("runq_remove_idx: thread swapped out"));
	pri = ts->ts_rqindex;
	KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_remove_idx: td=%p, ts=%p pri=%d %d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	{
		struct td_sched *nts;

		TAILQ_FOREACH(nts, rqh, ts_procq)
			if (nts == ts)
				break;
		if (ts != nts)
			panic("runq_remove_idx: ts %p not on rqindex %d",
			    ts, pri);
	}
	TAILQ_REMOVE(rqh, ts, ts_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove_idx: empty");
		runq_clrbit(rq, pri);
		if (idx != NULL && *idx == pri)
			*idx = (pri + 1) % RQ_NQS;
	}
}
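
/*
 * Editorial note (not in the original source): the optional 'idx' argument
 * serves callers that scan the run queue circularly with
 * runq_findbit_from().  If the queue that just became empty is the one the
 * caller's saved index points at, the index is advanced (with wrap-around)
 * so the next search does not restart on a now-empty queue.
 */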

/****** functions that are temporarily here ***********/
#include <vm/uma.h>

/*
 * Allocate scheduler-specific per-process resources.
 * The thread and proc have already been linked in.
 *
 * Called from:
 *  proc_init() (UMA init method)
 */
void
sched_newproc(struct proc *p, struct thread *td)
{
}

/*
 * A thread is either being created or recycled.
 * Fix up the per-scheduler resources associated with it.
 * Called from:
 *  sched_fork_thread()
 *  thread_dtor()  (*may go away)
 *  thread_init()  (*may go away)
 */
void
sched_newthread(struct thread *td)
{
	struct td_sched *ts;

	ts = (struct td_sched *) (td + 1);
	bzero(ts, sizeof(*ts));
	td->td_sched = ts;
	ts->ts_thread = td;
}
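
/*
 * Editorial note (not in the original source): the (td + 1) cast relies on
 * the thread allocation being sized to hold a struct td_sched immediately
 * after the struct thread itself; as far as we can tell the thread UMA
 * zone is created with the size returned by sched_sizeof_thread(), which
 * is sizeof(struct thread) plus the scheduler's per-thread data, so
 * td + 1 points at that extra space.
 */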

#endif /* KERN_SWITCH_INCLUDE */