xref: /freebsd/sys/kern/kern_synch.c (revision 7660b554bc59a07be0431c17e0e33815818baa69)
/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL)

int	hogticks;
int	lbolt;

static struct callout loadav_callout;
static struct callout lbolt_callout;

struct loadavg averunnable =
	{ {0, 0, 0}, FSCALE };	/* load average, of runnable procs */
/*
 * Constants for averages over 1, 5, and 15 minutes
 * when sampling at 5 second intervals.
 */
static fixpt_t cexp[3] = {
	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
};
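
/*
 * Illustrative note on the constants above (a sketch, not part of the
 * original source): with the 5 second sampling interval, the decay
 * factor for an N-minute average is exp(-5 / (60 * N)), which gives
 * exp(-1/12), exp(-1/60) and exp(-1/180) for the 1, 5 and 15 minute
 * averages, scaled by FSCALE to fixed point.  Each sample then updates
 * the average as
 *
 *	avg = avg * cexp[i] + nrun * (1 - cexp[i])
 *
 * which is exactly the fixed-point expression used in loadav() below.
 */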

/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
static int      fscale __unused = FSCALE;
SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, "");
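
/*
 * A minimal userland sketch of the intended split (an illustration only,
 * not part of the kernel source): the kernel scales by FSCALE, while a
 * program reads the raw fixed-point averages from vm.loadavg together
 * with kern.fscale and divides the two; getloadavg(3) performs the same
 * conversion.
 *
 *	#include <sys/types.h>
 *	#include <sys/resource.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct loadavg la;
 *		int fscale;
 *		size_t len;
 *
 *		len = sizeof(la);
 *		if (sysctlbyname("vm.loadavg", &la, &len, NULL, 0) == -1)
 *			return (1);
 *		len = sizeof(fscale);
 *		if (sysctlbyname("kern.fscale", &fscale, &len, NULL, 0) == -1)
 *			return (1);
 *		printf("%.2f %.2f %.2f\n",
 *		    (double)la.ldavg[0] / fscale,
 *		    (double)la.ldavg[1] / fscale,
 *		    (double)la.ldavg[2] / fscale);
 *		return (0);
 *	}
 */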

static void	endtsleep(void *);
static void	loadav(void *arg);
static void	lboltcb(void *arg);

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
static TAILQ_HEAD(slpquehead, thread) slpque[TABLESIZE];
#define LOOKUP(x)	(((intptr_t)(x) >> 8) & (TABLESIZE - 1))
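
/*
 * Illustrative example of the hash (the addresses are made up): wait
 * channels in different 256-byte blocks land in different buckets, e.g.
 * with TABLESIZE 128
 *
 *	LOOKUP(0x1000) == (0x1000 >> 8) & 127 == 0x10
 *	LOOKUP(0x1100) == (0x1100 >> 8) & 127 == 0x11
 *
 * while channels within the same 256-byte block share a bucket, which is
 * why wakeup() still compares td_wchan against the ident it was given.
 */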

void
sleepinit(void)
{
	int i;

	hogticks = (hz / 10) * 2;	/* Default only. */
	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are checked
 * before and after sleeping; otherwise signals are not checked.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal.
 *
 * The mutex argument is exited before the caller is suspended, and
 * entered before msleep returns.  If priority includes the PDROP
 * flag the mutex is not entered before returning.
 */
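
/*
 * An illustrative sketch of the usual calling pattern (the "sc" softc
 * and its fields are hypothetical, not part of this file): the caller
 * holds a mutex protecting the condition, sleeps on an address tied to
 * that condition, and re-checks the condition after msleep() returns.
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_ready == 0) {
 *		error = msleep(&sc->sc_ready, &sc->sc_mtx,
 *		    PZERO | PCATCH, "scrdy", hz);
 *		if (error != 0)
 *			break;
 *	}
 *	mtx_unlock(&sc->sc_mtx);
 *
 * Here EWOULDBLOCK means the one second timeout expired, while EINTR or
 * ERESTART means a signal arrived (PCATCH).  The producer sets sc_ready
 * under sc_mtx and then calls wakeup(&sc->sc_ready) to make the sleeper
 * runnable again.
 */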

int
msleep(ident, mtx, priority, wmesg, timo)
	void *ident;
	struct mtx *mtx;
	int priority, timo;
	const char *wmesg;
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int sig, catch = priority & PCATCH;
	int rval = 0;
	WITNESS_SAVE_DECL(mtx);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	/* XXX: mtx == NULL ?? */
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mtx->mtx_object,
	    "Sleeping on \"%s\"", wmesg);
	KASSERT(timo != 0 || mtx_owned(&Giant) || mtx != NULL,
	    ("sleeping without a mutex"));
	/*
	 * If we are capable of async syscalls and there isn't already
	 * another one ready to return, start a new thread
	 * and queue it as ready to run. Note that there is danger here
	 * because we need to make sure that we don't sleep allocating
	 * the thread (recursion here might be bad).
	 */
	mtx_lock_spin(&sched_lock);
	if (p->p_flag & P_SA || p->p_numthreads > 1) {
		/*
		 * Just don't bother if the process is exiting and we
		 * are not the thread performing the exit, or if this
		 * thread has been marked as interrupted.
		 */
		if (catch) {
			if ((p->p_flag & P_WEXIT) && p->p_singlethread != td) {
				mtx_unlock_spin(&sched_lock);
				return (EINTR);
			}
			if (td->td_flags & TDF_INTERRUPT) {
				mtx_unlock_spin(&sched_lock);
				return (td->td_intrval);
			}
		}
	}
	if (cold) {
		/*
		 * During autoconfiguration, just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 * XXX: this used to do "s = splhigh(); splx(safepri);
		 * splx(s);" to give interrupts a chance, but there is
		 * no way to give interrupts a chance now.
		 */
		if (mtx != NULL && priority & PDROP)
			mtx_unlock(mtx);
		mtx_unlock_spin(&sched_lock);
		return (0);
	}
	DROP_GIANT();
	if (mtx != NULL) {
		mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
		WITNESS_SAVE(&mtx->mtx_object, mtx);
		mtx_unlock(mtx);
		if (priority & PDROP)
			mtx = NULL;
	}
	KASSERT(p != NULL, ("msleep1"));
	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));

	CTR5(KTR_PROC, "msleep: thread %p (pid %d, %s) on %s (%p)",
	    td, p->p_pid, p->p_comm, wmesg, ident);

	td->td_wchan = ident;
	td->td_wmesg = wmesg;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], td, td_slpq);
	TD_SET_ON_SLEEPQ(td);
	if (timo)
		callout_reset(&td->td_slpcallout, timo, endtsleep, td);
	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling thread_suspend_check, as we could stop there, and
	 * a wakeup or a SIGCONT (or both) could occur while we were stopped
	 * without resuming us; thus we must be ready for sleep
	 * when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig.
	 */
	if (catch) {
		CTR3(KTR_PROC, "msleep caught: thread %p (pid %d, %s)", td,
		    p->p_pid, p->p_comm);
		td->td_flags |= TDF_SINTR;
		mtx_unlock_spin(&sched_lock);
		PROC_LOCK(p);
		mtx_lock(&p->p_sigacts->ps_mtx);
		sig = cursig(td);
		mtx_unlock(&p->p_sigacts->ps_mtx);
		if (sig == 0 && thread_suspend_check(1))
			sig = SIGSTOP;
		mtx_lock_spin(&sched_lock);
		PROC_UNLOCK(p);
		if (sig != 0) {
			if (TD_ON_SLEEPQ(td))
				unsleep(td);
		} else if (!TD_ON_SLEEPQ(td))
			catch = 0;
	} else
		sig = 0;

	/*
	 * Let the scheduler know we're about to voluntarily go to sleep.
	 */
	sched_sleep(td, priority & PRIMASK);

	if (TD_ON_SLEEPQ(td)) {
		p->p_stats->p_ru.ru_nvcsw++;
		TD_SET_SLEEPING(td);
		mi_switch();
	}
	/*
	 * We're awake from voluntary sleep.
	 */
	CTR3(KTR_PROC, "msleep resume: thread %p (pid %d, %s)", td, p->p_pid,
	    p->p_comm);
	KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
	td->td_flags &= ~TDF_SINTR;
	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		if (sig == 0)
			rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL) {
		td->td_flags &= ~TDF_TIMOFAIL;
	} else if (timo && callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * This isn't supposed to be pretty.  If we are here, then
		 * the endtsleep() callout is currently executing on another
		 * CPU and is either spinning on the sched_lock or will be
		 * soon.  If we don't synchronize here, there is a chance
		 * that this process may msleep() again before the callout
		 * has a chance to run and the callout may end up waking up
		 * the wrong msleep().  Yuck.
		 */
		TD_SET_SLEEPING(td);
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	if ((td->td_flags & TDF_INTERRUPT) && (priority & PCATCH) &&
	    (rval == 0)) {
		rval = td->td_intrval;
	}
	mtx_unlock_spin(&sched_lock);
	if (rval == 0 && catch) {
		PROC_LOCK(p);
		/* XXX: shouldn't we always be calling cursig()? */
		mtx_lock(&p->p_sigacts->ps_mtx);
		if (sig != 0 || (sig = cursig(td))) {
			if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
				rval = EINTR;
			else
				rval = ERESTART;
		}
		mtx_unlock(&p->p_sigacts->ps_mtx);
		PROC_UNLOCK(p);
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	if (mtx != NULL) {
		mtx_lock(mtx);
		WITNESS_RESTORE(&mtx->mtx_object, mtx);
	}
	return (rval);
}

/*
 * Implement timeout for msleep().
 *
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 * MP-safe, called without the Giant mutex.
 */
static void
endtsleep(arg)
	void *arg;
{
	register struct thread *td;

	td = (struct thread *)arg;
	CTR3(KTR_PROC, "endtsleep: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	/*
	 * This is the other half of the synchronization with msleep()
	 * described above.  If the thread is no longer on the sleep queue,
	 * we lost the race with a wakeup and set TDF_TIMOFAIL so that
	 * msleep() can synchronize with us; either way the thread is put
	 * back on the run queue.
	 */
	if (TD_ON_SLEEPQ(td)) {
		TAILQ_REMOVE(&slpque[LOOKUP(td->td_wchan)], td, td_slpq);
		TD_CLR_ON_SLEEPQ(td);
		td->td_flags |= TDF_TIMEOUT;
		td->td_wmesg = NULL;
	} else
		td->td_flags |= TDF_TIMOFAIL;
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Abort a thread, as if an interrupt had occurred.  Only abort
 * interruptible waits (unfortunately it isn't safe to abort others).
 * This is about identical to cv_abort().
 * Think about merging them?
 * Also, whatever the signal code does...
 */
void
abortsleep(struct thread *td)
{

	mtx_assert(&sched_lock, MA_OWNED);
	/*
	 * If the TDF_TIMEOUT flag is set, just leave. A
	 * timeout is scheduled anyhow.
	 */
	if ((td->td_flags & (TDF_TIMEOUT | TDF_SINTR)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			unsleep(td);
			TD_CLR_SLEEPING(td);
			setrunnable(td);
		}
	}
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(struct thread *td)
{

	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		TAILQ_REMOVE(&slpque[LOOKUP(td->td_wchan)], td, td_slpq);
		TD_CLR_ON_SLEEPQ(td);
		td->td_wmesg = NULL;
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct thread *td;
	struct thread *ntd;
	struct proc *p;

	mtx_lock_spin(&sched_lock);
	qp = &slpque[LOOKUP(ident)];
restart:
	for (td = TAILQ_FIRST(qp); td != NULL; td = ntd) {
		ntd = TAILQ_NEXT(td, td_slpq);
		if (td->td_wchan == ident) {
			unsleep(td);
			TD_CLR_SLEEPING(td);
			setrunnable(td);
			p = td->td_proc;
			CTR3(KTR_PROC,"wakeup: thread %p (pid %d, %s)",
			    td, p->p_pid, p->p_comm);
			goto restart;
		}
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Make a process sleeping on the specified identifier runnable.
 * May wake more than one process if a target process is currently
 * swapped out.
 */
void
wakeup_one(ident)
	register void *ident;
{
	register struct proc *p;
	register struct slpquehead *qp;
	register struct thread *td;
	struct thread *ntd;

	mtx_lock_spin(&sched_lock);
	qp = &slpque[LOOKUP(ident)];
	for (td = TAILQ_FIRST(qp); td != NULL; td = ntd) {
		ntd = TAILQ_NEXT(td, td_slpq);
		if (td->td_wchan == ident) {
			unsleep(td);
			TD_CLR_SLEEPING(td);
			setrunnable(td);
			p = td->td_proc;
			CTR3(KTR_PROC,"wakeup1: thread %p (pid %d, %s)",
			    td, p->p_pid, p->p_comm);
			break;
		}
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * The machine independent parts of mi_switch().
 */
void
mi_switch(void)
{
	struct bintime new_switchtime;
	struct thread *td;
	struct thread *newtd;
	struct proc *p;
	u_int sched_nest;

	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
	td = curthread;			/* XXX */
	p = td->td_proc;		/* XXX */
	KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code"));
#ifdef INVARIANTS
	if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
		mtx_assert(&Giant, MA_NOTOWNED);
#endif
	KASSERT(td->td_critnest == 1,
	    ("mi_switch: switch in a critical section"));

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	binuptime(&new_switchtime);
	bintime_add(&p->p_runtime, &new_switchtime);
	bintime_sub(&p->p_runtime, PCPU_PTR(switchtime));

#ifdef DDB
	/*
	 * Don't perform context switches from the debugger.
	 */
	if (db_active) {
		mtx_unlock_spin(&sched_lock);
		db_print_backtrace();
		db_error("Context switches not allowed in the debugger");
	}
#endif

	/*
	 * Check if the process exceeds its cpu resource allocation.  If
	 * over max, arrange to kill the process in ast().
	 */
	if (p->p_cpulimit != RLIM_INFINITY &&
	    p->p_runtime.sec > p->p_cpulimit) {
		p->p_sflag |= PS_XCPU;
		td->td_flags |= TDF_ASTPENDING;
	}

	/*
	 * Finish up stats for outgoing thread.
	 */
	cnt.v_swtch++;
	PCPU_SET(switchtime, new_switchtime);
	CTR3(KTR_PROC, "mi_switch: old thread %p (pid %d, %s)", td, p->p_pid,
	    p->p_comm);
	sched_nest = sched_lock.mtx_recurse;
	if (td->td_proc->p_flag & P_SA)
		thread_switchout(td);
	sched_switchout(td);

	newtd = choosethread();
	if (td != newtd)
		cpu_switch(td, newtd);	/* SHAZAM!! */

	sched_lock.mtx_recurse = sched_nest;
	sched_lock.mtx_lock = (uintptr_t)td;
	sched_switchin(td);

	/*
	 * Start setting up stats etc. for the incoming thread.
	 * Similar code in fork_exit() is returned to by cpu_switch()
	 * in the case of a new thread/process.
	 */
	CTR3(KTR_PROC, "mi_switch: new thread %p (pid %d, %s)", td, p->p_pid,
	    p->p_comm);
	if (PCPU_GET(switchtime.sec) == 0)
		binuptime(PCPU_PTR(switchtime));
	PCPU_SET(switchticks, ticks);

	/*
	 * Call the switchin function while still holding the scheduler lock
	 * (used by the idlezero code and the general page-zeroing code)
	 */
	if (td->td_switchin)
		td->td_switchin();

	/*
	 * If the last thread was exiting, finish cleaning it up.
	 */
	if ((td = PCPU_GET(deadthread))) {
		PCPU_SET(deadthread, NULL);
		thread_stash(td);
	}
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;
	mtx_assert(&sched_lock, MA_OWNED);
	switch (p->p_state) {
	case PRS_ZOMBIE:
		panic("setrunnable(1)");
	default:
		break;
	}
	switch (td->td_state) {
	case TDS_RUNNING:
	case TDS_RUNQ:
		return;
	case TDS_INHIBITED:
		/*
		 * If we are only inhibited because we are swapped out,
		 * then arrange to swap in this process. Otherwise just return.
		 */
		if (td->td_inhibitors != TDI_SWAPPED)
			return;
		/* XXX: intentional fall-through ? */
	case TDS_CAN_RUN:
		break;
	default:
		printf("state is 0x%x", td->td_state);
		panic("setrunnable(2)");
	}
	if ((p->p_sflag & PS_INMEM) == 0) {
		if ((p->p_sflag & PS_SWAPPINGIN) == 0) {
			p->p_sflag |= PS_SWAPINREQ;
			wakeup(&proc0);
		}
	} else
		sched_wakeup(td);
}

/*
 * Compute a tenex style load average of a quantity on
 * 1, 5 and 15 minute intervals.
 * XXXKSE   Needs complete rewrite when correct info is available.
 * Completely Bogus.. only works with 1:1 (but compiles ok now :-)
 */
static void
loadav(void *arg)
{
	int i, nrun;
	struct loadavg *avg;
	struct proc *p;
	struct thread *td;

	avg = &averunnable;
	sx_slock(&allproc_lock);
	nrun = 0;
	FOREACH_PROC_IN_SYSTEM(p) {
		FOREACH_THREAD_IN_PROC(p, td) {
			switch (td->td_state) {
			case TDS_RUNQ:
			case TDS_RUNNING:
				if ((p->p_flag & P_NOLOAD) != 0)
					goto nextproc;
				nrun++; /* XXXKSE */
			default:
				break;
			}
nextproc:
			continue;
		}
	}
	sx_sunlock(&allproc_lock);
	for (i = 0; i < 3; i++)
		avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;

	/*
	 * Schedule the next update to occur after 5 seconds, but add a
	 * random variation to avoid synchronisation with processes that
	 * run at regular intervals.
	 */
	callout_reset(&loadav_callout, hz * 4 + (int)(random() % (hz * 2 + 1)),
	    loadav, NULL);
}

static void
lboltcb(void *arg)
{
	wakeup(&lbolt);
	callout_reset(&lbolt_callout, hz, lboltcb, NULL);
}

/* ARGSUSED */
static void
sched_setup(dummy)
	void *dummy;
{
	callout_init(&loadav_callout, 0);
	callout_init(&lbolt_callout, CALLOUT_MPSAFE);

	/* Kick off timeout driven events by calling first time. */
	loadav(NULL);
	lboltcb(NULL);
}

/*
 * General purpose yield system call
 */
int
yield(struct thread *td, struct yield_args *uap)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;
	mtx_assert(&Giant, MA_NOTOWNED);
	mtx_lock_spin(&sched_lock);
	kg->kg_proc->p_stats->p_ru.ru_nvcsw++;
	sched_prio(td, PRI_MAX_TIMESHARE);
	mi_switch();
	mtx_unlock_spin(&sched_lock);
	td->td_retval[0] = 0;
	return (0);
}