xref: /freebsd/sys/kern/kern_synch.c (revision c21410e1192bfc47a8bca3ca72ee8ff726bffa4c)
1 /*-
2  * Copyright (c) 1982, 1986, 1990, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
39  * $Id: kern_synch.c,v 1.54 1998/04/04 13:25:20 phk Exp $
40  */
41 
42 #include "opt_ktrace.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/proc.h>
47 #include <sys/kernel.h>
48 #include <sys/signalvar.h>
49 #include <sys/resourcevar.h>
50 #include <sys/vmmeter.h>
51 #include <sys/sysctl.h>
52 #include <vm/vm.h>
53 #include <vm/vm_extern.h>
54 #ifdef KTRACE
55 #include <sys/uio.h>
56 #include <sys/ktrace.h>
57 #endif
58 
59 #include <machine/cpu.h>
60 #include <machine/limits.h>	/* for UCHAR_MAX = typeof(p_priority)_MAX */
61 
/* Run-queue initializer, registered with SYSINIT on the next line. */
static void rqinit __P((void *));
SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL)

u_char	curpriority;		/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

/* File-local timeout callbacks and the sleep-time priority helper. */
static void	endtsleep __P((void *));
static void	roundrobin __P((void *arg));
static void	schedcpu __P((void *arg));
static void	updatepri __P((struct proc *p));

/*
 * Scheduling quantum tunable.  The sysctl handler in this file accepts
 * only values strictly between 0 and MAXIMUM_SCHEDULE_QUANTUM, and the
 * round-robin interval is derived from it as (hz / quantum).
 * DEFAULT_SCHEDULE_QUANTUM may be overridden at build time.
 */
#define MAXIMUM_SCHEDULE_QUANTUM	(1000000) /* arbitrary limit */
#ifndef DEFAULT_SCHEDULE_QUANTUM
#define DEFAULT_SCHEDULE_QUANTUM 10
#endif
static int quantum = DEFAULT_SCHEDULE_QUANTUM; /* default value */
78 
79 static int
80 sysctl_kern_quantum SYSCTL_HANDLER_ARGS
81 {
82 	int error;
83 	int new_val = quantum;
84 
85 	new_val = quantum;
86 	error = sysctl_handle_int(oidp, &new_val, 0, req);
87 	if (error == 0) {
88 		if ((new_val > 0) && (new_val < MAXIMUM_SCHEDULE_QUANTUM)) {
89 			quantum = new_val;
90 		} else {
91 			error = EINVAL;
92 		}
93 	}
94 	return (error);
95 }
96 
97 SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW,
98 	0, sizeof quantum, sysctl_kern_quantum, "I", "");
99 
100 /* maybe_resched: Decide if you need to reschedule or not
101  * taking the priorities and schedulers into account.
102  */
103 static void maybe_resched(struct proc *chk)
104 {
105 	struct proc *p = curproc; /* XXX */
106 
107 	/* If the current scheduler is the idle scheduler or
108 	 * the priority of the new one is higher then reschedule.
109 	 */
110 	if (p == 0 ||
111 	RTP_PRIO_BASE(p->p_rtprio.type) == RTP_PRIO_IDLE ||
112 	(chk->p_priority < curpriority &&
113 	RTP_PRIO_BASE(p->p_rtprio.type) == RTP_PRIO_BASE(chk->p_rtprio.type)) )
114 		need_resched();
115 }
116 
117 #define ROUNDROBIN_INTERVAL (hz / quantum)
118 int roundrobin_interval(void)
119 {
120 	return ROUNDROBIN_INTERVAL;
121 }
122 
123 /*
124  * Force switch among equal priority processes every 100ms.
125  */
126 /* ARGSUSED */
127 static void
128 roundrobin(arg)
129 	void *arg;
130 {
131  	struct proc *p = curproc; /* XXX */
132 
133  	if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type))
134  		need_resched();
135 
136  	timeout(roundrobin, NULL, ROUNDROBIN_INTERVAL);
137 }
138 
139 /*
140  * Constants for digital decay and forget:
141  *	90% of (p_estcpu) usage in 5 * loadav time
142  *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
143  *          Note that, as ps(1) mentions, this can let percentages
144  *          total over 100% (I've seen 137.9% for 3 processes).
145  *
146  * Note that statclock() updates p_estcpu and p_cpticks asynchronously.
147  *
148  * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
149  * That is, the system wants to compute a value of decay such
150  * that the following for loop:
151  * 	for (i = 0; i < (5 * loadavg); i++)
152  * 		p_estcpu *= decay;
153  * will compute
154  * 	p_estcpu *= 0.1;
155  * for all values of loadavg:
156  *
157  * Mathematically this loop can be expressed by saying:
158  * 	decay ** (5 * loadavg) ~= .1
159  *
160  * The system computes decay as:
161  * 	decay = (2 * loadavg) / (2 * loadavg + 1)
162  *
163  * We wish to prove that the system's computation of decay
164  * will always fulfill the equation:
165  * 	decay ** (5 * loadavg) ~= .1
166  *
167  * If we compute b as:
168  * 	b = 2 * loadavg
169  * then
170  * 	decay = b / (b + 1)
171  *
172  * We now need to prove two things:
173  *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
174  *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
175  *
176  * Facts:
177  *         For x close to zero, exp(x) =~ 1 + x, since
178  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
179  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
180  *         For x close to zero, ln(1+x) =~ x, since
181  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
182  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
183  *         ln(.1) =~ -2.30
184  *
185  * Proof of (1):
186  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
187  *	solving for factor,
 *      ln(factor) =~ -2.30/(5*loadav), or
189  *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
190  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
191  *
192  * Proof of (2):
193  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
194  *	solving for power,
195  *      power*ln(b/(b+1)) =~ -2.30, or
196  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
197  *
198  * Actual power values for the implemented algorithm are as follows:
199  *      loadav: 1       2       3       4
200  *      power:  5.68    10.32   14.94   19.55
201  */
202 
/*
 * Calculations for digital decay to forget 90% of usage in 5*loadav sec.
 *
 * loadfactor(loadav) yields twice the given load average.
 * decay_cpu(loadfac, cpu) scales `cpu' by loadfac / (loadfac + FSCALE),
 * which is the b / (b + 1) decay factor when loadfac is a value scaled
 * by FSCALE.
 */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 *
 * The choice between the two formulas is made at compile time by the
 * (FSHIFT >= CCPU_SHIFT) test in schedcpu().
 */
#define	CCPU_SHIFT	11
223 
/*
 * Recompute process priorities, every hz ticks.
 *
 * Walks every process on allproc, ages its swap/sleep counters, decays
 * p_pctcpu and p_estcpu, recomputes the user priority and, where the
 * process sits on a run queue that no longer matches its priority,
 * moves it to the right queue.  Afterwards it calls vmmeter(), wakes
 * anything sleeping on &lbolt, and re-arms itself with timeout() for
 * another hz ticks.  The `arg' parameter is unused.
 */
/* ARGSUSED */
static void
schedcpu(arg)
	void *arg;
{
	/* Decay factor input: twice the first load-average sample. */
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	register struct proc *p;
	register int s;
	register unsigned int newcpu;

	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		/* Decay the displayed CPU percentage by the ccpu factor. */
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splhigh();	/* prevent state changes and protect run queue */
		/*
		 * p_pctcpu is only for ps.
		 * The first form is the shift-based formula (with a
		 * shortcut when hz is exactly 100); the second is the
		 * general formula used when CCPU_SHIFT exceeds FSHIFT.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (hz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
                	100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / hz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / hz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		/*
		 * Decay the CPU estimate, add the nice value and clamp to
		 * UCHAR_MAX.
		 * NOTE(review): the sum is held in an unsigned int; if
		 * p_nice can be negative here the result may wrap and be
		 * clamped to UCHAR_MAX -- confirm the range of p_nice.
		 */
		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice;
		p->p_estcpu = min(newcpu, UCHAR_MAX);
		resetpriority(p);
		if (p->p_priority >= PUSER) {
#define	PPQ	(128 / NQS)		/* priorities per queue */
			/*
			 * Requeue only a runnable, in-memory process other
			 * than curproc whose new user priority maps to a
			 * different queue; otherwise just adopt p_usrpri.
			 */
			if ((p != curproc) &&
#ifdef SMP
			    (u_char)p->p_oncpu == 0xff && 	/* idle */
#endif
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrq(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	vmmeter();
	/* Wake anything sleeping on the once-a-second address. */
	wakeup((caddr_t)&lbolt);
	/* Re-arm for the next pass, hz ticks from now. */
	timeout(schedcpu, (void *)0, hz);
}
291 
292 /*
293  * Recalculate the priority of a process after it has slept for a while.
294  * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
295  * least six times the loadfactor will decay p_estcpu to zero.
296  */
297 static void
298 updatepri(p)
299 	register struct proc *p;
300 {
301 	register unsigned int newcpu = p->p_estcpu;
302 	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
303 
304 	if (p->p_slptime > 5 * loadfac)
305 		p->p_estcpu = 0;
306 	else {
307 		p->p_slptime--;	/* the first time was done in schedcpu */
308 		while (newcpu && --p->p_slptime)
309 			newcpu = (int) decay_cpu(loadfac, newcpu);
310 		p->p_estcpu = min(newcpu, UCHAR_MAX);
311 	}
312 	resetpriority(p);
313 }
314 
/*
 * Sleep queue hash table.
 *
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 *
 * LOOKUP(x) maps a wait channel address to a bucket index in
 * [0, TABLESIZE); TABLESIZE must stay a power of two for the
 * mask to work.
 */
#define TABLESIZE	128
static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE];
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
323 
/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 *
 * tsleep() passes this value to splx() on its cold/panic path.
 */
int safepri;
334 
335 void
336 sleepinit()
337 {
338 	int i;
339 
340 	for (i = 0; i < TABLESIZE; i++)
341 		TAILQ_INIT(&slpque[i]);
342 }
343 
344 /*
345  * General sleep call.  Suspends the current process until a wakeup is
346  * performed on the specified identifier.  The process will then be made
347  * runnable with the specified priority.  Sleeps at most timo/hz seconds
348  * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
349  * before and after sleeping, else signals are not checked.  Returns 0 if
350  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
351  * signal needs to be delivered, ERESTART is returned if the current system
352  * call should be restarted if possible, and EINTR is returned if the system
353  * call should be interrupted by the signal (return EINTR).
354  */
355 int
356 tsleep(ident, priority, wmesg, timo)
357 	void *ident;
358 	int priority, timo;
359 	const char *wmesg;
360 {
361 	struct proc *p = curproc;
362 	int s, sig, catch = priority & PCATCH;
363 	struct callout_handle thandle;
364 
365 #ifdef KTRACE
366 	if (KTRPOINT(p, KTR_CSW))
367 		ktrcsw(p->p_tracep, 1, 0);
368 #endif
369 	s = splhigh();
370 	if (cold || panicstr) {
371 		/*
372 		 * After a panic, or during autoconfiguration,
373 		 * just give interrupts a chance, then just return;
374 		 * don't run any other procs or panic below,
375 		 * in case this is the idle process and already asleep.
376 		 */
377 		splx(safepri);
378 		splx(s);
379 		return (0);
380 	}
381 #ifdef DIAGNOSTIC
382 	if(p == NULL)
383 		panic("tsleep1");
384 	if (ident == NULL || p->p_stat != SRUN)
385 		panic("tsleep");
386 	/* XXX This is not exhaustive, just the most common case */
387 	if ((p->p_procq.tqe_prev != NULL) && (*p->p_procq.tqe_prev == p))
388 		panic("sleeping process already on another queue");
389 #endif
390 	p->p_wchan = ident;
391 	p->p_wmesg = wmesg;
392 	p->p_slptime = 0;
393 	p->p_priority = priority & PRIMASK;
394 	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
395 	if (timo)
396 		thandle = timeout(endtsleep, (void *)p, timo);
397 	/*
398 	 * We put ourselves on the sleep queue and start our timeout
399 	 * before calling CURSIG, as we could stop there, and a wakeup
400 	 * or a SIGCONT (or both) could occur while we were stopped.
401 	 * A SIGCONT would cause us to be marked as SSLEEP
402 	 * without resuming us, thus we must be ready for sleep
403 	 * when CURSIG is called.  If the wakeup happens while we're
404 	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
405 	 */
406 	if (catch) {
407 		p->p_flag |= P_SINTR;
408 		if ((sig = CURSIG(p))) {
409 			if (p->p_wchan)
410 				unsleep(p);
411 			p->p_stat = SRUN;
412 			goto resume;
413 		}
414 		if (p->p_wchan == 0) {
415 			catch = 0;
416 			goto resume;
417 		}
418 	} else
419 		sig = 0;
420 	p->p_stat = SSLEEP;
421 	p->p_stats->p_ru.ru_nvcsw++;
422 	mi_switch();
423 resume:
424 	curpriority = p->p_usrpri;
425 	splx(s);
426 	p->p_flag &= ~P_SINTR;
427 	if (p->p_flag & P_TIMEOUT) {
428 		p->p_flag &= ~P_TIMEOUT;
429 		if (sig == 0) {
430 #ifdef KTRACE
431 			if (KTRPOINT(p, KTR_CSW))
432 				ktrcsw(p->p_tracep, 0, 0);
433 #endif
434 			return (EWOULDBLOCK);
435 		}
436 	} else if (timo)
437 		untimeout(endtsleep, (void *)p, thandle);
438 	if (catch && (sig != 0 || (sig = CURSIG(p)))) {
439 #ifdef KTRACE
440 		if (KTRPOINT(p, KTR_CSW))
441 			ktrcsw(p->p_tracep, 0, 0);
442 #endif
443 		if (p->p_sigacts->ps_sigintr & sigmask(sig))
444 			return (EINTR);
445 		return (ERESTART);
446 	}
447 #ifdef KTRACE
448 	if (KTRPOINT(p, KTR_CSW))
449 		ktrcsw(p->p_tracep, 0, 0);
450 #endif
451 	return (0);
452 }
453 
454 /*
455  * Implement timeout for tsleep.
456  * If process hasn't been awakened (wchan non-zero),
457  * set timeout flag and undo the sleep.  If proc
458  * is stopped, just unsleep so it will remain stopped.
459  */
460 static void
461 endtsleep(arg)
462 	void *arg;
463 {
464 	register struct proc *p;
465 	int s;
466 
467 	p = (struct proc *)arg;
468 	s = splhigh();
469 	if (p->p_wchan) {
470 		if (p->p_stat == SSLEEP)
471 			setrunnable(p);
472 		else
473 			unsleep(p);
474 		p->p_flag |= P_TIMEOUT;
475 	}
476 	splx(s);
477 }
478 
479 /*
480  * Remove a process from its wait queue
481  */
482 void
483 unsleep(p)
484 	register struct proc *p;
485 {
486 	int s;
487 
488 	s = splhigh();
489 	if (p->p_wchan) {
490 		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq);
491 		p->p_wchan = 0;
492 	}
493 	splx(s);
494 }
495 
496 /*
497  * Make all processes sleeping on the specified identifier runnable.
498  */
499 void
500 wakeup(ident)
501 	register void *ident;
502 {
503 	register struct slpquehead *qp;
504 	register struct proc *p;
505 	int s;
506 
507 	s = splhigh();
508 	qp = &slpque[LOOKUP(ident)];
509 restart:
510 	for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) {
511 #ifdef DIAGNOSTIC
512 		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
513 			panic("wakeup");
514 #endif
515 		if (p->p_wchan == ident) {
516 			TAILQ_REMOVE(qp, p, p_procq);
517 			p->p_wchan = 0;
518 			if (p->p_stat == SSLEEP) {
519 				/* OPTIMIZED EXPANSION OF setrunnable(p); */
520 				if (p->p_slptime > 1)
521 					updatepri(p);
522 				p->p_slptime = 0;
523 				p->p_stat = SRUN;
524 				if (p->p_flag & P_INMEM) {
525 					setrunqueue(p);
526 					maybe_resched(p);
527 				} else {
528 					p->p_flag |= P_SWAPINREQ;
529 					wakeup((caddr_t)&proc0);
530 				}
531 				/* END INLINE EXPANSION */
532 				goto restart;
533 			}
534 		}
535 	}
536 	splx(s);
537 }
538 
539 /*
540  * Make a process sleeping on the specified identifier runnable.
541  * May wake more than one process if a target prcoess is currently
542  * swapped out.
543  */
544 void
545 wakeup_one(ident)
546 	register void *ident;
547 {
548 	register struct slpquehead *qp;
549 	register struct proc *p;
550 	int s;
551 
552 	s = splhigh();
553 	qp = &slpque[LOOKUP(ident)];
554 
555 	for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) {
556 #ifdef DIAGNOSTIC
557 		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
558 			panic("wakeup_one");
559 #endif
560 		if (p->p_wchan == ident) {
561 			TAILQ_REMOVE(qp, p, p_procq);
562 			p->p_wchan = 0;
563 			if (p->p_stat == SSLEEP) {
564 				/* OPTIMIZED EXPANSION OF setrunnable(p); */
565 				if (p->p_slptime > 1)
566 					updatepri(p);
567 				p->p_slptime = 0;
568 				p->p_stat = SRUN;
569 				if (p->p_flag & P_INMEM) {
570 					setrunqueue(p);
571 					maybe_resched(p);
572 					break;
573 				} else {
574 					p->p_flag |= P_SWAPINREQ;
575 					wakeup((caddr_t)&proc0);
576 				}
577 				/* END INLINE EXPANSION */
578 			}
579 		}
580 	}
581 	splx(s);
582 }
583 
584 /*
585  * The machine independent parts of mi_switch().
586  * Must be called at splstatclock() or higher.
587  */
588 void
589 mi_switch()
590 {
591 	register struct proc *p = curproc;	/* XXX */
592 	register struct rlimit *rlim;
593 	register long s, u;
594 	int x;
595 	struct timeval tv;
596 
597 	/*
598 	 * XXX this spl is almost unnecessary.  It is partly to allow for
599 	 * sloppy callers that don't do it (issignal() via CURSIG() is the
600 	 * main offender).  It is partly to work around a bug in the i386
601 	 * cpu_switch() (the ipl is not preserved).  We ran for years
602 	 * without it.  I think there was only a interrupt latency problem.
603 	 * The main caller, tsleep(), does an splx() a couple of instructions
604 	 * after calling here.  The buggy caller, issignal(), usually calls
605 	 * here at spl0() and sometimes returns at splhigh().  The process
606 	 * then runs for a little too long at splhigh().  The ipl gets fixed
607 	 * when the process returns to user mode (or earlier).
608 	 *
609 	 * It would probably be better to always call here at spl0(). Callers
610 	 * are prepared to give up control to another process, so they must
611 	 * be prepared to be interrupted.  The clock stuff here may not
612 	 * actually need splstatclock().
613 	 */
614 	x = splstatclock();
615 
616 #ifdef SIMPLELOCK_DEBUG
617 	if (p->p_simple_locks)
618 		printf("sleep: holding simple lock\n");
619 #endif
620 	/*
621 	 * Compute the amount of time during which the current
622 	 * process was running, and add that to its total so far.
623 	 */
624 	microuptime(&tv);
625 	u = p->p_rtime.tv_usec + (tv.tv_usec - p->p_runtime.tv_usec);
626 	s = p->p_rtime.tv_sec + (tv.tv_sec - p->p_runtime.tv_sec);
627 	if (u < 0) {
628 		u += 1000000;
629 		s--;
630 	} else if (u >= 1000000) {
631 		u -= 1000000;
632 		s++;
633 	}
634 #ifdef SMP
635 	if (s < 0)
636 		s = u = 0;
637 #endif
638 	p->p_rtime.tv_usec = u;
639 	p->p_rtime.tv_sec = s;
640 
641 	/*
642 	 * Check if the process exceeds its cpu resource allocation.
643 	 * If over max, kill it.
644 	 */
645 	if (p->p_stat != SZOMB) {
646 		rlim = &p->p_rlimit[RLIMIT_CPU];
647 		if (s >= rlim->rlim_cur) {
648 			if (s >= rlim->rlim_max)
649 				killproc(p, "exceeded maximum CPU limit");
650 			else {
651 				psignal(p, SIGXCPU);
652 				if (rlim->rlim_cur < rlim->rlim_max)
653 					rlim->rlim_cur += 5;
654 			}
655 		}
656 	}
657 
658 	/*
659 	 * Pick a new current process and record its start time.
660 	 */
661 	cnt.v_swtch++;
662 	cpu_switch(p);
663 	microuptime(&p->p_runtime);
664 	splx(x);
665 }
666 
667 /*
668  * Initialize the (doubly-linked) run queues
669  * to be empty.
670  */
671 /* ARGSUSED*/
672 static void
673 rqinit(dummy)
674 	void *dummy;
675 {
676 	register int i;
677 
678 	for (i = 0; i < NQS; i++) {
679 		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
680 		rtqs[i].ph_link = rtqs[i].ph_rlink = (struct proc *)&rtqs[i];
681 		idqs[i].ph_link = idqs[i].ph_rlink = (struct proc *)&idqs[i];
682 	}
683 }
684 
685 /*
686  * Change process state to be runnable,
687  * placing it on the run queue if it is in memory,
688  * and awakening the swapper if it isn't in memory.
689  */
690 void
691 setrunnable(p)
692 	register struct proc *p;
693 {
694 	register int s;
695 
696 	s = splhigh();
697 	switch (p->p_stat) {
698 	case 0:
699 	case SRUN:
700 	case SZOMB:
701 	default:
702 		panic("setrunnable");
703 	case SSTOP:
704 	case SSLEEP:
705 		unsleep(p);		/* e.g. when sending signals */
706 		break;
707 
708 	case SIDL:
709 		break;
710 	}
711 	p->p_stat = SRUN;
712 	if (p->p_flag & P_INMEM)
713 		setrunqueue(p);
714 	splx(s);
715 	if (p->p_slptime > 1)
716 		updatepri(p);
717 	p->p_slptime = 0;
718 	if ((p->p_flag & P_INMEM) == 0) {
719 		p->p_flag |= P_SWAPINREQ;
720 		wakeup((caddr_t)&proc0);
721 	}
722 	else
723 		maybe_resched(p);
724 }
725 
726 /*
727  * Compute the priority of a process when running in user mode.
728  * Arrange to reschedule if the resulting priority is better
729  * than that of the current process.
730  */
731 void
732 resetpriority(p)
733 	register struct proc *p;
734 {
735 	register unsigned int newpriority;
736 
737 	if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
738 		newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
739 		newpriority = min(newpriority, MAXPRI);
740 		p->p_usrpri = newpriority;
741 	}
742 	maybe_resched(p);
743 }
744 
/*
 * Start the scheduler's periodic work.  roundrobin() and schedcpu()
 * each re-arm themselves with timeout(), so calling them once here is
 * enough to keep them running.  Registered with SYSINIT below; the
 * `dummy' parameter is unused.
 */
/* ARGSUSED */
static void sched_setup __P((void *dummy));
static void
sched_setup(dummy)
	void *dummy;
{
	/* Kick off timeout driven events by calling first time. */
	roundrobin(NULL);
	schedcpu(NULL);
}
SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL)
756 
757