xref: /freebsd/sys/kern/kern_timeout.c (revision 6b3455a7665208c366849f0b2b3bc916fb97516e)
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>

static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
    "Average number of items examined per softclock call. Units = 1/1000");
static int avg_gcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
    "Average number of Giant callouts made per softclock call. Units = 1/1000");
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
    "Average number of MP callouts made per softclock call. Units = 1/1000");
/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/* Exported to machdep.c and/or kern_clock.c.  */
struct callout *callout;
struct callout_list callfree;
int callwheelsize, callwheelbits, callwheelmask;
struct callout_tailq *callwheel;
int softticks;			/* Like ticks, but for softclock(). */
struct mtx callout_lock;
#ifdef DIAGNOSTIC
struct mtx dont_sleep_in_callout;
#endif

static struct callout *nextsoftcheck;	/* Next callout to be checked. */

/*-
 * Locked by callout_lock:
 *   curr_callout    - If a callout is in progress, it is curr_callout.
 *                     If curr_callout is non-NULL, threads waiting on
 *                     callout_wait will be woken up as soon as the
 *                     relevant callout completes.
 *   wakeup_ctr      - Incremented every time a thread wants to wait
 *                     for a callout to complete.  Modified only when
 *                     curr_callout is non-NULL.
 *   wakeup_needed   - If a thread is waiting on callout_wait, then
 *                     wakeup_needed is nonzero.  Set only when
 *                     curr_callout is non-NULL.
 */
static struct callout *curr_callout;
static int wakeup_ctr;
static int wakeup_needed;

/*-
 * Locked by callout_wait_lock:
 *   callout_wait    - If wakeup_needed is set, callout_wait will be
 *                     triggered after the current callout finishes.
 *   wakeup_done_ctr - Set to the current value of wakeup_ctr after
 *                     callout_wait is triggered.
 */
static struct mtx callout_wait_lock;
static struct cv callout_wait;
static int wakeup_done_ctr;
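
/*
 * Summary of the drain handshake implemented below: a thread in
 * _callout_stop_safe(c, 1) that finds its callout currently executing
 * sets wakeup_needed, records wakeup_ctr++ as a cookie and sleeps on
 * callout_wait; softclock(), once the handler has returned, broadcasts
 * callout_wait and publishes the cookie in wakeup_done_ctr so that a
 * waiter whose wakeup was issued while it was switching locks does not
 * sleep and miss it.
 */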

/*
 * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization
 *
 *	This code is called very early in the kernel initialization sequence,
 *	and may be called more than once.
 */
caddr_t
kern_timeout_callwheel_alloc(caddr_t v)
{
	/*
	 * Calculate callout wheel size
	 */
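	/*
	 * Worked example (illustrative values): with ncallout == 1000 the
	 * loop below stops at callwheelsize == 1024 and callwheelbits == 10,
	 * so callwheelmask becomes 0x3ff.
	 */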
	for (callwheelsize = 1, callwheelbits = 0;
	     callwheelsize < ncallout;
	     callwheelsize <<= 1, ++callwheelbits)
		;
	callwheelmask = callwheelsize - 1;

	callout = (struct callout *)v;
	v = (caddr_t)(callout + ncallout);
	callwheel = (struct callout_tailq *)v;
	v = (caddr_t)(callwheel + callwheelsize);
	return(v);
}

/*
 * kern_timeout_callwheel_init() - initialize previously reserved callwheel
 *				   space.
 *
 *	This code is called just once, after the space reserved for the
 *	callout wheel has been finalized.
 */
void
kern_timeout_callwheel_init(void)
{
	int i;

	SLIST_INIT(&callfree);
	for (i = 0; i < ncallout; i++) {
		callout_init(&callout[i], 0);
		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
	}
	for (i = 0; i < callwheelsize; i++) {
		TAILQ_INIT(&callwheel[i]);
	}
	mtx_init(&callout_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
#ifdef DIAGNOSTIC
	mtx_init(&dont_sleep_in_callout, "dont_sleep_in_callout", NULL, MTX_DEF);
#endif
	mtx_init(&callout_wait_lock, "callout_wait_lock", NULL, MTX_DEF);
	cv_init(&callout_wait, "callout_wait");
}

/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */
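
/*
 * In this hashed-wheel scheme a callout scheduled for absolute time
 * c_time hangs off bucket callwheel[c_time & callwheelmask].  Entries
 * from different wheel revolutions can share a bucket, which is why
 * softclock() below compares c_time against the tick it is servicing
 * before running an entry.
 */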

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
void
softclock(void *dummy)
{
	struct callout *c;
	struct callout_tailq *bucket;
	int curticks;
	int steps;	/* #steps since we last allowed interrupts */
	int depth;
	int mpcalls;
	int gcalls;
	int wakeup_cookie;
#ifdef DIAGNOSTIC
	struct bintime bt1, bt2;
	struct timespec ts2;
	static uint64_t maxdt = 36893488147419102LL;	/* 2 msec */
	static timeout_t *lastfunc;
#endif

#ifndef MAX_SOFTCLOCK_STEPS
#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
#endif /* MAX_SOFTCLOCK_STEPS */

	mpcalls = 0;
	gcalls = 0;
	depth = 0;
	steps = 0;
	mtx_lock_spin(&callout_lock);
	while (softticks != ticks) {
		softticks++;
		/*
		 * softticks may be modified by hard clock, so cache
		 * it while we work on a given bucket.
		 */
		curticks = softticks;
		bucket = &callwheel[curticks & callwheelmask];
		c = TAILQ_FIRST(bucket);
		while (c) {
			depth++;
			if (c->c_time != curticks) {
				c = TAILQ_NEXT(c, c_links.tqe);
				++steps;
				if (steps >= MAX_SOFTCLOCK_STEPS) {
					nextsoftcheck = c;
					/* Give interrupts a chance. */
					mtx_unlock_spin(&callout_lock);
					;	/* nothing */
					mtx_lock_spin(&callout_lock);
					c = nextsoftcheck;
					steps = 0;
				}
			} else {
				void (*c_func)(void *);
				void *c_arg;
				int c_flags;

				nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(bucket, c, c_links.tqe);
				c_func = c->c_func;
				c_arg = c->c_arg;
				c_flags = c->c_flags;
				c->c_func = NULL;
				if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
					c->c_flags = CALLOUT_LOCAL_ALLOC;
					SLIST_INSERT_HEAD(&callfree, c,
							  c_links.sle);
				} else {
					c->c_flags =
					    (c->c_flags & ~CALLOUT_PENDING);
				}
				curr_callout = c;
				mtx_unlock_spin(&callout_lock);
				if (!(c_flags & CALLOUT_MPSAFE)) {
					mtx_lock(&Giant);
					gcalls++;
				} else {
					mpcalls++;
				}
#ifdef DIAGNOSTIC
				binuptime(&bt1);
				mtx_lock(&dont_sleep_in_callout);
#endif
				c_func(c_arg);
#ifdef DIAGNOSTIC
				mtx_unlock(&dont_sleep_in_callout);
				binuptime(&bt2);
				bintime_sub(&bt2, &bt1);
				if (bt2.frac > maxdt) {
					if (lastfunc != c_func ||
					    bt2.frac > maxdt * 2) {
						bintime2timespec(&bt2, &ts2);
						printf(
			"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
						    c_func, c_arg,
						    (intmax_t)ts2.tv_sec,
						    ts2.tv_nsec);
					}
					maxdt = bt2.frac;
					lastfunc = c_func;
				}
#endif
				if (!(c_flags & CALLOUT_MPSAFE))
					mtx_unlock(&Giant);
				mtx_lock_spin(&callout_lock);
				curr_callout = NULL;
				if (wakeup_needed) {
					/*
					 * There might be someone waiting
					 * for the callout to complete.
					 */
					wakeup_cookie = wakeup_ctr;
					mtx_unlock_spin(&callout_lock);
					mtx_lock(&callout_wait_lock);
					cv_broadcast(&callout_wait);
					wakeup_done_ctr = wakeup_cookie;
					mtx_unlock(&callout_wait_lock);
					mtx_lock_spin(&callout_lock);
					wakeup_needed = 0;
				}
				steps = 0;
				c = nextsoftcheck;
			}
		}
	}
	avg_depth += (depth * 1000 - avg_depth) >> 8;
	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
	nextsoftcheck = NULL;
	mtx_unlock_spin(&callout_lock);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * callout_handle_init --
 *	Initialize a handle so that using it with untimeout is benign.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that although an
 *	identification value is returned from timeout, the original
 *	arguments to timeout as well as the identifier are used to
 *	identify entries for untimeout.
 */
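
/*
 * Illustrative sketch only (foo_tick and foo_softc are hypothetical, not
 * part of this file): a legacy consumer keeps the handle returned by
 * timeout() and hands the same function and argument back to untimeout()
 * to cancel the call.
 *
 *	struct callout_handle foo_handle;
 *
 *	callout_handle_init(&foo_handle);
 *	foo_handle = timeout(foo_tick, foo_softc, hz);
 *	...
 *	untimeout(foo_tick, foo_softc, foo_handle);
 */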
struct callout_handle
timeout(ftn, arg, to_ticks)
	timeout_t *ftn;
	void *arg;
	int to_ticks;
{
	struct callout *new;
	struct callout_handle handle;

	mtx_lock_spin(&callout_lock);

	/* Fill in the next free callout structure. */
	new = SLIST_FIRST(&callfree);
	if (new == NULL)
		/* XXX Attempt to malloc first */
		panic("timeout table full");
	SLIST_REMOVE_HEAD(&callfree, c_links.sle);

	callout_reset(new, to_ticks, ftn, arg);

	handle.callout = new;
	mtx_unlock_spin(&callout_lock);
	return (handle);
}

void
untimeout(ftn, arg, handle)
	timeout_t *ftn;
	void *arg;
	struct callout_handle handle;
{

	/*
	 * Check for a handle that was initialized
	 * by callout_handle_init, but never used
	 * for a real timeout.
	 */
	if (handle.callout == NULL)
		return;

	mtx_lock_spin(&callout_lock);
	if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
		callout_stop(handle.callout);
	mtx_unlock_spin(&callout_lock);
}

void
callout_handle_init(struct callout_handle *handle)
{
	handle->callout = NULL;
}

/*
 * New interface; clients allocate their own callout structures.
 *
 * callout_reset() - establish or change a timeout
 * callout_stop() - disestablish a timeout
 * callout_init() - initialize a callout structure so that it can
 *	safely be passed to callout_reset() and callout_stop()
 *
 * <sys/callout.h> defines three convenience macros:
 *
 * callout_active() - returns truth if callout has not been serviced
 * callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
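
/*
 * Illustrative sketch only (xx_softc, xx_timer and xx_detach are
 * hypothetical names): a typical client embeds the callout in its own
 * structure, arms it with callout_reset() and, before freeing the
 * structure, uses callout_drain() so that a handler still running in
 * softclock() has finished.
 *
 *	struct xx_softc {
 *		struct callout xx_callout;
 *	};
 *
 *	callout_init(&sc->xx_callout, 1);	(nonzero second argument: MP safe)
 *	callout_reset(&sc->xx_callout, hz, xx_timer, sc);
 *	callout_stop(&sc->xx_callout);		(cancel if still pending)
 *	callout_drain(&sc->xx_callout);		(in xx_detach(): cancel and wait)
 */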
void
callout_reset(c, to_ticks, ftn, arg)
	struct	callout *c;
	int	to_ticks;
	void	(*ftn)(void *);
	void	*arg;
{

	mtx_lock_spin(&callout_lock);
	if (c == curr_callout && wakeup_needed) {
		/*
		 * We're being asked to reschedule a callout which is
		 * currently in progress, and someone has called
		 * callout_drain to kill that callout.  Don't reschedule.
		 */
		mtx_unlock_spin(&callout_lock);
		return;
	}
	if (c->c_flags & CALLOUT_PENDING)
		callout_stop(c);

	/*
	 * We could unlock callout_lock here and lock it again before the
	 * TAILQ_INSERT_TAIL, but there's no point since doing this setup
	 * doesn't take much time.
	 */
	if (to_ticks <= 0)
		to_ticks = 1;

	c->c_arg = arg;
	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
	c->c_func = ftn;
	c->c_time = ticks + to_ticks;
	TAILQ_INSERT_TAIL(&callwheel[c->c_time & callwheelmask],
			  c, c_links.tqe);
	mtx_unlock_spin(&callout_lock);
}

int
_callout_stop_safe(c, safe)
	struct	callout *c;
	int	safe;
{
	int wakeup_cookie;

	mtx_lock_spin(&callout_lock);
	/*
	 * Don't attempt to delete a callout that's not on the queue.
	 */
	if (!(c->c_flags & CALLOUT_PENDING)) {
		c->c_flags &= ~CALLOUT_ACTIVE;
		if (c == curr_callout && safe) {
			/* We need to wait until the callout is finished. */
			wakeup_needed = 1;
			wakeup_cookie = wakeup_ctr++;
			mtx_unlock_spin(&callout_lock);
			mtx_lock(&callout_wait_lock);

			/*
			 * Check to make sure that softclock() didn't
			 * do the wakeup in between our dropping
			 * callout_lock and picking up callout_wait_lock
			 */
			if (wakeup_cookie - wakeup_done_ctr > 0)
				cv_wait(&callout_wait, &callout_wait_lock);

			mtx_unlock(&callout_wait_lock);
		} else
			mtx_unlock_spin(&callout_lock);
		return (0);
	}
	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);

	if (nextsoftcheck == c) {
		nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
	}
	TAILQ_REMOVE(&callwheel[c->c_time & callwheelmask], c, c_links.tqe);
	c->c_func = NULL;

	if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
		SLIST_INSERT_HEAD(&callfree, c, c_links.sle);
	}
	mtx_unlock_spin(&callout_lock);
	return (1);
}

void
callout_init(c, mpsafe)
	struct	callout *c;
	int mpsafe;
{
	bzero(c, sizeof *c);
	if (mpsafe)
		c->c_flags |= CALLOUT_MPSAFE;
}

#ifdef APM_FIXUP_CALLTODO
/*
 * Adjust the kernel calltodo timeout list.  This routine is used after
 * an APM resume to recalculate the calltodo timer list values with the
 * number of hz's we have been sleeping.  The next hardclock() will detect
 * that there are fired timers and run softclock() to execute them.
 *
 * Please note, I have not done an exhaustive analysis of what code this
 * might break.  I am motivated to have my select()'s and alarm()'s that
 * have expired during suspend firing upon resume so that the applications
 * which set the timer can do the maintenance the timer was for as close
 * as possible to the originally intended time.  Testing this code for a
 * week showed that resuming from a suspend resulted in 22 to 25 timers
 * firing, which seemed independent of whether the suspend was 2 hours or
 * 2 days.  Your mileage may vary.   - Ken Key <key@cs.utk.edu>
 */
void
adjust_timeout_calltodo(time_change)
    struct timeval *time_change;
{
	register struct callout *p;
	unsigned long delta_ticks;

	/*
	 * How many ticks were we asleep?
	 * (stolen from tvtohz()).
	 */

	/* Don't do anything */
	if (time_change->tv_sec < 0)
		return;
	else if (time_change->tv_sec <= LONG_MAX / 1000000)
		delta_ticks = (time_change->tv_sec * 1000000 +
			       time_change->tv_usec + (tick - 1)) / tick + 1;
	else if (time_change->tv_sec <= LONG_MAX / hz)
		delta_ticks = time_change->tv_sec * hz +
			      (time_change->tv_usec + (tick - 1)) / tick + 1;
	else
		delta_ticks = LONG_MAX;

	if (delta_ticks > INT_MAX)
		delta_ticks = INT_MAX;

	/*
	 * Now rip through the timer calltodo list looking for timers
	 * to expire.
	 */

	/* don't collide with softclock() */
	mtx_lock_spin(&callout_lock);
	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
		p->c_time -= delta_ticks;

		/* Break if the timer had more time on it than delta_ticks */
		if (p->c_time > 0)
			break;

		/* take back the ticks the timer didn't use (p->c_time <= 0) */
		delta_ticks = -p->c_time;
	}
	mtx_unlock_spin(&callout_lock);

	return;
}
#endif /* APM_FIXUP_CALLTODO */