xref: /illumos-gate/usr/src/uts/common/os/callout.c (revision a913396d8daab34d2fa497f49ae18d9f3d3a059f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/callo.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/cpuvar.h>
33 #include <sys/thread.h>
34 #include <sys/kmem.h>
35 #include <sys/cmn_err.h>
36 #include <sys/callb.h>
37 #include <sys/debug.h>
38 #include <sys/vtrace.h>
39 #include <sys/sysmacros.h>
40 #include <sys/sdt.h>
41 
42 /*
43  * Callout tables.  See timeout(9F) for details.
44  */
45 static int cpr_stop_callout;
46 static int callout_fanout;
47 static int ncallout;
48 static callout_table_t *callout_table[CALLOUT_TABLES];
49 
/*
 * Push cp onto the front of the doubly-linked chain whose head pointer is
 * the lvalue cthead.  cnext and cprev name the callout_t link fields that
 * thread the chain.
 *
 * The expansion is a brace-enclosed block, so no trailing semicolon is
 * required.  cthead and cp are parenthesized so that arbitrary expressions
 * (e.g. &array[i]) may be passed; cp is evaluated more than once and must
 * therefore be free of side effects.
 */
#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev)	\
{							\
	callout_t **headpp = &(cthead);			\
	callout_t *headp = *headpp;			\
	(cp)->cnext = headp;				\
	(cp)->cprev = NULL;				\
	if (headp != NULL)				\
		headp->cprev = (cp);			\
	*headpp = (cp);					\
}
60 
/*
 * Unlink cp from the doubly-linked chain whose head pointer is the lvalue
 * cthead.  cnext and cprev name the callout_t link fields that thread the
 * chain.  cp's own link fields are left untouched.
 *
 * The expansion is a brace-enclosed block, so no trailing semicolon is
 * required.  cthead and cp are parenthesized so that arbitrary expressions
 * may be passed; cp is evaluated more than once and must therefore be free
 * of side effects.
 */
#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev)	\
{							\
	callout_t *nextp = (cp)->cnext;			\
	callout_t *prevp = (cp)->cprev;			\
	if (nextp != NULL)				\
		nextp->cprev = prevp;			\
	if (prevp != NULL)				\
		prevp->cnext = nextp;			\
	else						\
		(cthead) = nextp;			\
}
72 
/*
 * Insert cp into (INSDEL == INSERT), or remove it from (INSDEL == DELETE),
 * both of table ct's hash chains: the ID hash bucket selected by id and the
 * lbolt hash bucket selected by runtime.  The caller must hold ct->ct_lock,
 * and cp->c_xid / cp->c_runtime must already equal id / runtime.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe under an unbraced if/else); invoke it with a
 * trailing semicolon.  Arguments are evaluated more than once.
 */
#define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime)		\
do {									\
	ASSERT(MUTEX_HELD(&(ct)->ct_lock));				\
	ASSERT((cp)->c_xid == (id) && (cp)->c_runtime == (runtime));	\
	CALLOUT_HASH_##INSDEL((ct)->ct_idhash[CALLOUT_IDHASH(id)],	\
	    (cp), c_idnext, c_idprev);					\
	CALLOUT_HASH_##INSDEL((ct)->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
	    (cp), c_lbnext, c_lbprev);					\
} while (0)
80 
/*
 * Insert cp into table ct's hres queue (ct_hresq), which is kept sorted by
 * ascending c_hresms.  hresms is cp's expiry time; cp is placed ahead of any
 * queued entries whose c_hresms is greater than or equal to it.  cnext and
 * cprev name the callout_t link fields that thread the queue.
 *
 * The expansion is a brace-enclosed block, so no trailing semicolon is
 * required.  ct, cp and hresms are parenthesized so that arbitrary
 * expressions may be passed; they are evaluated more than once and must
 * therefore be free of side effects.
 */
#define	CALLOUT_HRES_INSERT(ct, cp, cnext, cprev, hresms)		\
{									\
	callout_t *nextp = (ct)->ct_hresq;				\
	callout_t *prevp;						\
									\
	if (nextp == NULL || (hresms) <= nextp->c_hresms) {		\
		(cp)->cnext = (ct)->ct_hresq;				\
		(ct)->ct_hresq = (cp);					\
		(cp)->cprev = NULL;					\
		if ((cp)->cnext != NULL)				\
			(cp)->cnext->cprev = (cp);			\
	} else {							\
		do {							\
			prevp = nextp;					\
			nextp = nextp->cnext;				\
		} while (nextp != NULL && (hresms) > nextp->c_hresms);	\
		prevp->cnext = (cp);					\
		(cp)->cprev = prevp;					\
		(cp)->cnext = nextp;					\
		if (nextp != NULL)					\
			nextp->cprev = (cp);				\
	}								\
}
104 
/*
 * Unlink cp from table ct's hres queue (ct_hresq).  cnext and cprev name the
 * callout_t link fields that thread the queue.  The hresms parameter exists
 * only so that the signature matches CALLOUT_HRES_INSERT; it is not used.
 *
 * The expansion is a brace-enclosed block, so no trailing semicolon is
 * required.  ct and cp are parenthesized so that arbitrary expressions may
 * be passed; they are evaluated more than once and must therefore be free of
 * side effects.
 */
#define	CALLOUT_HRES_DELETE(ct, cp, cnext, cprev, hresms)	\
{								\
	if ((cp) == (ct)->ct_hresq) {				\
		(ct)->ct_hresq = (cp)->cnext;			\
		if ((cp)->cnext != NULL)			\
			(cp)->cnext->cprev = NULL;		\
	} else {						\
		(cp)->cprev->cnext = (cp)->cnext;		\
		if ((cp)->cnext != NULL)			\
			(cp)->cnext->cprev = (cp)->cprev;	\
	}							\
}
117 
/*
 * Insert cp into (INSDEL == INSERT), or remove it from (INSDEL == DELETE),
 * table ct's hres queue.  The caller must hold ct->ct_lock, and cp->c_xid
 * must already equal id.  hresms is forwarded to the INSERT/DELETE macro;
 * only the INSERT variant makes use of it.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe under an unbraced if/else); invoke it with a
 * trailing semicolon.  Arguments are evaluated more than once.
 */
#define	CALLOUT_HRES_UPDATE(INSDEL, ct, cp, id, hresms)		\
do {								\
	ASSERT(MUTEX_HELD(&(ct)->ct_lock));			\
	ASSERT((cp)->c_xid == (id));				\
	CALLOUT_HRES_##INSDEL((ct), (cp), c_hrnext,		\
	    c_hrprev, hresms);					\
} while (0)
123 
124 /*
125  * Allocate a callout structure.  We try quite hard because we
126  * can't sleep, and if we can't do the allocation, we're toast.
127  * Failing all, we try a KM_PANIC allocation.
128  */
129 static callout_t *
130 callout_alloc(callout_table_t *ct)
131 {
132 	size_t size = 0;
133 	callout_t *cp = NULL;
134 
135 	mutex_exit(&ct->ct_lock);
136 	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
137 	    KM_NOSLEEP | KM_PANIC);
138 	bzero(cp, sizeof (callout_t));
139 	ncallout++;
140 	mutex_enter(&ct->ct_lock);
141 	return (cp);
142 }
143 
144 /*
145  * Arrange that func(arg) be called after delta clock ticks.
146  */
147 static timeout_id_t
148 timeout_common(void (*func)(void *), void *arg, clock_t delta,
149     callout_table_t *ct)
150 {
151 	callout_t	*cp;
152 	callout_id_t	id;
153 	clock_t		runtime;
154 	timestruc_t	now;
155 	int64_t		hresms;
156 
157 	gethrestime(&now);
158 
159 	mutex_enter(&ct->ct_lock);
160 
161 	if ((cp = ct->ct_freelist) == NULL)
162 		cp = callout_alloc(ct);
163 	else
164 		ct->ct_freelist = cp->c_idnext;
165 
166 	cp->c_func = func;
167 	cp->c_arg = arg;
168 
169 	/*
170 	 * Make sure the callout runs at least 1 tick in the future.
171 	 */
172 	if (delta <= 0)
173 		delta = 1;
174 	cp->c_runtime = runtime = lbolt + delta;
175 
176 	/* Calculate the future time in milli-second */
177 	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC +
178 	    TICK_TO_MSEC(delta);
179 	cp->c_hresms = hresms;
180 
181 	/*
182 	 * Assign an ID to this callout
183 	 */
184 	if (delta > CALLOUT_LONGTERM_TICKS)
185 		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
186 		    CALLOUT_COUNTER_HIGH;
187 	else
188 		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
189 		    CALLOUT_COUNTER_HIGH;
190 
191 	cp->c_xid = id;
192 
193 	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime);
194 	CALLOUT_HRES_UPDATE(INSERT, ct, cp, id, hresms);
195 
196 	mutex_exit(&ct->ct_lock);
197 
198 	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
199 		"timeout:%K(%p) in %ld ticks, cp %p",
200 		func, arg, delta, cp);
201 
202 	return ((timeout_id_t)id);
203 }
204 
205 timeout_id_t
206 timeout(void (*func)(void *), void *arg, clock_t delta)
207 {
208 	return (timeout_common(func, arg, delta,
209 	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
210 
211 }
212 
213 timeout_id_t
214 realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
215 {
216 	return (timeout_common(func, arg, delta,
217 	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
218 }
219 
220 clock_t
221 untimeout(timeout_id_t id_arg)
222 {
223 	callout_id_t id = (callout_id_t)id_arg;
224 	callout_table_t *ct;
225 	callout_t *cp;
226 	callout_id_t xid;
227 
228 	ct = callout_table[id & CALLOUT_TABLE_MASK];
229 
230 	mutex_enter(&ct->ct_lock);
231 
232 	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {
233 
234 		if ((xid = cp->c_xid) == id) {
235 			clock_t runtime = cp->c_runtime;
236 			clock_t time_left = runtime - lbolt;
237 
238 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id, runtime);
239 			CALLOUT_HRES_UPDATE(DELETE, ct, cp, id, 0);
240 			cp->c_idnext = ct->ct_freelist;
241 			ct->ct_freelist = cp;
242 			mutex_exit(&ct->ct_lock);
243 			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
244 			    "untimeout:ID %lx ticks_left %ld", id, time_left);
245 			return (time_left < 0 ? 0 : time_left);
246 		}
247 
248 		if (xid != (id | CALLOUT_EXECUTING))
249 			continue;
250 
251 		/*
252 		 * The callout we want to delete is currently executing.
253 		 * The DDI states that we must wait until the callout
254 		 * completes before returning, so we block on c_done until
255 		 * the callout ID changes (to zero if it's on the freelist,
256 		 * or to a new callout ID if it's in use).  This implicitly
257 		 * assumes that callout structures are persistent (they are).
258 		 */
259 		if (cp->c_executor == curthread) {
260 			/*
261 			 * The timeout handler called untimeout() on itself.
262 			 * Stupid, but legal.  We can't wait for the timeout
263 			 * to complete without deadlocking, so we just return.
264 			 */
265 			mutex_exit(&ct->ct_lock);
266 			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
267 			    "untimeout_self:ID %x", id);
268 			return (-1);
269 		}
270 		while (cp->c_xid == xid)
271 			cv_wait(&cp->c_done, &ct->ct_lock);
272 		mutex_exit(&ct->ct_lock);
273 		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
274 		    "untimeout_executing:ID %lx", id);
275 		return (-1);
276 	}
277 
278 	mutex_exit(&ct->ct_lock);
279 	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
280 	    "untimeout_bogus_id:ID %lx", id);
281 
282 	/*
283 	 * We didn't find the specified callout ID.  This means either
284 	 * (1) the callout already fired, or (2) the caller passed us
285 	 * a bogus value.  Perform a sanity check to detect case (2).
286 	 */
287 	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
288 	    CALLOUT_COUNTER_HIGH)
289 		panic("untimeout: impossible timeout id %lx", id);
290 
291 	return (-1);
292 }
293 
294 /*
295  * Do the actual work of executing callouts.  This routine is called either
296  * by a taskq_thread (normal case), or by softcall (realtime case).
297  */
298 static void
299 callout_execute(callout_table_t *ct)
300 {
301 	callout_t	*cp;
302 	callout_id_t	xid;
303 	clock_t		runtime;
304 	timestruc_t	now;
305 	int64_t		hresms;
306 
307 	mutex_enter(&ct->ct_lock);
308 
309 	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
310 		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
311 		    cp != NULL; cp = cp->c_lbnext) {
312 			xid = cp->c_xid;
313 			if (cp->c_runtime != runtime ||
314 			    (xid & CALLOUT_EXECUTING))
315 				continue;
316 			cp->c_executor = curthread;
317 			cp->c_xid = xid |= CALLOUT_EXECUTING;
318 			mutex_exit(&ct->ct_lock);
319 			DTRACE_PROBE1(callout__start, callout_t *, cp);
320 			(*cp->c_func)(cp->c_arg);
321 			DTRACE_PROBE1(callout__end, callout_t *, cp);
322 			mutex_enter(&ct->ct_lock);
323 
324 			/*
325 			 * Delete callout from both the hash tables and the
326 			 * hres queue, return it to freelist, and tell anyone
327 			 * who cares that we're done.
328 			 * Even though we dropped and reacquired ct->ct_lock,
329 			 * it's OK to pick up where we left off because only
330 			 * newly-created timeouts can precede cp on ct_lbhash,
331 			 * and those timeouts cannot be due on this tick.
332 			 */
333 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
334 			CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
335 			cp->c_idnext = ct->ct_freelist;
336 			ct->ct_freelist = cp;
337 			cp->c_xid = 0;	/* Indicate completion for c_done */
338 			cv_broadcast(&cp->c_done);
339 		}
340 		/*
341 		 * We have completed all callouts that were scheduled to
342 		 * run at "runtime".  If the global run time still matches
343 		 * our local copy, then we advance the global run time;
344 		 * otherwise, another callout thread must have already done so.
345 		 */
346 		if (ct->ct_runtime == runtime)
347 			ct->ct_runtime = runtime + 1;
348 	}
349 
350 	gethrestime(&now);
351 
352 	/* Calculate the current time in milli-second */
353 	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;
354 
355 	cp = ct->ct_hresq;
356 	while (cp != NULL && hresms >= cp->c_hresms) {
357 		xid = cp->c_xid;
358 		if (xid & CALLOUT_EXECUTING) {
359 			cp = cp->c_hrnext;
360 			continue;
361 		}
362 		cp->c_executor = curthread;
363 		cp->c_xid = xid |= CALLOUT_EXECUTING;
364 		runtime = cp->c_runtime;
365 		mutex_exit(&ct->ct_lock);
366 		DTRACE_PROBE1(callout__start, callout_t *, cp);
367 		(*cp->c_func)(cp->c_arg);
368 		DTRACE_PROBE1(callout__end, callout_t *, cp);
369 		mutex_enter(&ct->ct_lock);
370 
371 		/*
372 		 * See comments above.
373 		 */
374 		CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
375 		CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
376 		cp->c_idnext = ct->ct_freelist;
377 		ct->ct_freelist = cp;
378 		cp->c_xid = 0;	/* Indicate completion for c_done */
379 		cv_broadcast(&cp->c_done);
380 
381 		/*
382 		 * Start over from the head of the list, see if
383 		 * any timeout bearing an earlier hres time.
384 		 */
385 		cp = ct->ct_hresq;
386 	}
387 	mutex_exit(&ct->ct_lock);
388 }
389 
390 /*
391  * Schedule any callouts that are due on or before this tick.
392  */
393 static void
394 callout_schedule_1(callout_table_t *ct)
395 {
396 	callout_t	*cp;
397 	clock_t		curtime, runtime;
398 	timestruc_t	now;
399 	int64_t		hresms;
400 
401 	mutex_enter(&ct->ct_lock);
402 	ct->ct_curtime = curtime = lbolt;
403 	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
404 		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
405 		    cp != NULL; cp = cp->c_lbnext) {
406 			if (cp->c_runtime != runtime ||
407 			    (cp->c_xid & CALLOUT_EXECUTING))
408 				continue;
409 			mutex_exit(&ct->ct_lock);
410 			if (ct->ct_taskq == NULL)
411 				softcall((void (*)(void *))callout_execute, ct);
412 			else
413 				(void) taskq_dispatch(ct->ct_taskq,
414 				    (task_func_t *)callout_execute, ct,
415 				    KM_NOSLEEP);
416 			return;
417 		}
418 		ct->ct_runtime++;
419 	}
420 
421 	gethrestime(&now);
422 
423 	/* Calculate the current time in milli-second */
424 	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;
425 
426 	cp = ct->ct_hresq;
427 	while (cp != NULL && hresms >= cp->c_hresms) {
428 		if (cp->c_xid & CALLOUT_EXECUTING) {
429 			cp = cp->c_hrnext;
430 			continue;
431 		}
432 		mutex_exit(&ct->ct_lock);
433 		if (ct->ct_taskq == NULL)
434 			softcall((void (*)(void *))callout_execute, ct);
435 		else
436 			(void) taskq_dispatch(ct->ct_taskq,
437 			    (task_func_t *)callout_execute, ct, KM_NOSLEEP);
438 		return;
439 	}
440 	mutex_exit(&ct->ct_lock);
441 }
442 
443 /*
444  * Schedule callouts for all callout tables.  Called by clock() on each tick.
445  */
446 void
447 callout_schedule(void)
448 {
449 	int f, t;
450 
451 	if (cpr_stop_callout)
452 		return;
453 
454 	for (t = 0; t < CALLOUT_NTYPES; t++)
455 		for (f = 0; f < callout_fanout; f++)
456 			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
457 }
458 
459 /*
460  * Callback handler used by CPR to stop and resume callouts.
461  */
462 /*ARGSUSED*/
463 static boolean_t
464 callout_cpr_callb(void *arg, int code)
465 {
466 	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
467 	return (B_TRUE);
468 }
469 
470 /*
471  * Initialize all callout tables.  Called at boot time just before clkstart().
472  */
473 void
474 callout_init(void)
475 {
476 	int f, t;
477 	int table_id;
478 	callout_table_t *ct;
479 
480 	callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus);
481 
482 	for (t = 0; t < CALLOUT_NTYPES; t++) {
483 		for (f = 0; f < CALLOUT_FANOUT; f++) {
484 			table_id = CALLOUT_TABLE(t, f);
485 			if (f >= callout_fanout) {
486 				callout_table[table_id] =
487 				    callout_table[table_id - callout_fanout];
488 				continue;
489 			}
490 			ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP);
491 			callout_table[table_id] = ct;
492 			ct->ct_short_id = (callout_id_t)table_id |
493 			    CALLOUT_COUNTER_HIGH;
494 			ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM;
495 			ct->ct_curtime = ct->ct_runtime = lbolt;
496 			if (t == CALLOUT_NORMAL) {
497 				/*
498 				 * Each callout thread consumes exactly one
499 				 * task structure while active.  Therefore,
500 				 * prepopulating with 2 * CALLOUT_THREADS tasks
501 				 * ensures that there's at least one task per
502 				 * thread that's either scheduled or on the
503 				 * freelist.  In turn, this guarantees that
504 				 * taskq_dispatch() will always either succeed
505 				 * (because there's a free task structure) or
506 				 * be unnecessary (because "callout_excute(ct)"
507 				 * has already scheduled).
508 				 */
509 				ct->ct_taskq =
510 				    taskq_create_instance("callout_taskq", f,
511 				    CALLOUT_THREADS, maxclsyspri,
512 				    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
513 				    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
514 			}
515 		}
516 	}
517 	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout");
518 }
519