xref: /illumos-gate/usr/src/uts/common/os/callout.c (revision 34a0f871d192b33b865455a8812a3d34c1866315)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/callo.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/cpuvar.h>
33 #include <sys/thread.h>
34 #include <sys/kmem.h>
35 #include <sys/cmn_err.h>
36 #include <sys/callb.h>
37 #include <sys/debug.h>
38 #include <sys/vtrace.h>
39 #include <sys/sysmacros.h>
40 #include <sys/sdt.h>
41 
/*
 * Callout tables.  See timeout(9F) for details.
 */
/*
 * Nonzero while callout scheduling is suspended: written by
 * callout_cpr_callb() and tested at the top of callout_schedule().
 */
static int cpr_stop_callout;
/*
 * Number of distinct tables per callout type; computed in callout_init()
 * as MIN(CALLOUT_FANOUT, max_ncpus).
 */
static int callout_fanout;
/*
 * Count of callout structures allocated so far; bumped once per
 * allocation in callout_alloc().
 */
static int ncallout;
/*
 * Table pointers, indexed by CALLOUT_TABLE(type, fanout); filled in by
 * callout_init().
 */
static callout_table_t *callout_table[CALLOUT_TABLES];
49 
/*
 * Link cp in at the front of the doubly-linked chain whose head pointer
 * is the lvalue cthead.  cnext and cprev name the callout_t link fields
 * that belong to this chain.  Expands to a brace-enclosed block.
 */
#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev)	\
{							\
	callout_t **chainpp = &cthead;			\
	callout_t *firstp = *chainpp;			\
							\
	cp->cnext = firstp;				\
	cp->cprev = NULL;				\
	if (firstp != NULL)				\
		firstp->cprev = cp;			\
	*chainpp = cp;					\
}
60 
/*
 * Unlink cp from the doubly-linked chain whose head pointer is the lvalue
 * cthead.  cnext and cprev name the callout_t link fields for this chain.
 * cp's own link fields are left untouched.
 */
#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev)	\
{							\
	callout_t *succp = cp->cnext;			\
	callout_t *predp = cp->cprev;			\
							\
	if (predp == NULL)				\
		cthead = succp;				\
	else						\
		predp->cnext = succp;			\
	if (succp != NULL)				\
		succp->cprev = predp;			\
}
72 
/*
 * Insert cp into, or delete cp from, both the ID hash and the lbolt hash
 * of table ct.  INSDEL is the token INSERT or DELETE and selects the
 * CALLOUT_HASH_* primitive that is applied to each chain.  The caller
 * must hold ct->ct_lock, and id/runtime must equal cp->c_xid/c_runtime
 * (both conditions are ASSERTed).
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and is safe under an unbraced if/else.
 */
#define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime)		\
do {									\
	ASSERT(MUTEX_HELD(&ct->ct_lock));				\
	ASSERT(cp->c_xid == id && cp->c_runtime == runtime);		\
	CALLOUT_HASH_##INSDEL(ct->ct_idhash[CALLOUT_IDHASH(id)],	\
	cp, c_idnext, c_idprev)						\
	CALLOUT_HASH_##INSDEL(ct->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
	cp, c_lbnext, c_lbprev)						\
} while (0)
80 
/*
 * Insert cp into ct's hres queue (ct->ct_hresq), which is kept sorted by
 * ascending c_hresms.  cp is placed in front of the first entry whose
 * c_hresms is greater than or equal to hresms, or at the tail if there
 * is no such entry.  cnext and cprev name the queue's link fields.
 */
#define	CALLOUT_HRES_INSERT(ct, cp, cnext, cprev, hresms)		\
{									\
	callout_t *curp = ct->ct_hresq;					\
	callout_t *lastp = NULL;					\
									\
	while (curp != NULL && hresms > curp->c_hresms) {		\
		lastp = curp;						\
		curp = curp->cnext;					\
	}								\
	cp->cnext = curp;						\
	cp->cprev = lastp;						\
	if (lastp == NULL)						\
		ct->ct_hresq = cp;					\
	else								\
		lastp->cnext = cp;					\
	if (curp != NULL)						\
		curp->cprev = cp;					\
}
104 
/*
 * Unlink cp from ct's hres queue (ct->ct_hresq).  cnext and cprev name
 * the queue's link fields.  The hresms argument is accepted for symmetry
 * with CALLOUT_HRES_INSERT but is not used.  cp's own link fields are
 * left untouched.
 */
#define	CALLOUT_HRES_DELETE(ct, cp, cnext, cprev, hresms)	\
{								\
	callout_t *succp = cp->cnext;				\
	callout_t *predp;					\
								\
	if (cp == ct->ct_hresq) {				\
		predp = NULL;					\
		ct->ct_hresq = succp;				\
	} else {						\
		predp = cp->cprev;				\
		predp->cnext = succp;				\
	}							\
	if (succp != NULL)					\
		succp->cprev = predp;				\
}
117 
/*
 * Insert cp into, or delete cp from, the hres queue of table ct.  INSDEL
 * is the token INSERT or DELETE and selects the CALLOUT_HRES_* primitive.
 * The caller must hold ct->ct_lock and id must equal cp->c_xid (both
 * conditions are ASSERTed).  hresms is forwarded to the primitive.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and is safe under an unbraced if/else.
 */
#define	CALLOUT_HRES_UPDATE(INSDEL, ct, cp, id, hresms)		\
do {								\
	ASSERT(MUTEX_HELD(&ct->ct_lock));			\
	ASSERT(cp->c_xid == id);				\
	CALLOUT_HRES_##INSDEL(ct, cp, c_hrnext,			\
	c_hrprev, hresms)					\
} while (0)
123 
124 /*
125  * Allocate a callout structure.  We try quite hard because we
126  * can't sleep, and if we can't do the allocation, we're toast.
127  * Failing all, we try a KM_PANIC allocation.
128  */
129 static callout_t *
130 callout_alloc(callout_table_t *ct)
131 {
132 	size_t size = 0;
133 	callout_t *cp = NULL;
134 
135 	mutex_exit(&ct->ct_lock);
136 	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
137 	    KM_NOSLEEP | KM_PANIC);
138 	bzero(cp, sizeof (callout_t));
139 	ncallout++;
140 	mutex_enter(&ct->ct_lock);
141 	return (cp);
142 }
143 
144 /*
145  * Arrange that func(arg) be called after delta clock ticks.
146  */
147 static timeout_id_t
148 timeout_common(void (*func)(void *), void *arg, clock_t delta,
149     callout_table_t *ct)
150 {
151 	callout_t	*cp;
152 	callout_id_t	id;
153 	clock_t		runtime;
154 	timestruc_t	now;
155 	int64_t		hresms;
156 
157 	gethrestime(&now);
158 
159 	mutex_enter(&ct->ct_lock);
160 
161 	if ((cp = ct->ct_freelist) == NULL)
162 		cp = callout_alloc(ct);
163 	else
164 		ct->ct_freelist = cp->c_idnext;
165 
166 	cp->c_func = func;
167 	cp->c_arg = arg;
168 
169 	/*
170 	 * Make sure the callout runs at least 1 tick in the future.
171 	 */
172 	if (delta <= 0)
173 		delta = 1;
174 	cp->c_runtime = runtime = lbolt + delta;
175 
176 	/*
177 	 * Calculate the future time in millisecond.
178 	 * We must cast tv_sec and delta to 64-bit integers
179 	 * to avoid integer overflow on 32-platforms.
180 	 */
181 	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC +
182 	    TICK_TO_MSEC((int64_t)delta);
183 
184 	cp->c_hresms = hresms;
185 
186 	/*
187 	 * Assign an ID to this callout
188 	 */
189 	if (delta > CALLOUT_LONGTERM_TICKS)
190 		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
191 		    CALLOUT_COUNTER_HIGH;
192 	else
193 		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
194 		    CALLOUT_COUNTER_HIGH;
195 
196 	cp->c_xid = id;
197 
198 	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime);
199 	CALLOUT_HRES_UPDATE(INSERT, ct, cp, id, hresms);
200 
201 	mutex_exit(&ct->ct_lock);
202 
203 	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
204 		"timeout:%K(%p) in %ld ticks, cp %p",
205 		func, arg, delta, cp);
206 
207 	return ((timeout_id_t)id);
208 }
209 
210 timeout_id_t
211 timeout(void (*func)(void *), void *arg, clock_t delta)
212 {
213 	return (timeout_common(func, arg, delta,
214 	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
215 
216 }
217 
218 timeout_id_t
219 realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
220 {
221 	return (timeout_common(func, arg, delta,
222 	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
223 }
224 
225 clock_t
226 untimeout(timeout_id_t id_arg)
227 {
228 	callout_id_t id = (callout_id_t)id_arg;
229 	callout_table_t *ct;
230 	callout_t *cp;
231 	callout_id_t xid;
232 
233 	ct = callout_table[id & CALLOUT_TABLE_MASK];
234 
235 	mutex_enter(&ct->ct_lock);
236 
237 	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {
238 
239 		if ((xid = cp->c_xid) == id) {
240 			clock_t runtime = cp->c_runtime;
241 			clock_t time_left = runtime - lbolt;
242 
243 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id, runtime);
244 			CALLOUT_HRES_UPDATE(DELETE, ct, cp, id, 0);
245 			cp->c_idnext = ct->ct_freelist;
246 			ct->ct_freelist = cp;
247 			mutex_exit(&ct->ct_lock);
248 			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
249 			    "untimeout:ID %lx ticks_left %ld", id, time_left);
250 			return (time_left < 0 ? 0 : time_left);
251 		}
252 
253 		if (xid != (id | CALLOUT_EXECUTING))
254 			continue;
255 
256 		/*
257 		 * The callout we want to delete is currently executing.
258 		 * The DDI states that we must wait until the callout
259 		 * completes before returning, so we block on c_done until
260 		 * the callout ID changes (to zero if it's on the freelist,
261 		 * or to a new callout ID if it's in use).  This implicitly
262 		 * assumes that callout structures are persistent (they are).
263 		 */
264 		if (cp->c_executor == curthread) {
265 			/*
266 			 * The timeout handler called untimeout() on itself.
267 			 * Stupid, but legal.  We can't wait for the timeout
268 			 * to complete without deadlocking, so we just return.
269 			 */
270 			mutex_exit(&ct->ct_lock);
271 			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
272 			    "untimeout_self:ID %x", id);
273 			return (-1);
274 		}
275 		while (cp->c_xid == xid)
276 			cv_wait(&cp->c_done, &ct->ct_lock);
277 		mutex_exit(&ct->ct_lock);
278 		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
279 		    "untimeout_executing:ID %lx", id);
280 		return (-1);
281 	}
282 
283 	mutex_exit(&ct->ct_lock);
284 	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
285 	    "untimeout_bogus_id:ID %lx", id);
286 
287 	/*
288 	 * We didn't find the specified callout ID.  This means either
289 	 * (1) the callout already fired, or (2) the caller passed us
290 	 * a bogus value.  Perform a sanity check to detect case (2).
291 	 */
292 	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
293 	    CALLOUT_COUNTER_HIGH)
294 		panic("untimeout: impossible timeout id %lx", id);
295 
296 	return (-1);
297 }
298 
299 /*
300  * Do the actual work of executing callouts.  This routine is called either
301  * by a taskq_thread (normal case), or by softcall (realtime case).
302  */
303 static void
304 callout_execute(callout_table_t *ct)
305 {
306 	callout_t	*cp;
307 	callout_id_t	xid;
308 	clock_t		runtime;
309 	timestruc_t	now;
310 	int64_t		hresms;
311 
312 	mutex_enter(&ct->ct_lock);
313 
314 	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
315 		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
316 		    cp != NULL; cp = cp->c_lbnext) {
317 			xid = cp->c_xid;
318 			if (cp->c_runtime != runtime ||
319 			    (xid & CALLOUT_EXECUTING))
320 				continue;
321 			cp->c_executor = curthread;
322 			cp->c_xid = xid |= CALLOUT_EXECUTING;
323 			mutex_exit(&ct->ct_lock);
324 			DTRACE_PROBE1(callout__start, callout_t *, cp);
325 			(*cp->c_func)(cp->c_arg);
326 			DTRACE_PROBE1(callout__end, callout_t *, cp);
327 			mutex_enter(&ct->ct_lock);
328 
329 			/*
330 			 * Delete callout from both the hash tables and the
331 			 * hres queue, return it to freelist, and tell anyone
332 			 * who cares that we're done.
333 			 * Even though we dropped and reacquired ct->ct_lock,
334 			 * it's OK to pick up where we left off because only
335 			 * newly-created timeouts can precede cp on ct_lbhash,
336 			 * and those timeouts cannot be due on this tick.
337 			 */
338 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
339 			CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
340 			cp->c_idnext = ct->ct_freelist;
341 			ct->ct_freelist = cp;
342 			cp->c_xid = 0;	/* Indicate completion for c_done */
343 			cv_broadcast(&cp->c_done);
344 		}
345 		/*
346 		 * We have completed all callouts that were scheduled to
347 		 * run at "runtime".  If the global run time still matches
348 		 * our local copy, then we advance the global run time;
349 		 * otherwise, another callout thread must have already done so.
350 		 */
351 		if (ct->ct_runtime == runtime)
352 			ct->ct_runtime = runtime + 1;
353 	}
354 
355 	gethrestime(&now);
356 
357 	/*
358 	 * Calculate the future time in millisecond.
359 	 * We must cast tv_sec to 64-bit integer
360 	 * to avoid integer overflow on 32-platforms.
361 	 */
362 	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;
363 
364 	cp = ct->ct_hresq;
365 	while (cp != NULL && hresms >= cp->c_hresms) {
366 		xid = cp->c_xid;
367 		if (xid & CALLOUT_EXECUTING) {
368 			cp = cp->c_hrnext;
369 			continue;
370 		}
371 		cp->c_executor = curthread;
372 		cp->c_xid = xid |= CALLOUT_EXECUTING;
373 		runtime = cp->c_runtime;
374 		mutex_exit(&ct->ct_lock);
375 		DTRACE_PROBE1(callout__start, callout_t *, cp);
376 		(*cp->c_func)(cp->c_arg);
377 		DTRACE_PROBE1(callout__end, callout_t *, cp);
378 		mutex_enter(&ct->ct_lock);
379 
380 		/*
381 		 * See comments above.
382 		 */
383 		CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
384 		CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
385 		cp->c_idnext = ct->ct_freelist;
386 		ct->ct_freelist = cp;
387 		cp->c_xid = 0;	/* Indicate completion for c_done */
388 		cv_broadcast(&cp->c_done);
389 
390 		/*
391 		 * Start over from the head of the list, see if
392 		 * any timeout bearing an earlier hres time.
393 		 */
394 		cp = ct->ct_hresq;
395 	}
396 	mutex_exit(&ct->ct_lock);
397 }
398 
399 /*
400  * Schedule any callouts that are due on or before this tick.
401  */
402 static void
403 callout_schedule_1(callout_table_t *ct)
404 {
405 	callout_t	*cp;
406 	clock_t		curtime, runtime;
407 	timestruc_t	now;
408 	int64_t		hresms;
409 
410 	mutex_enter(&ct->ct_lock);
411 	ct->ct_curtime = curtime = lbolt;
412 	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
413 		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
414 		    cp != NULL; cp = cp->c_lbnext) {
415 			if (cp->c_runtime != runtime ||
416 			    (cp->c_xid & CALLOUT_EXECUTING))
417 				continue;
418 			mutex_exit(&ct->ct_lock);
419 			if (ct->ct_taskq == NULL)
420 				softcall((void (*)(void *))callout_execute, ct);
421 			else
422 				(void) taskq_dispatch(ct->ct_taskq,
423 				    (task_func_t *)callout_execute, ct,
424 				    KM_NOSLEEP);
425 			return;
426 		}
427 		ct->ct_runtime++;
428 	}
429 
430 	gethrestime(&now);
431 
432 	/*
433 	 * Calculate the future time in millisecond.
434 	 * We must cast tv_sec to 64-bit integer
435 	 * to avoid integer overflow on 32-platforms.
436 	 */
437 	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;
438 
439 	cp = ct->ct_hresq;
440 	while (cp != NULL && hresms >= cp->c_hresms) {
441 		if (cp->c_xid & CALLOUT_EXECUTING) {
442 			cp = cp->c_hrnext;
443 			continue;
444 		}
445 		mutex_exit(&ct->ct_lock);
446 		if (ct->ct_taskq == NULL)
447 			softcall((void (*)(void *))callout_execute, ct);
448 		else
449 			(void) taskq_dispatch(ct->ct_taskq,
450 			    (task_func_t *)callout_execute, ct, KM_NOSLEEP);
451 		return;
452 	}
453 	mutex_exit(&ct->ct_lock);
454 }
455 
456 /*
457  * Schedule callouts for all callout tables.  Called by clock() on each tick.
458  */
459 void
460 callout_schedule(void)
461 {
462 	int f, t;
463 
464 	if (cpr_stop_callout)
465 		return;
466 
467 	for (t = 0; t < CALLOUT_NTYPES; t++)
468 		for (f = 0; f < callout_fanout; f++)
469 			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
470 }
471 
472 /*
473  * Callback handler used by CPR to stop and resume callouts.
474  */
475 /*ARGSUSED*/
476 static boolean_t
477 callout_cpr_callb(void *arg, int code)
478 {
479 	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
480 	return (B_TRUE);
481 }
482 
483 /*
484  * Initialize all callout tables.  Called at boot time just before clkstart().
485  */
486 void
487 callout_init(void)
488 {
489 	int f, t;
490 	int table_id;
491 	callout_table_t *ct;
492 
493 	callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus);
494 
495 	for (t = 0; t < CALLOUT_NTYPES; t++) {
496 		for (f = 0; f < CALLOUT_FANOUT; f++) {
497 			table_id = CALLOUT_TABLE(t, f);
498 			if (f >= callout_fanout) {
499 				callout_table[table_id] =
500 				    callout_table[table_id - callout_fanout];
501 				continue;
502 			}
503 			ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP);
504 			callout_table[table_id] = ct;
505 			ct->ct_short_id = (callout_id_t)table_id |
506 			    CALLOUT_COUNTER_HIGH;
507 			ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM;
508 			ct->ct_curtime = ct->ct_runtime = lbolt;
509 			if (t == CALLOUT_NORMAL) {
510 				/*
511 				 * Each callout thread consumes exactly one
512 				 * task structure while active.  Therefore,
513 				 * prepopulating with 2 * CALLOUT_THREADS tasks
514 				 * ensures that there's at least one task per
515 				 * thread that's either scheduled or on the
516 				 * freelist.  In turn, this guarantees that
517 				 * taskq_dispatch() will always either succeed
518 				 * (because there's a free task structure) or
519 				 * be unnecessary (because "callout_excute(ct)"
520 				 * has already scheduled).
521 				 */
522 				ct->ct_taskq =
523 				    taskq_create_instance("callout_taskq", f,
524 				    CALLOUT_THREADS, maxclsyspri,
525 				    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
526 				    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
527 			}
528 		}
529 	}
530 	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout");
531 }
532