xref: /illumos-gate/usr/src/uts/common/os/callout.c (revision 3348528f7ec68bf2f11d0cbd5c3b9932ea7f0d5c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/callo.h>
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cpuvar.h>
34 #include <sys/thread.h>
35 #include <sys/kmem.h>
36 #include <sys/cmn_err.h>
37 #include <sys/callb.h>
38 #include <sys/debug.h>
39 #include <sys/vtrace.h>
40 #include <sys/sysmacros.h>
41 #include <sys/sdt.h>
42 
43 /*
44  * Callout tables.  See timeout(9F) for details.
45  */
46 static int cpr_stop_callout;	/* set by callout_cpr_callb(); callout_schedule() returns early while nonzero */
47 static int callout_fanout;	/* MIN(CALLOUT_FANOUT, max_ncpus), computed in callout_init() */
48 static int ncallout;	/* incremented once per callout_t obtained in callout_alloc() */
49 static callout_table_t *callout_table[CALLOUT_TABLES];	/* indexed by CALLOUT_TABLE(type, fanout); filled in by callout_init() */
50 
/*
 * Push cp onto the front of the doubly-linked chain whose head pointer is
 * the lvalue cthead.  cnext and cprev name the callout_t link members to
 * use, so one macro can serve more than one chain.
 *
 * Each argument is parenthesized and cp is evaluated exactly once, so an
 * expression such as &obj may be passed.  The temporaries carry a chi_
 * prefix so they cannot capture an identifier used in an argument.
 */
#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev)	\
{							\
	callout_t **chi_headpp = &(cthead);		\
	callout_t *chi_newp = (cp);			\
	callout_t *chi_headp = *chi_headpp;		\
	chi_newp->cnext = chi_headp;			\
	chi_newp->cprev = NULL;				\
	if (chi_headp != NULL)				\
		chi_headp->cprev = chi_newp;		\
	*chi_headpp = chi_newp;				\
}
61 
/*
 * Unlink cp from the doubly-linked chain whose head pointer is the lvalue
 * cthead.  cnext and cprev name the callout_t link members to use.
 *
 * Only the neighbours (or the head, when cp was first) are rewritten; cp's
 * own link members are left as they were.
 *
 * Each argument is parenthesized and cp is evaluated exactly once.  The
 * temporaries carry a chd_ prefix so they cannot capture an identifier used
 * in an argument.
 */
#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev)	\
{							\
	callout_t *chd_delp = (cp);			\
	callout_t *chd_nextp = chd_delp->cnext;		\
	callout_t *chd_prevp = chd_delp->cprev;		\
	if (chd_nextp != NULL)				\
		chd_nextp->cprev = chd_prevp;		\
	if (chd_prevp != NULL)				\
		chd_prevp->cnext = chd_nextp;		\
	else						\
		(cthead) = chd_nextp;			\
}
73 
/*
 * Apply CALLOUT_HASH_INSERT or CALLOUT_HASH_DELETE (selected by INSDEL) to
 * cp on both of ct's hash chains: the ID chain chosen by CALLOUT_IDHASH(id)
 * using the c_idnext/c_idprev links, and the chain chosen by
 * CALLOUT_LBHASH(runtime) using the c_lbnext/c_lbprev links.
 *
 * Asserts that ct->ct_lock is held and that cp's c_xid and c_runtime match
 * the id and runtime passed in.
 *
 * The whole expansion is one compound statement, so the macro behaves as a
 * single statement even as the body of an unbraced if or loop.
 */
#define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime)		\
{									\
	ASSERT(MUTEX_HELD(&(ct)->ct_lock));				\
	ASSERT((cp)->c_xid == (id) && (cp)->c_runtime == (runtime));	\
	CALLOUT_HASH_##INSDEL((ct)->ct_idhash[CALLOUT_IDHASH(id)],	\
	cp, c_idnext, c_idprev)						\
	CALLOUT_HASH_##INSDEL((ct)->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
	cp, c_lbnext, c_lbprev)						\
}
81 
82 /*
83  * Allocate a callout structure.  We try quite hard because we
84  * can't sleep, and if we can't do the allocation, we're toast.
85  * Failing all, we try a KM_PANIC allocation.
86  */
87 static callout_t *
88 callout_alloc(callout_table_t *ct)
89 {
90 	size_t size = 0;
91 	callout_t *cp = NULL;
92 
93 	mutex_exit(&ct->ct_lock);
94 	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
95 	    KM_NOSLEEP | KM_PANIC);
96 	bzero(cp, sizeof (callout_t));
97 	ncallout++;
98 	mutex_enter(&ct->ct_lock);
99 	return (cp);
100 }
101 
102 /*
103  * Arrange that func(arg) be called after delta clock ticks.
104  */
105 static timeout_id_t
106 timeout_common(void (*func)(void *), void *arg, clock_t delta,
107     callout_table_t *ct)
108 {
109 	callout_t *cp;
110 	callout_id_t id;
111 	clock_t runtime;
112 
113 	mutex_enter(&ct->ct_lock);
114 
115 	if ((cp = ct->ct_freelist) == NULL)
116 		cp = callout_alloc(ct);
117 	else
118 		ct->ct_freelist = cp->c_idnext;
119 
120 	cp->c_func = func;
121 	cp->c_arg = arg;
122 
123 	/*
124 	 * Make sure the callout runs at least 1 tick in the future.
125 	 */
126 	if (delta <= 0)
127 		delta = 1;
128 	cp->c_runtime = runtime = lbolt + delta;
129 
130 	/*
131 	 * Assign an ID to this callout
132 	 */
133 	if (delta > CALLOUT_LONGTERM_TICKS)
134 		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
135 		    CALLOUT_COUNTER_HIGH;
136 	else
137 		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
138 		    CALLOUT_COUNTER_HIGH;
139 
140 	cp->c_xid = id;
141 
142 	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime);
143 
144 	mutex_exit(&ct->ct_lock);
145 
146 	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
147 		"timeout:%K(%p) in %ld ticks, cp %p",
148 		func, arg, delta, cp);
149 
150 	return ((timeout_id_t)id);
151 }
152 
153 timeout_id_t
154 timeout(void (*func)(void *), void *arg, clock_t delta)
155 {
156 	return (timeout_common(func, arg, delta,
157 	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
158 
159 }
160 
161 timeout_id_t
162 realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
163 {
164 	return (timeout_common(func, arg, delta,
165 	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
166 }
167 
168 clock_t
169 untimeout(timeout_id_t id_arg)
170 {
171 	callout_id_t id = (callout_id_t)id_arg;
172 	callout_table_t *ct;
173 	callout_t *cp;
174 	callout_id_t xid;
175 
176 	ct = callout_table[id & CALLOUT_TABLE_MASK];
177 
178 	mutex_enter(&ct->ct_lock);
179 
180 	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {
181 
182 		if ((xid = cp->c_xid) == id) {
183 			clock_t runtime = cp->c_runtime;
184 			clock_t time_left = runtime - lbolt;
185 
186 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id, runtime);
187 			cp->c_idnext = ct->ct_freelist;
188 			ct->ct_freelist = cp;
189 			mutex_exit(&ct->ct_lock);
190 			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
191 			    "untimeout:ID %lx ticks_left %ld", id, time_left);
192 			return (time_left < 0 ? 0 : time_left);
193 		}
194 
195 		if (xid != (id | CALLOUT_EXECUTING))
196 			continue;
197 
198 		/*
199 		 * The callout we want to delete is currently executing.
200 		 * The DDI states that we must wait until the callout
201 		 * completes before returning, so we block on c_done until
202 		 * the callout ID changes (to zero if it's on the freelist,
203 		 * or to a new callout ID if it's in use).  This implicitly
204 		 * assumes that callout structures are persistent (they are).
205 		 */
206 		if (cp->c_executor == curthread) {
207 			/*
208 			 * The timeout handler called untimeout() on itself.
209 			 * Stupid, but legal.  We can't wait for the timeout
210 			 * to complete without deadlocking, so we just return.
211 			 */
212 			mutex_exit(&ct->ct_lock);
213 			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
214 			    "untimeout_self:ID %x", id);
215 			return (-1);
216 		}
217 		while (cp->c_xid == xid)
218 			cv_wait(&cp->c_done, &ct->ct_lock);
219 		mutex_exit(&ct->ct_lock);
220 		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
221 		    "untimeout_executing:ID %lx", id);
222 		return (-1);
223 	}
224 
225 	mutex_exit(&ct->ct_lock);
226 	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
227 	    "untimeout_bogus_id:ID %lx", id);
228 
229 	/*
230 	 * We didn't find the specified callout ID.  This means either
231 	 * (1) the callout already fired, or (2) the caller passed us
232 	 * a bogus value.  Perform a sanity check to detect case (2).
233 	 */
234 	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
235 	    CALLOUT_COUNTER_HIGH)
236 		panic("untimeout: impossible timeout id %lx", id);
237 
238 	return (-1);
239 }
240 
241 /*
242  * Do the actual work of executing callouts.  This routine is called either
243  * by a taskq_thread (normal case), or by softcall (realtime case).
244  */
245 static void
246 callout_execute(callout_table_t *ct)
247 {
248 	callout_t *cp;
249 	callout_id_t xid;
250 	clock_t runtime;
251 
252 	mutex_enter(&ct->ct_lock);
253 
254 	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
255 		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
256 		    cp != NULL; cp = cp->c_lbnext) {
257 			xid = cp->c_xid;
258 			if (cp->c_runtime != runtime ||
259 			    (xid & CALLOUT_EXECUTING))
260 				continue;
261 			cp->c_executor = curthread;
262 			cp->c_xid = xid |= CALLOUT_EXECUTING;
263 			mutex_exit(&ct->ct_lock);
264 			DTRACE_PROBE1(callout__start, callout_t *, cp);
265 			(*cp->c_func)(cp->c_arg);
266 			DTRACE_PROBE1(callout__end, callout_t *, cp);
267 			mutex_enter(&ct->ct_lock);
268 
269 			/*
270 			 * Delete callout from hash tables, return to freelist,
271 			 * and tell anyone who cares that we're done.
272 			 * Even though we dropped and reacquired ct->ct_lock,
273 			 * it's OK to pick up where we left off because only
274 			 * newly-created timeouts can precede cp on ct_lbhash,
275 			 * and those timeouts cannot be due on this tick.
276 			 */
277 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
278 			cp->c_idnext = ct->ct_freelist;
279 			ct->ct_freelist = cp;
280 			cp->c_xid = 0;	/* Indicate completion for c_done */
281 			cv_broadcast(&cp->c_done);
282 		}
283 		/*
284 		 * We have completed all callouts that were scheduled to
285 		 * run at "runtime".  If the global run time still matches
286 		 * our local copy, then we advance the global run time;
287 		 * otherwise, another callout thread must have already done so.
288 		 */
289 		if (ct->ct_runtime == runtime)
290 			ct->ct_runtime = runtime + 1;
291 	}
292 	mutex_exit(&ct->ct_lock);
293 }
294 
295 /*
296  * Schedule any callouts that are due on or before this tick.
297  */
298 static void
299 callout_schedule_1(callout_table_t *ct)
300 {
301 	callout_t *cp;
302 	clock_t curtime, runtime;
303 
304 	mutex_enter(&ct->ct_lock);
305 	ct->ct_curtime = curtime = lbolt;
306 	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
307 		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
308 		    cp != NULL; cp = cp->c_lbnext) {
309 			if (cp->c_runtime != runtime ||
310 			    (cp->c_xid & CALLOUT_EXECUTING))
311 				continue;
312 			mutex_exit(&ct->ct_lock);
313 			if (ct->ct_taskq == NULL)
314 				softcall((void (*)(void *))callout_execute, ct);
315 			else
316 				(void) taskq_dispatch(ct->ct_taskq,
317 				    (task_func_t *)callout_execute, ct,
318 				    KM_NOSLEEP);
319 			return;
320 		}
321 		ct->ct_runtime++;
322 	}
323 	mutex_exit(&ct->ct_lock);
324 }
325 
326 /*
327  * Schedule callouts for all callout tables.  Called by clock() on each tick.
328  */
329 void
330 callout_schedule(void)
331 {
332 	int f, t;
333 
334 	if (cpr_stop_callout)
335 		return;
336 
337 	for (t = 0; t < CALLOUT_NTYPES; t++)
338 		for (f = 0; f < callout_fanout; f++)
339 			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
340 }
341 
342 /*
343  * Callback handler used by CPR to stop and resume callouts.
344  */
345 /*ARGSUSED*/
346 static boolean_t
347 callout_cpr_callb(void *arg, int code)
348 {
349 	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
350 	return (B_TRUE);
351 }
352 
353 /*
354  * Initialize all callout tables.  Called at boot time just before clkstart().
355  */
356 void
357 callout_init(void)
358 {
359 	int f, t;
360 	int table_id;
361 	callout_table_t *ct;
362 
363 	callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus);
364 
365 	for (t = 0; t < CALLOUT_NTYPES; t++) {
366 		for (f = 0; f < CALLOUT_FANOUT; f++) {
367 			table_id = CALLOUT_TABLE(t, f);
368 			if (f >= callout_fanout) {
369 				callout_table[table_id] =
370 				    callout_table[table_id - callout_fanout];
371 				continue;
372 			}
373 			ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP);
374 			callout_table[table_id] = ct;
375 			ct->ct_short_id = (callout_id_t)table_id |
376 			    CALLOUT_COUNTER_HIGH;
377 			ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM;
378 			ct->ct_curtime = ct->ct_runtime = lbolt;
379 			if (t == CALLOUT_NORMAL) {
380 				/*
381 				 * Each callout thread consumes exactly one
382 				 * task structure while active.  Therefore,
383 				 * prepopulating with 2 * CALLOUT_THREADS tasks
384 				 * ensures that there's at least one task per
385 				 * thread that's either scheduled or on the
386 				 * freelist.  In turn, this guarantees that
387 				 * taskq_dispatch() will always either succeed
388 				 * (because there's a free task structure) or
389 				 * be unnecessary (because "callout_excute(ct)"
390 				 * has already scheduled).
391 				 */
392 				ct->ct_taskq =
393 				    taskq_create_instance("callout_taskq", f,
394 				    CALLOUT_THREADS, maxclsyspri,
395 				    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
396 				    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
397 			}
398 		}
399 	}
400 	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout");
401 }
402