xref: /titanic_52/usr/src/uts/common/os/callout.c (revision c7158ae983f5a04c4a998f468ecefba6d23ba721)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/callo.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/cpuvar.h>
33 #include <sys/thread.h>
34 #include <sys/kmem.h>
35 #include <sys/cmn_err.h>
36 #include <sys/callb.h>
37 #include <sys/debug.h>
38 #include <sys/vtrace.h>
39 #include <sys/sysmacros.h>
40 #include <sys/sdt.h>
41 
/*
 * Callout tables.  See timeout(9F) for details.
 */
/*
 * Set by callout_cpr_callb() to nonzero when it is called with
 * CB_CODE_CPR_CHKPT and to zero for any other code.  While nonzero,
 * callout_schedule() returns without scheduling anything.
 */
static int cpr_stop_callout;
/*
 * Number of distinct tables allocated per callout type; computed in
 * callout_init() as MIN(CALLOUT_FANOUT, max_ncpus).
 */
static int callout_fanout;
/* Incremented each time callout_alloc() allocates a callout structure. */
static int ncallout;
/* Table pointers, indexed by CALLOUT_TABLE(type, fanout index). */
static callout_table_t *callout_table[CALLOUT_TABLES];
49 
/*
 * Link cp in at the front of the doubly-linked chain whose head pointer
 * is cthead.  cnext and cprev are the names of the link members to use.
 * The expansion is a complete brace-enclosed block, so it may be written
 * without a trailing semicolon.
 */
#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev)	\
{							\
	callout_t **bucketpp = &cthead;			\
	callout_t *oldfirst = *bucketpp;		\
	cp->cprev = NULL;				\
	cp->cnext = oldfirst;				\
	if (oldfirst != NULL)				\
		oldfirst->cprev = cp;			\
	*bucketpp = cp;					\
}
60 
/*
 * Unlink cp from the doubly-linked chain whose head pointer is cthead.
 * cnext and cprev are the names of the link members to use.  When cp is
 * the first element, the head pointer is moved to cp's successor.  cp's
 * own link members are left as they were.
 */
#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev)	\
{							\
	callout_t *succ = cp->cnext;			\
	callout_t *pred = cp->cprev;			\
	if (pred == NULL)				\
		cthead = succ;				\
	else						\
		pred->cnext = succ;			\
	if (succ != NULL)				\
		succ->cprev = pred;			\
}
72 
/*
 * Apply CALLOUT_HASH_INSERT or CALLOUT_HASH_DELETE (selected by INSDEL,
 * which is token-pasted onto "CALLOUT_HASH_") to both hash chains of
 * table ct for callout cp: the ID hash bucket chosen by CALLOUT_IDHASH(id)
 * using the c_idnext/c_idprev links, and the lbolt hash bucket chosen by
 * CALLOUT_LBHASH(runtime) using the c_lbnext/c_lbprev links.
 *
 * Asserts that ct->ct_lock is held, that cp->c_xid equals id, and that
 * either cp->c_runtime equals runtime or cp->c_runhrtime is no later
 * than runhrtime.
 *
 * The expansion is a sequence of several statements, not a single one,
 * so it must only be used where a statement list is valid (for example,
 * not as the unbraced body of an if).
 */
#define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime, runhrtime)	\
	ASSERT(MUTEX_HELD(&ct->ct_lock));				\
	ASSERT(cp->c_xid == id && ((cp->c_runtime == runtime) ||	\
	    (cp->c_runhrtime <= runhrtime)));				\
	CALLOUT_HASH_##INSDEL(ct->ct_idhash[CALLOUT_IDHASH(id)],	\
	cp, c_idnext, c_idprev)						\
	CALLOUT_HASH_##INSDEL(ct->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
	cp, c_lbnext, c_lbprev)
81 
82 /*
83  * Allocate a callout structure.  We try quite hard because we
84  * can't sleep, and if we can't do the allocation, we're toast.
85  * Failing all, we try a KM_PANIC allocation.
86  */
87 static callout_t *
88 callout_alloc(callout_table_t *ct)
89 {
90 	size_t size = 0;
91 	callout_t *cp = NULL;
92 
93 	mutex_exit(&ct->ct_lock);
94 	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
95 	    KM_NOSLEEP | KM_PANIC);
96 	bzero(cp, sizeof (callout_t));
97 	ncallout++;
98 	mutex_enter(&ct->ct_lock);
99 	return (cp);
100 }
101 
102 /*
103  * Arrange that func(arg) be called after delta clock ticks.
104  */
105 static timeout_id_t
106 timeout_common(void (*func)(void *), void *arg, clock_t delta,
107     callout_table_t *ct)
108 {
109 	callout_t *cp;
110 	callout_id_t id;
111 	clock_t runtime;
112 	timestruc_t start;
113 	int64_t runhrtime;
114 
115 	gethrestime_lasttick(&start);
116 
117 	mutex_enter(&ct->ct_lock);
118 
119 	if ((cp = ct->ct_freelist) == NULL)
120 		cp = callout_alloc(ct);
121 	else
122 		ct->ct_freelist = cp->c_idnext;
123 
124 	cp->c_func = func;
125 	cp->c_arg = arg;
126 
127 	/*
128 	 * Make sure the callout runs at least 1 tick in the future.
129 	 */
130 	if (delta <= 0)
131 		delta = 1;
132 	cp->c_runtime = runtime = lbolt + delta;
133 	cp->c_runhrtime = runhrtime = delta + timespectohz64(&start);
134 
135 	/*
136 	 * Assign an ID to this callout
137 	 */
138 	if (delta > CALLOUT_LONGTERM_TICKS)
139 		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
140 		    CALLOUT_COUNTER_HIGH;
141 	else
142 		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
143 		    CALLOUT_COUNTER_HIGH;
144 
145 	cp->c_xid = id;
146 
147 	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime, runhrtime);
148 
149 	mutex_exit(&ct->ct_lock);
150 
151 	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
152 	    "timeout:%K(%p) in %ld ticks, cp %p",
153 	    func, arg, delta, cp);
154 
155 	return ((timeout_id_t)id);
156 }
157 
158 timeout_id_t
159 timeout(void (*func)(void *), void *arg, clock_t delta)
160 {
161 	return (timeout_common(func, arg, delta,
162 	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
163 
164 }
165 
166 timeout_id_t
167 realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
168 {
169 	return (timeout_common(func, arg, delta,
170 	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
171 }
172 
173 clock_t
174 untimeout(timeout_id_t id_arg)
175 {
176 	callout_id_t id = (callout_id_t)id_arg;
177 	callout_table_t *ct;
178 	callout_t *cp;
179 	callout_id_t xid;
180 
181 	ct = callout_table[id & CALLOUT_TABLE_MASK];
182 
183 	mutex_enter(&ct->ct_lock);
184 
185 	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {
186 
187 		if ((xid = cp->c_xid) == id) {
188 			clock_t runtime = cp->c_runtime;
189 			int64_t runhrtime = cp->c_runhrtime;
190 			clock_t time_left = runtime - lbolt;
191 
192 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id,
193 			    runtime, runhrtime);
194 
195 			cp->c_idnext = ct->ct_freelist;
196 			ct->ct_freelist = cp;
197 			mutex_exit(&ct->ct_lock);
198 			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
199 			    "untimeout:ID %lx ticks_left %ld", id, time_left);
200 			return (time_left < 0 ? 0 : time_left);
201 		}
202 
203 		if (xid != (id | CALLOUT_EXECUTING))
204 			continue;
205 
206 		/*
207 		 * The callout we want to delete is currently executing.
208 		 * The DDI states that we must wait until the callout
209 		 * completes before returning, so we block on c_done until
210 		 * the callout ID changes (to zero if it's on the freelist,
211 		 * or to a new callout ID if it's in use).  This implicitly
212 		 * assumes that callout structures are persistent (they are).
213 		 */
214 		if (cp->c_executor == curthread) {
215 			/*
216 			 * The timeout handler called untimeout() on itself.
217 			 * Stupid, but legal.  We can't wait for the timeout
218 			 * to complete without deadlocking, so we just return.
219 			 */
220 			mutex_exit(&ct->ct_lock);
221 			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
222 			    "untimeout_self:ID %x", id);
223 			return (-1);
224 		}
225 		while (cp->c_xid == xid)
226 			cv_wait(&cp->c_done, &ct->ct_lock);
227 		mutex_exit(&ct->ct_lock);
228 		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
229 		    "untimeout_executing:ID %lx", id);
230 		return (-1);
231 	}
232 
233 	mutex_exit(&ct->ct_lock);
234 	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
235 	    "untimeout_bogus_id:ID %lx", id);
236 
237 	/*
238 	 * We didn't find the specified callout ID.  This means either
239 	 * (1) the callout already fired, or (2) the caller passed us
240 	 * a bogus value.  Perform a sanity check to detect case (2).
241 	 */
242 	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
243 	    CALLOUT_COUNTER_HIGH)
244 		panic("untimeout: impossible timeout id %lx", id);
245 
246 	return (-1);
247 }
248 
/*
 * Do the actual work of executing callouts.  This routine is called either
 * by a taskq_thread (normal case), or by softcall (realtime case).
 *
 * Processes the lbolt hash buckets of ct for every tick from ct_runtime
 * up to ct_curtime, invoking the handler of each callout that is due and
 * not already marked as executing.  ct_lock is held whenever the table is
 * examined or modified and is dropped around each handler invocation.
 */
static void
callout_execute(callout_table_t *ct)
{
	callout_t *cp;
	callout_id_t xid;
	clock_t runtime;
	int64_t curhrtime;

	mutex_enter(&ct->ct_lock);

	/*
	 * Assuming the system time can be set forward and backward
	 * at any time. If it is set backward, we will measure the
	 * c_runtime; otherwise, we will compare c_runhrtime with
	 * ct_curhrtime.
	 */
	curhrtime = ct->ct_curhrtime;
	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			xid = cp->c_xid;
			/*
			 * Skip entries that are not yet due by either
			 * measure, and entries that are already executing.
			 */
			if ((cp->c_runtime != runtime &&
			    cp->c_runhrtime > curhrtime) ||
			    (xid & CALLOUT_EXECUTING))
				continue;
			/*
			 * Record the executing thread and mark the ID as
			 * executing before the lock is dropped; untimeout()
			 * inspects both while holding ct_lock.
			 */
			cp->c_executor = curthread;
			cp->c_xid = xid |= CALLOUT_EXECUTING;
			mutex_exit(&ct->ct_lock);
			DTRACE_PROBE1(callout__start, callout_t *, cp);
			(*cp->c_func)(cp->c_arg);
			DTRACE_PROBE1(callout__end, callout_t *, cp);
			mutex_enter(&ct->ct_lock);

			/*
			 * Delete callout from hash tables, return to freelist,
			 * and tell anyone who cares that we're done.
			 * Even though we dropped and reacquired ct->ct_lock,
			 * it's OK to pick up where we left off because only
			 * newly-created timeouts can precede cp on ct_lbhash,
			 * and those timeouts cannot be due on this tick.
			 */
			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid,
			    runtime, curhrtime);

			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			cp->c_xid = 0;	/* Indicate completion for c_done */
			cv_broadcast(&cp->c_done);
		}
		/*
		 * We have completed all callouts that were scheduled to
		 * run at "runtime".  If the global run time still matches
		 * our local copy, then we advance the global run time;
		 * otherwise, another callout thread must have already done so.
		 */
		if (ct->ct_runtime == runtime)
			ct->ct_runtime = runtime + 1;
	}
	mutex_exit(&ct->ct_lock);
}
313 
/*
 * Schedule any callouts that are due on or before this tick.
 *
 * Records the current lbolt in ct_curtime and the current time, converted
 * to ticks, in ct_curhrtime.  It then scans the lbolt hash buckets from
 * ct_runtime up to the current tick.  On finding the first callout that
 * is due and not already executing, it hands callout_execute(ct) to
 * softcall() (when the table has no taskq) or to the table's taskq, and
 * returns.  Each bucket found to contain nothing runnable advances
 * ct_runtime by one tick.
 */
static void
callout_schedule_1(callout_table_t *ct)
{
	callout_t *cp;
	clock_t curtime, runtime;
	timestruc_t now;
	int64_t curhrtime;

	gethrestime(&now);
	curhrtime = timespectohz64(&now);

	mutex_enter(&ct->ct_lock);
	ct->ct_curtime = curtime = lbolt;

	/*
	 * We use both the conditions cp->c_runtime == runtime and
	 * cp->c_runhrtime <= curhrtime to determine a timeout is
	 * premature or not. If the system time has been set backwards,
	 * then cp->c_runtime == runtime will become true first.
	 * Otherwise, we test cp->c_runhrtime <= curhrtime
	 */
	ct->ct_curhrtime = curhrtime;
	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			/*
			 * Skip entries that are not yet due by either
			 * measure, and entries that are already executing.
			 */
			if ((cp->c_runtime != runtime &&
			    cp->c_runhrtime > curhrtime) ||
			    (cp->c_xid & CALLOUT_EXECUTING))
				continue;
			/*
			 * Something is runnable: drop the lock and hand
			 * the table to callout_execute(), which does its
			 * own scan.  One dispatch per call is enough.
			 */
			mutex_exit(&ct->ct_lock);
			if (ct->ct_taskq == NULL)
				softcall((void (*)(void *))callout_execute, ct);
			else
				/*
				 * NOTE(review): KM_NOSLEEP is passed as the
				 * taskq_dispatch() flags argument; presumably
				 * equivalent to TQ_NOSLEEP -- confirm.  The
				 * result is deliberately ignored.
				 */
				(void) taskq_dispatch(ct->ct_taskq,
				    (task_func_t *)callout_execute, ct,
				    KM_NOSLEEP);
			return;
		}
		/* Nothing runnable in this bucket; move on to the next tick. */
		ct->ct_runtime++;
	}
	mutex_exit(&ct->ct_lock);
}
359 
360 /*
361  * Schedule callouts for all callout tables.  Called by clock() on each tick.
362  */
363 void
364 callout_schedule(void)
365 {
366 	int f, t;
367 
368 	if (cpr_stop_callout)
369 		return;
370 
371 	for (t = 0; t < CALLOUT_NTYPES; t++)
372 		for (f = 0; f < callout_fanout; f++)
373 			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
374 }
375 
376 /*
377  * Callback handler used by CPR to stop and resume callouts.
378  */
379 /*ARGSUSED*/
380 static boolean_t
381 callout_cpr_callb(void *arg, int code)
382 {
383 	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
384 	return (B_TRUE);
385 }
386 
387 /*
388  * Initialize all callout tables.  Called at boot time just before clkstart().
389  */
390 void
391 callout_init(void)
392 {
393 	int f, t;
394 	int table_id;
395 	callout_table_t *ct;
396 
397 	callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus);
398 
399 	for (t = 0; t < CALLOUT_NTYPES; t++) {
400 		for (f = 0; f < CALLOUT_FANOUT; f++) {
401 			table_id = CALLOUT_TABLE(t, f);
402 			if (f >= callout_fanout) {
403 				callout_table[table_id] =
404 				    callout_table[table_id - callout_fanout];
405 				continue;
406 			}
407 			ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP);
408 			callout_table[table_id] = ct;
409 			ct->ct_short_id = (callout_id_t)table_id |
410 			    CALLOUT_COUNTER_HIGH;
411 			ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM;
412 			ct->ct_curtime = ct->ct_runtime = lbolt;
413 
414 			/*
415 			 * We can not call gethrestime() at this moment
416 			 * since the system time has not been validated.
417 			 * So Set ct_curhrtime to zero.
418 			 */
419 			ct->ct_curhrtime = 0;
420 
421 			if (t == CALLOUT_NORMAL) {
422 				/*
423 				 * Each callout thread consumes exactly one
424 				 * task structure while active.  Therefore,
425 				 * prepopulating with 2 * CALLOUT_THREADS tasks
426 				 * ensures that there's at least one task per
427 				 * thread that's either scheduled or on the
428 				 * freelist.  In turn, this guarantees that
429 				 * taskq_dispatch() will always either succeed
430 				 * (because there's a free task structure) or
431 				 * be unnecessary (because "callout_excute(ct)"
432 				 * has already scheduled).
433 				 */
434 				ct->ct_taskq =
435 				    taskq_create_instance("callout_taskq", f,
436 				    CALLOUT_THREADS, maxclsyspri,
437 				    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
438 				    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
439 			}
440 		}
441 	}
442 	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout");
443 }
444