xref: /titanic_54/usr/src/uts/common/os/callout.c (revision 30392143708689bd11c23f26010219185c61a4b8)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5a913396dSqiao  * Common Development and Distribution License (the "License").
6a913396dSqiao  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22a913396dSqiao  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate #include <sys/callo.h>
297c478bd9Sstevel@tonic-gate #include <sys/param.h>
307c478bd9Sstevel@tonic-gate #include <sys/types.h>
317c478bd9Sstevel@tonic-gate #include <sys/systm.h>
327c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
337c478bd9Sstevel@tonic-gate #include <sys/thread.h>
347c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
357c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
367c478bd9Sstevel@tonic-gate #include <sys/callb.h>
377c478bd9Sstevel@tonic-gate #include <sys/debug.h>
387c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
397c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
407c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
417c478bd9Sstevel@tonic-gate 
427c478bd9Sstevel@tonic-gate /*
437c478bd9Sstevel@tonic-gate  * Callout tables.  See timeout(9F) for details.
447c478bd9Sstevel@tonic-gate  */
457c478bd9Sstevel@tonic-gate static int cpr_stop_callout;
467c478bd9Sstevel@tonic-gate static int callout_fanout;
477c478bd9Sstevel@tonic-gate static int ncallout;
487c478bd9Sstevel@tonic-gate static callout_table_t *callout_table[CALLOUT_TABLES];
497c478bd9Sstevel@tonic-gate 
/*
 * Doubly-linked hash chain primitives.
 *
 * cthead is the lvalue holding the chain head, cp is the callout being
 * linked or unlinked, and cnext/cprev name the pair of callout_t link
 * members to use (c_idnext/c_idprev or c_lbnext/c_lbprev).
 *
 * Each macro expands to a single do { } while (0) statement, so it must be
 * followed by a semicolon and is safe as the body of an unbraced if/else.
 * Expression arguments are parenthesized where they are used.  cp (and, for
 * CALLOUT_HASH_UPDATE, ct, id and runtime) is evaluated more than once, so
 * arguments must be free of side effects.
 */

/* Push cp onto the front of the chain headed by cthead. */
#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev)	\
do {							\
	callout_t **headpp = &(cthead);			\
	callout_t *headp = *headpp;			\
	(cp)->cnext = headp;				\
	(cp)->cprev = NULL;				\
	if (headp != NULL)				\
		headp->cprev = (cp);			\
	*headpp = (cp);					\
} while (0)

/*
 * Unlink cp from the chain headed by cthead.  cp's own link members are
 * left untouched, so a walker holding cp can still follow cp->cnext.
 */
#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev)	\
do {							\
	callout_t *nextp = (cp)->cnext;			\
	callout_t *prevp = (cp)->cprev;			\
	if (nextp != NULL)				\
		nextp->cprev = prevp;			\
	if (prevp != NULL)				\
		prevp->cnext = nextp;			\
	else						\
		(cthead) = nextp;			\
} while (0)

/*
 * Insert cp into (INSDEL == INSERT) or remove it from (INSDEL == DELETE)
 * both hashes of table ct: the ID hash bucket chosen by id and the lbolt
 * hash bucket chosen by runtime.  ct->ct_lock must be held, and id and
 * runtime must match what is stored in cp (both are ASSERTed).
 */
#define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime)		\
do {									\
	ASSERT(MUTEX_HELD(&(ct)->ct_lock));				\
	ASSERT((cp)->c_xid == (id) && (cp)->c_runtime == (runtime));	\
	CALLOUT_HASH_##INSDEL((ct)->ct_idhash[CALLOUT_IDHASH(id)],	\
	    cp, c_idnext, c_idprev);					\
	CALLOUT_HASH_##INSDEL((ct)->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
	    cp, c_lbnext, c_lbprev);					\
} while (0)
807c478bd9Sstevel@tonic-gate 
/*
 * High-resolution queue primitives.
 *
 * ct->ct_hresq heads a doubly-linked list of callouts kept in ascending
 * c_hresms order; cnext/cprev name the callout_t link members to use
 * (c_hrnext/c_hrprev).
 *
 * Each macro expands to a single do { } while (0) statement, so it must be
 * followed by a semicolon and is safe as the body of an unbraced if/else.
 * Expression arguments are parenthesized where they are used.  ct, cp and
 * hresms are evaluated more than once, so arguments must be free of side
 * effects.
 */

/*
 * Link cp into ct's hres queue at the position given by hresms.  cp is
 * placed ahead of any queued callout whose c_hresms is equal to hresms.
 * The caller stores hresms in cp->c_hresms itself.
 */
#define	CALLOUT_HRES_INSERT(ct, cp, cnext, cprev, hresms)		\
do {									\
	callout_t *nextp = (ct)->ct_hresq;				\
	callout_t *prevp;						\
									\
	if (nextp == NULL || (hresms) <= nextp->c_hresms) {		\
		(cp)->cnext = nextp;					\
		(cp)->cprev = NULL;					\
		if (nextp != NULL)					\
			nextp->cprev = (cp);				\
		(ct)->ct_hresq = (cp);					\
	} else {							\
		do {							\
			prevp = nextp;					\
			nextp = nextp->cnext;				\
		} while (nextp != NULL && (hresms) > nextp->c_hresms);	\
		prevp->cnext = (cp);					\
		(cp)->cprev = prevp;					\
		(cp)->cnext = nextp;					\
		if (nextp != NULL)					\
			nextp->cprev = (cp);				\
	}								\
} while (0)

/*
 * Unlink cp from ct's hres queue.  The hresms argument is accepted only
 * for symmetry with CALLOUT_HRES_INSERT and is never evaluated.
 */
#define	CALLOUT_HRES_DELETE(ct, cp, cnext, cprev, hresms)	\
do {								\
	if ((cp) == (ct)->ct_hresq) {				\
		(ct)->ct_hresq = (cp)->cnext;			\
		if ((cp)->cnext != NULL)			\
			(cp)->cnext->cprev = NULL;		\
	} else {						\
		(cp)->cprev->cnext = (cp)->cnext;		\
		if ((cp)->cnext != NULL)			\
			(cp)->cnext->cprev = (cp)->cprev;	\
	}							\
} while (0)

/*
 * Insert cp into (INSDEL == INSERT) or remove it from (INSDEL == DELETE)
 * the hres queue of table ct.  ct->ct_lock must be held and id must match
 * cp->c_xid (both are ASSERTed).
 */
#define	CALLOUT_HRES_UPDATE(INSDEL, ct, cp, id, hresms)		\
do {								\
	ASSERT(MUTEX_HELD(&(ct)->ct_lock));			\
	ASSERT((cp)->c_xid == (id));				\
	CALLOUT_HRES_##INSDEL(ct, cp, c_hrnext,			\
	    c_hrprev, hresms);					\
} while (0)
123a913396dSqiao 
1247c478bd9Sstevel@tonic-gate /*
1257c478bd9Sstevel@tonic-gate  * Allocate a callout structure.  We try quite hard because we
1267c478bd9Sstevel@tonic-gate  * can't sleep, and if we can't do the allocation, we're toast.
1277c478bd9Sstevel@tonic-gate  * Failing all, we try a KM_PANIC allocation.
1287c478bd9Sstevel@tonic-gate  */
1297c478bd9Sstevel@tonic-gate static callout_t *
1307c478bd9Sstevel@tonic-gate callout_alloc(callout_table_t *ct)
1317c478bd9Sstevel@tonic-gate {
1327c478bd9Sstevel@tonic-gate 	size_t size = 0;
1337c478bd9Sstevel@tonic-gate 	callout_t *cp = NULL;
1347c478bd9Sstevel@tonic-gate 
1357c478bd9Sstevel@tonic-gate 	mutex_exit(&ct->ct_lock);
1367c478bd9Sstevel@tonic-gate 	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
1377c478bd9Sstevel@tonic-gate 	    KM_NOSLEEP | KM_PANIC);
1387c478bd9Sstevel@tonic-gate 	bzero(cp, sizeof (callout_t));
1397c478bd9Sstevel@tonic-gate 	ncallout++;
1407c478bd9Sstevel@tonic-gate 	mutex_enter(&ct->ct_lock);
1417c478bd9Sstevel@tonic-gate 	return (cp);
1427c478bd9Sstevel@tonic-gate }
1437c478bd9Sstevel@tonic-gate 
1447c478bd9Sstevel@tonic-gate /*
1457c478bd9Sstevel@tonic-gate  * Arrange that func(arg) be called after delta clock ticks.
1467c478bd9Sstevel@tonic-gate  */
1477c478bd9Sstevel@tonic-gate static timeout_id_t
1487c478bd9Sstevel@tonic-gate timeout_common(void (*func)(void *), void *arg, clock_t delta,
1497c478bd9Sstevel@tonic-gate     callout_table_t *ct)
1507c478bd9Sstevel@tonic-gate {
1517c478bd9Sstevel@tonic-gate 	callout_t	*cp;
1527c478bd9Sstevel@tonic-gate 	callout_id_t	id;
1537c478bd9Sstevel@tonic-gate 	clock_t		runtime;
154a913396dSqiao 	timestruc_t	now;
155a913396dSqiao 	int64_t		hresms;
156a913396dSqiao 
157a913396dSqiao 	gethrestime(&now);
1587c478bd9Sstevel@tonic-gate 
1597c478bd9Sstevel@tonic-gate 	mutex_enter(&ct->ct_lock);
1607c478bd9Sstevel@tonic-gate 
1617c478bd9Sstevel@tonic-gate 	if ((cp = ct->ct_freelist) == NULL)
1627c478bd9Sstevel@tonic-gate 		cp = callout_alloc(ct);
1637c478bd9Sstevel@tonic-gate 	else
1647c478bd9Sstevel@tonic-gate 		ct->ct_freelist = cp->c_idnext;
1657c478bd9Sstevel@tonic-gate 
1667c478bd9Sstevel@tonic-gate 	cp->c_func = func;
1677c478bd9Sstevel@tonic-gate 	cp->c_arg = arg;
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate 	/*
1707c478bd9Sstevel@tonic-gate 	 * Make sure the callout runs at least 1 tick in the future.
1717c478bd9Sstevel@tonic-gate 	 */
1727c478bd9Sstevel@tonic-gate 	if (delta <= 0)
1737c478bd9Sstevel@tonic-gate 		delta = 1;
1747c478bd9Sstevel@tonic-gate 	cp->c_runtime = runtime = lbolt + delta;
1757c478bd9Sstevel@tonic-gate 
176*30392143Sqiao 	/*
177*30392143Sqiao 	 * Calculate the future time in millisecond.
178*30392143Sqiao 	 * We must cast tv_sec and delta to 64-bit integers
179*30392143Sqiao 	 * to avoid integer overflow on 32-platforms.
180*30392143Sqiao 	 */
181*30392143Sqiao 	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC +
182*30392143Sqiao 	    TICK_TO_MSEC((int64_t)delta);
183*30392143Sqiao 
184a913396dSqiao 	cp->c_hresms = hresms;
185a913396dSqiao 
1867c478bd9Sstevel@tonic-gate 	/*
1877c478bd9Sstevel@tonic-gate 	 * Assign an ID to this callout
1887c478bd9Sstevel@tonic-gate 	 */
1897c478bd9Sstevel@tonic-gate 	if (delta > CALLOUT_LONGTERM_TICKS)
1907c478bd9Sstevel@tonic-gate 		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
1917c478bd9Sstevel@tonic-gate 		    CALLOUT_COUNTER_HIGH;
1927c478bd9Sstevel@tonic-gate 	else
1937c478bd9Sstevel@tonic-gate 		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
1947c478bd9Sstevel@tonic-gate 		    CALLOUT_COUNTER_HIGH;
1957c478bd9Sstevel@tonic-gate 
1967c478bd9Sstevel@tonic-gate 	cp->c_xid = id;
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate 	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime);
199a913396dSqiao 	CALLOUT_HRES_UPDATE(INSERT, ct, cp, id, hresms);
2007c478bd9Sstevel@tonic-gate 
2017c478bd9Sstevel@tonic-gate 	mutex_exit(&ct->ct_lock);
2027c478bd9Sstevel@tonic-gate 
2037c478bd9Sstevel@tonic-gate 	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
2047c478bd9Sstevel@tonic-gate 		"timeout:%K(%p) in %ld ticks, cp %p",
2057c478bd9Sstevel@tonic-gate 		func, arg, delta, cp);
2067c478bd9Sstevel@tonic-gate 
2077c478bd9Sstevel@tonic-gate 	return ((timeout_id_t)id);
2087c478bd9Sstevel@tonic-gate }
2097c478bd9Sstevel@tonic-gate 
2107c478bd9Sstevel@tonic-gate timeout_id_t
2117c478bd9Sstevel@tonic-gate timeout(void (*func)(void *), void *arg, clock_t delta)
2127c478bd9Sstevel@tonic-gate {
2137c478bd9Sstevel@tonic-gate 	return (timeout_common(func, arg, delta,
2147c478bd9Sstevel@tonic-gate 	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
2157c478bd9Sstevel@tonic-gate 
2167c478bd9Sstevel@tonic-gate }
2177c478bd9Sstevel@tonic-gate 
2187c478bd9Sstevel@tonic-gate timeout_id_t
2197c478bd9Sstevel@tonic-gate realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
2207c478bd9Sstevel@tonic-gate {
2217c478bd9Sstevel@tonic-gate 	return (timeout_common(func, arg, delta,
2227c478bd9Sstevel@tonic-gate 	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
2237c478bd9Sstevel@tonic-gate }
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate clock_t
2267c478bd9Sstevel@tonic-gate untimeout(timeout_id_t id_arg)
2277c478bd9Sstevel@tonic-gate {
2287c478bd9Sstevel@tonic-gate 	callout_id_t id = (callout_id_t)id_arg;
2297c478bd9Sstevel@tonic-gate 	callout_table_t *ct;
2307c478bd9Sstevel@tonic-gate 	callout_t *cp;
2317c478bd9Sstevel@tonic-gate 	callout_id_t xid;
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate 	ct = callout_table[id & CALLOUT_TABLE_MASK];
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate 	mutex_enter(&ct->ct_lock);
2367c478bd9Sstevel@tonic-gate 
2377c478bd9Sstevel@tonic-gate 	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {
2387c478bd9Sstevel@tonic-gate 
2397c478bd9Sstevel@tonic-gate 		if ((xid = cp->c_xid) == id) {
2407c478bd9Sstevel@tonic-gate 			clock_t runtime = cp->c_runtime;
2417c478bd9Sstevel@tonic-gate 			clock_t time_left = runtime - lbolt;
2427c478bd9Sstevel@tonic-gate 
2437c478bd9Sstevel@tonic-gate 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id, runtime);
244a913396dSqiao 			CALLOUT_HRES_UPDATE(DELETE, ct, cp, id, 0);
2457c478bd9Sstevel@tonic-gate 			cp->c_idnext = ct->ct_freelist;
2467c478bd9Sstevel@tonic-gate 			ct->ct_freelist = cp;
2477c478bd9Sstevel@tonic-gate 			mutex_exit(&ct->ct_lock);
2487c478bd9Sstevel@tonic-gate 			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
2497c478bd9Sstevel@tonic-gate 			    "untimeout:ID %lx ticks_left %ld", id, time_left);
2507c478bd9Sstevel@tonic-gate 			return (time_left < 0 ? 0 : time_left);
2517c478bd9Sstevel@tonic-gate 		}
2527c478bd9Sstevel@tonic-gate 
2537c478bd9Sstevel@tonic-gate 		if (xid != (id | CALLOUT_EXECUTING))
2547c478bd9Sstevel@tonic-gate 			continue;
2557c478bd9Sstevel@tonic-gate 
2567c478bd9Sstevel@tonic-gate 		/*
2577c478bd9Sstevel@tonic-gate 		 * The callout we want to delete is currently executing.
2587c478bd9Sstevel@tonic-gate 		 * The DDI states that we must wait until the callout
2597c478bd9Sstevel@tonic-gate 		 * completes before returning, so we block on c_done until
2607c478bd9Sstevel@tonic-gate 		 * the callout ID changes (to zero if it's on the freelist,
2617c478bd9Sstevel@tonic-gate 		 * or to a new callout ID if it's in use).  This implicitly
2627c478bd9Sstevel@tonic-gate 		 * assumes that callout structures are persistent (they are).
2637c478bd9Sstevel@tonic-gate 		 */
2647c478bd9Sstevel@tonic-gate 		if (cp->c_executor == curthread) {
2657c478bd9Sstevel@tonic-gate 			/*
2667c478bd9Sstevel@tonic-gate 			 * The timeout handler called untimeout() on itself.
2677c478bd9Sstevel@tonic-gate 			 * Stupid, but legal.  We can't wait for the timeout
2687c478bd9Sstevel@tonic-gate 			 * to complete without deadlocking, so we just return.
2697c478bd9Sstevel@tonic-gate 			 */
2707c478bd9Sstevel@tonic-gate 			mutex_exit(&ct->ct_lock);
2717c478bd9Sstevel@tonic-gate 			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
2727c478bd9Sstevel@tonic-gate 			    "untimeout_self:ID %x", id);
2737c478bd9Sstevel@tonic-gate 			return (-1);
2747c478bd9Sstevel@tonic-gate 		}
2757c478bd9Sstevel@tonic-gate 		while (cp->c_xid == xid)
2767c478bd9Sstevel@tonic-gate 			cv_wait(&cp->c_done, &ct->ct_lock);
2777c478bd9Sstevel@tonic-gate 		mutex_exit(&ct->ct_lock);
2787c478bd9Sstevel@tonic-gate 		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
2797c478bd9Sstevel@tonic-gate 		    "untimeout_executing:ID %lx", id);
2807c478bd9Sstevel@tonic-gate 		return (-1);
2817c478bd9Sstevel@tonic-gate 	}
2827c478bd9Sstevel@tonic-gate 
2837c478bd9Sstevel@tonic-gate 	mutex_exit(&ct->ct_lock);
2847c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
2857c478bd9Sstevel@tonic-gate 	    "untimeout_bogus_id:ID %lx", id);
2867c478bd9Sstevel@tonic-gate 
2877c478bd9Sstevel@tonic-gate 	/*
2887c478bd9Sstevel@tonic-gate 	 * We didn't find the specified callout ID.  This means either
2897c478bd9Sstevel@tonic-gate 	 * (1) the callout already fired, or (2) the caller passed us
2907c478bd9Sstevel@tonic-gate 	 * a bogus value.  Perform a sanity check to detect case (2).
2917c478bd9Sstevel@tonic-gate 	 */
2927c478bd9Sstevel@tonic-gate 	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
2937c478bd9Sstevel@tonic-gate 	    CALLOUT_COUNTER_HIGH)
2947c478bd9Sstevel@tonic-gate 		panic("untimeout: impossible timeout id %lx", id);
2957c478bd9Sstevel@tonic-gate 
2967c478bd9Sstevel@tonic-gate 	return (-1);
2977c478bd9Sstevel@tonic-gate }
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate /*
3007c478bd9Sstevel@tonic-gate  * Do the actual work of executing callouts.  This routine is called either
3017c478bd9Sstevel@tonic-gate  * by a taskq_thread (normal case), or by softcall (realtime case).
3027c478bd9Sstevel@tonic-gate  */
3037c478bd9Sstevel@tonic-gate static void
3047c478bd9Sstevel@tonic-gate callout_execute(callout_table_t *ct)
3057c478bd9Sstevel@tonic-gate {
3067c478bd9Sstevel@tonic-gate 	callout_t	*cp;
3077c478bd9Sstevel@tonic-gate 	callout_id_t	xid;
3087c478bd9Sstevel@tonic-gate 	clock_t		runtime;
309a913396dSqiao 	timestruc_t	now;
310a913396dSqiao 	int64_t		hresms;
3117c478bd9Sstevel@tonic-gate 
3127c478bd9Sstevel@tonic-gate 	mutex_enter(&ct->ct_lock);
3137c478bd9Sstevel@tonic-gate 
3147c478bd9Sstevel@tonic-gate 	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
3157c478bd9Sstevel@tonic-gate 		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
3167c478bd9Sstevel@tonic-gate 		    cp != NULL; cp = cp->c_lbnext) {
3177c478bd9Sstevel@tonic-gate 			xid = cp->c_xid;
3187c478bd9Sstevel@tonic-gate 			if (cp->c_runtime != runtime ||
3197c478bd9Sstevel@tonic-gate 			    (xid & CALLOUT_EXECUTING))
3207c478bd9Sstevel@tonic-gate 				continue;
3217c478bd9Sstevel@tonic-gate 			cp->c_executor = curthread;
3227c478bd9Sstevel@tonic-gate 			cp->c_xid = xid |= CALLOUT_EXECUTING;
3237c478bd9Sstevel@tonic-gate 			mutex_exit(&ct->ct_lock);
3247c478bd9Sstevel@tonic-gate 			DTRACE_PROBE1(callout__start, callout_t *, cp);
3257c478bd9Sstevel@tonic-gate 			(*cp->c_func)(cp->c_arg);
3267c478bd9Sstevel@tonic-gate 			DTRACE_PROBE1(callout__end, callout_t *, cp);
3277c478bd9Sstevel@tonic-gate 			mutex_enter(&ct->ct_lock);
3287c478bd9Sstevel@tonic-gate 
3297c478bd9Sstevel@tonic-gate 			/*
330a913396dSqiao 			 * Delete callout from both the hash tables and the
331a913396dSqiao 			 * hres queue, return it to freelist, and tell anyone
332a913396dSqiao 			 * who cares that we're done.
3337c478bd9Sstevel@tonic-gate 			 * Even though we dropped and reacquired ct->ct_lock,
3347c478bd9Sstevel@tonic-gate 			 * it's OK to pick up where we left off because only
3357c478bd9Sstevel@tonic-gate 			 * newly-created timeouts can precede cp on ct_lbhash,
3367c478bd9Sstevel@tonic-gate 			 * and those timeouts cannot be due on this tick.
3377c478bd9Sstevel@tonic-gate 			 */
3387c478bd9Sstevel@tonic-gate 			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
339a913396dSqiao 			CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
3407c478bd9Sstevel@tonic-gate 			cp->c_idnext = ct->ct_freelist;
3417c478bd9Sstevel@tonic-gate 			ct->ct_freelist = cp;
3427c478bd9Sstevel@tonic-gate 			cp->c_xid = 0;	/* Indicate completion for c_done */
3437c478bd9Sstevel@tonic-gate 			cv_broadcast(&cp->c_done);
3447c478bd9Sstevel@tonic-gate 		}
3457c478bd9Sstevel@tonic-gate 		/*
3467c478bd9Sstevel@tonic-gate 		 * We have completed all callouts that were scheduled to
3477c478bd9Sstevel@tonic-gate 		 * run at "runtime".  If the global run time still matches
3487c478bd9Sstevel@tonic-gate 		 * our local copy, then we advance the global run time;
3497c478bd9Sstevel@tonic-gate 		 * otherwise, another callout thread must have already done so.
3507c478bd9Sstevel@tonic-gate 		 */
3517c478bd9Sstevel@tonic-gate 		if (ct->ct_runtime == runtime)
3527c478bd9Sstevel@tonic-gate 			ct->ct_runtime = runtime + 1;
3537c478bd9Sstevel@tonic-gate 	}
354a913396dSqiao 
355a913396dSqiao 	gethrestime(&now);
356a913396dSqiao 
357*30392143Sqiao 	/*
358*30392143Sqiao 	 * Calculate the future time in millisecond.
359*30392143Sqiao 	 * We must cast tv_sec to 64-bit integer
360*30392143Sqiao 	 * to avoid integer overflow on 32-platforms.
361*30392143Sqiao 	 */
362*30392143Sqiao 	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;
363a913396dSqiao 
364a913396dSqiao 	cp = ct->ct_hresq;
365a913396dSqiao 	while (cp != NULL && hresms >= cp->c_hresms) {
366a913396dSqiao 		xid = cp->c_xid;
367a913396dSqiao 		if (xid & CALLOUT_EXECUTING) {
368a913396dSqiao 			cp = cp->c_hrnext;
369a913396dSqiao 			continue;
370a913396dSqiao 		}
371a913396dSqiao 		cp->c_executor = curthread;
372a913396dSqiao 		cp->c_xid = xid |= CALLOUT_EXECUTING;
373a913396dSqiao 		runtime = cp->c_runtime;
374a913396dSqiao 		mutex_exit(&ct->ct_lock);
375a913396dSqiao 		DTRACE_PROBE1(callout__start, callout_t *, cp);
376a913396dSqiao 		(*cp->c_func)(cp->c_arg);
377a913396dSqiao 		DTRACE_PROBE1(callout__end, callout_t *, cp);
378a913396dSqiao 		mutex_enter(&ct->ct_lock);
379a913396dSqiao 
380a913396dSqiao 		/*
381a913396dSqiao 		 * See comments above.
382a913396dSqiao 		 */
383a913396dSqiao 		CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
384a913396dSqiao 		CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
385a913396dSqiao 		cp->c_idnext = ct->ct_freelist;
386a913396dSqiao 		ct->ct_freelist = cp;
387a913396dSqiao 		cp->c_xid = 0;	/* Indicate completion for c_done */
388a913396dSqiao 		cv_broadcast(&cp->c_done);
389a913396dSqiao 
390a913396dSqiao 		/*
391a913396dSqiao 		 * Start over from the head of the list, see if
392a913396dSqiao 		 * any timeout bearing an earlier hres time.
393a913396dSqiao 		 */
394a913396dSqiao 		cp = ct->ct_hresq;
395a913396dSqiao 	}
3967c478bd9Sstevel@tonic-gate 	mutex_exit(&ct->ct_lock);
3977c478bd9Sstevel@tonic-gate }
3987c478bd9Sstevel@tonic-gate 
3997c478bd9Sstevel@tonic-gate /*
4007c478bd9Sstevel@tonic-gate  * Schedule any callouts that are due on or before this tick.
4017c478bd9Sstevel@tonic-gate  */
4027c478bd9Sstevel@tonic-gate static void
4037c478bd9Sstevel@tonic-gate callout_schedule_1(callout_table_t *ct)
4047c478bd9Sstevel@tonic-gate {
4057c478bd9Sstevel@tonic-gate 	callout_t	*cp;
4067c478bd9Sstevel@tonic-gate 	clock_t		curtime, runtime;
407a913396dSqiao 	timestruc_t	now;
408a913396dSqiao 	int64_t		hresms;
4097c478bd9Sstevel@tonic-gate 
4107c478bd9Sstevel@tonic-gate 	mutex_enter(&ct->ct_lock);
4117c478bd9Sstevel@tonic-gate 	ct->ct_curtime = curtime = lbolt;
4127c478bd9Sstevel@tonic-gate 	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
4137c478bd9Sstevel@tonic-gate 		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
4147c478bd9Sstevel@tonic-gate 		    cp != NULL; cp = cp->c_lbnext) {
4157c478bd9Sstevel@tonic-gate 			if (cp->c_runtime != runtime ||
4167c478bd9Sstevel@tonic-gate 			    (cp->c_xid & CALLOUT_EXECUTING))
4177c478bd9Sstevel@tonic-gate 				continue;
4187c478bd9Sstevel@tonic-gate 			mutex_exit(&ct->ct_lock);
4197c478bd9Sstevel@tonic-gate 			if (ct->ct_taskq == NULL)
4207c478bd9Sstevel@tonic-gate 				softcall((void (*)(void *))callout_execute, ct);
4217c478bd9Sstevel@tonic-gate 			else
4227c478bd9Sstevel@tonic-gate 				(void) taskq_dispatch(ct->ct_taskq,
4237c478bd9Sstevel@tonic-gate 				    (task_func_t *)callout_execute, ct,
4247c478bd9Sstevel@tonic-gate 				    KM_NOSLEEP);
4257c478bd9Sstevel@tonic-gate 			return;
4267c478bd9Sstevel@tonic-gate 		}
4277c478bd9Sstevel@tonic-gate 		ct->ct_runtime++;
4287c478bd9Sstevel@tonic-gate 	}
429a913396dSqiao 
430a913396dSqiao 	gethrestime(&now);
431a913396dSqiao 
432*30392143Sqiao 	/*
433*30392143Sqiao 	 * Calculate the future time in millisecond.
434*30392143Sqiao 	 * We must cast tv_sec to 64-bit integer
435*30392143Sqiao 	 * to avoid integer overflow on 32-platforms.
436*30392143Sqiao 	 */
437*30392143Sqiao 	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;
438a913396dSqiao 
439a913396dSqiao 	cp = ct->ct_hresq;
440a913396dSqiao 	while (cp != NULL && hresms >= cp->c_hresms) {
441a913396dSqiao 		if (cp->c_xid & CALLOUT_EXECUTING) {
442a913396dSqiao 			cp = cp->c_hrnext;
443a913396dSqiao 			continue;
444a913396dSqiao 		}
445a913396dSqiao 		mutex_exit(&ct->ct_lock);
446a913396dSqiao 		if (ct->ct_taskq == NULL)
447a913396dSqiao 			softcall((void (*)(void *))callout_execute, ct);
448a913396dSqiao 		else
449a913396dSqiao 			(void) taskq_dispatch(ct->ct_taskq,
450a913396dSqiao 			    (task_func_t *)callout_execute, ct, KM_NOSLEEP);
451a913396dSqiao 		return;
452a913396dSqiao 	}
4537c478bd9Sstevel@tonic-gate 	mutex_exit(&ct->ct_lock);
4547c478bd9Sstevel@tonic-gate }
4557c478bd9Sstevel@tonic-gate 
4567c478bd9Sstevel@tonic-gate /*
4577c478bd9Sstevel@tonic-gate  * Schedule callouts for all callout tables.  Called by clock() on each tick.
4587c478bd9Sstevel@tonic-gate  */
4597c478bd9Sstevel@tonic-gate void
4607c478bd9Sstevel@tonic-gate callout_schedule(void)
4617c478bd9Sstevel@tonic-gate {
4627c478bd9Sstevel@tonic-gate 	int f, t;
4637c478bd9Sstevel@tonic-gate 
4647c478bd9Sstevel@tonic-gate 	if (cpr_stop_callout)
4657c478bd9Sstevel@tonic-gate 		return;
4667c478bd9Sstevel@tonic-gate 
4677c478bd9Sstevel@tonic-gate 	for (t = 0; t < CALLOUT_NTYPES; t++)
4687c478bd9Sstevel@tonic-gate 		for (f = 0; f < callout_fanout; f++)
4697c478bd9Sstevel@tonic-gate 			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
4707c478bd9Sstevel@tonic-gate }
4717c478bd9Sstevel@tonic-gate 
4727c478bd9Sstevel@tonic-gate /*
4737c478bd9Sstevel@tonic-gate  * Callback handler used by CPR to stop and resume callouts.
4747c478bd9Sstevel@tonic-gate  */
4757c478bd9Sstevel@tonic-gate /*ARGSUSED*/
4767c478bd9Sstevel@tonic-gate static boolean_t
4777c478bd9Sstevel@tonic-gate callout_cpr_callb(void *arg, int code)
4787c478bd9Sstevel@tonic-gate {
4797c478bd9Sstevel@tonic-gate 	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
4807c478bd9Sstevel@tonic-gate 	return (B_TRUE);
4817c478bd9Sstevel@tonic-gate }
4827c478bd9Sstevel@tonic-gate 
4837c478bd9Sstevel@tonic-gate /*
4847c478bd9Sstevel@tonic-gate  * Initialize all callout tables.  Called at boot time just before clkstart().
4857c478bd9Sstevel@tonic-gate  */
4867c478bd9Sstevel@tonic-gate void
4877c478bd9Sstevel@tonic-gate callout_init(void)
4887c478bd9Sstevel@tonic-gate {
4897c478bd9Sstevel@tonic-gate 	int f, t;
4907c478bd9Sstevel@tonic-gate 	int table_id;
4917c478bd9Sstevel@tonic-gate 	callout_table_t *ct;
4927c478bd9Sstevel@tonic-gate 
4937c478bd9Sstevel@tonic-gate 	callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus);
4947c478bd9Sstevel@tonic-gate 
4957c478bd9Sstevel@tonic-gate 	for (t = 0; t < CALLOUT_NTYPES; t++) {
4967c478bd9Sstevel@tonic-gate 		for (f = 0; f < CALLOUT_FANOUT; f++) {
4977c478bd9Sstevel@tonic-gate 			table_id = CALLOUT_TABLE(t, f);
4987c478bd9Sstevel@tonic-gate 			if (f >= callout_fanout) {
4997c478bd9Sstevel@tonic-gate 				callout_table[table_id] =
5007c478bd9Sstevel@tonic-gate 				    callout_table[table_id - callout_fanout];
5017c478bd9Sstevel@tonic-gate 				continue;
5027c478bd9Sstevel@tonic-gate 			}
5037c478bd9Sstevel@tonic-gate 			ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP);
5047c478bd9Sstevel@tonic-gate 			callout_table[table_id] = ct;
5057c478bd9Sstevel@tonic-gate 			ct->ct_short_id = (callout_id_t)table_id |
5067c478bd9Sstevel@tonic-gate 			    CALLOUT_COUNTER_HIGH;
5077c478bd9Sstevel@tonic-gate 			ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM;
5087c478bd9Sstevel@tonic-gate 			ct->ct_curtime = ct->ct_runtime = lbolt;
5097c478bd9Sstevel@tonic-gate 			if (t == CALLOUT_NORMAL) {
5107c478bd9Sstevel@tonic-gate 				/*
5117c478bd9Sstevel@tonic-gate 				 * Each callout thread consumes exactly one
5127c478bd9Sstevel@tonic-gate 				 * task structure while active.  Therefore,
5137c478bd9Sstevel@tonic-gate 				 * prepopulating with 2 * CALLOUT_THREADS tasks
5147c478bd9Sstevel@tonic-gate 				 * ensures that there's at least one task per
5157c478bd9Sstevel@tonic-gate 				 * thread that's either scheduled or on the
5167c478bd9Sstevel@tonic-gate 				 * freelist.  In turn, this guarantees that
5177c478bd9Sstevel@tonic-gate 				 * taskq_dispatch() will always either succeed
5187c478bd9Sstevel@tonic-gate 				 * (because there's a free task structure) or
5197c478bd9Sstevel@tonic-gate 				 * be unnecessary (because "callout_excute(ct)"
5207c478bd9Sstevel@tonic-gate 				 * has already scheduled).
5217c478bd9Sstevel@tonic-gate 				 */
5227c478bd9Sstevel@tonic-gate 				ct->ct_taskq =
5237c478bd9Sstevel@tonic-gate 				    taskq_create_instance("callout_taskq", f,
5247c478bd9Sstevel@tonic-gate 				    CALLOUT_THREADS, maxclsyspri,
5257c478bd9Sstevel@tonic-gate 				    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
5267c478bd9Sstevel@tonic-gate 				    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
5277c478bd9Sstevel@tonic-gate 			}
5287c478bd9Sstevel@tonic-gate 		}
5297c478bd9Sstevel@tonic-gate 	}
5307c478bd9Sstevel@tonic-gate 	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout");
5317c478bd9Sstevel@tonic-gate }
532