/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/callo.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>

/*
 * Callout tables.  See timeout(9F) for details.
 */
static int cpr_stop_callout;
static int callout_fanout;
static int ncallout;
static callout_table_t *callout_table[CALLOUT_TABLES];

#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev)	\
{							\
	callout_t **headpp = &cthead;			\
	callout_t *headp = *headpp;			\
	cp->cnext = headp;				\
	cp->cprev = NULL;				\
	if (headp != NULL)				\
		headp->cprev = cp;			\
	*headpp = cp;					\
}

#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev)	\
{							\
	callout_t *nextp = cp->cnext;			\
	callout_t *prevp = cp->cprev;			\
	if (nextp != NULL)				\
		nextp->cprev = prevp;			\
	if (prevp != NULL)				\
		prevp->cnext = nextp;			\
	else						\
		cthead = nextp;				\
}

#define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime, runhrtime)	\
	ASSERT(MUTEX_HELD(&ct->ct_lock));				\
	ASSERT(cp->c_xid == id && ((cp->c_runtime == runtime) ||	\
	    (cp->c_runhrtime <= runhrtime)));				\
	CALLOUT_HASH_##INSDEL(ct->ct_idhash[CALLOUT_IDHASH(id)],	\
	    cp, c_idnext, c_idprev)					\
	CALLOUT_HASH_##INSDEL(ct->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
	    cp, c_lbnext, c_lbprev)
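/*
 * Each callout is linked onto two doubly-linked, NULL-terminated hash
 * chains at once: an ID chain (ct_idhash, keyed by callout ID) and a
 * "lbolt" chain (ct_lbhash, keyed by expiration time).  As an
 * illustrative sketch, the invocation
 *
 *	CALLOUT_HASH_INSERT(ct->ct_idhash[CALLOUT_IDHASH(id)],
 *	    cp, c_idnext, c_idprev)
 *
 * expands to roughly the following head insertion, where headpp points
 * at the selected hash bucket:
 *
 *	cp->c_idnext = *headpp;
 *	cp->c_idprev = NULL;
 *	if (*headpp != NULL)
 *		(*headpp)->c_idprev = cp;
 *	*headpp = cp;
 *
 * CALLOUT_HASH_UPDATE simply applies the insert or delete to both
 * chains, and asserts that ct_lock is held while doing so.
 */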
/*
 * Allocate a callout structure.  We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing all, we try a KM_PANIC allocation.
 */
static callout_t *
callout_alloc(callout_table_t *ct)
{
	size_t size = 0;
	callout_t *cp = NULL;

	mutex_exit(&ct->ct_lock);
	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
	    KM_NOSLEEP | KM_PANIC);
	bzero(cp, sizeof (callout_t));
	ncallout++;
	mutex_enter(&ct->ct_lock);

	return (cp);
}

/*
 * Arrange that func(arg) be called after delta clock ticks.
 */
static timeout_id_t
timeout_common(void (*func)(void *), void *arg, clock_t delta,
    callout_table_t *ct)
{
	callout_t *cp;
	callout_id_t id;
	clock_t runtime;
	timestruc_t start;
	int64_t runhrtime;

	gethrestime_lasttick(&start);

	mutex_enter(&ct->ct_lock);

	if ((cp = ct->ct_freelist) == NULL)
		cp = callout_alloc(ct);
	else
		ct->ct_freelist = cp->c_idnext;

	cp->c_func = func;
	cp->c_arg = arg;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	cp->c_runtime = runtime = lbolt + delta;
	cp->c_runhrtime = runhrtime = delta + timespectohz64(&start);

	/*
	 * Assign an ID to this callout
	 */
	if (delta > CALLOUT_LONGTERM_TICKS)
		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
		    CALLOUT_COUNTER_HIGH;
	else
		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
		    CALLOUT_COUNTER_HIGH;

	cp->c_xid = id;

	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime, runhrtime);

	mutex_exit(&ct->ct_lock);

	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
	    "timeout:%K(%p) in %ld ticks, cp %p",
	    func, arg, delta, cp);

	return ((timeout_id_t)id);
}

timeout_id_t
timeout(void (*func)(void *), void *arg, clock_t delta)
{
	return (timeout_common(func, arg, delta,
	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
}

timeout_id_t
realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
{
	return (timeout_common(func, arg, delta,
	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
}
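/*
 * For reference, a minimal sketch of how a driver typically consumes
 * the interfaces above and untimeout() below, per timeout(9F).  The
 * handler, state structure, and field names here are hypothetical;
 * drv_usectohz() is the usual DDI microseconds-to-ticks conversion.
 *
 *	static void
 *	my_handler(void *arg)
 *	{
 *		my_state_t *sp = arg;
 *		... do the deferred work; may call timeout() again ...
 *	}
 *
 *	arm: run my_handler(sp) roughly one second from now
 *	sp->my_tid = timeout(my_handler, sp, drv_usectohz(1000000));
 *
 *	disarm: returns ticks remaining, or -1 if already running/fired
 *	(void) untimeout(sp->my_tid);
 *
 * Note that untimeout() blocks until a concurrently executing handler
 * completes, so it must not be called while holding a lock that the
 * handler can also take.
 */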
clock_t
untimeout(timeout_id_t id_arg)
{
	callout_id_t id = (callout_id_t)id_arg;
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t xid;

	ct = callout_table[id & CALLOUT_TABLE_MASK];

	mutex_enter(&ct->ct_lock);

	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {

		if ((xid = cp->c_xid) == id) {
			clock_t runtime = cp->c_runtime;
			int64_t runhrtime = cp->c_runhrtime;
			clock_t time_left = runtime - lbolt;

			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id,
			    runtime, runhrtime);

			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			mutex_exit(&ct->ct_lock);
			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
			    "untimeout:ID %lx ticks_left %ld", id, time_left);
			return (time_left < 0 ? 0 : time_left);
		}

		if (xid != (id | CALLOUT_EXECUTING))
			continue;

		/*
		 * The callout we want to delete is currently executing.
		 * The DDI states that we must wait until the callout
		 * completes before returning, so we block on c_done until
		 * the callout ID changes (to zero if it's on the freelist,
		 * or to a new callout ID if it's in use).  This implicitly
		 * assumes that callout structures are persistent (they are).
		 */
		if (cp->c_executor == curthread) {
			/*
			 * The timeout handler called untimeout() on itself.
			 * Stupid, but legal.  We can't wait for the timeout
			 * to complete without deadlocking, so we just return.
			 */
			mutex_exit(&ct->ct_lock);
			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
			    "untimeout_self:ID %x", id);
			return (-1);
		}
		while (cp->c_xid == xid)
			cv_wait(&cp->c_done, &ct->ct_lock);
		mutex_exit(&ct->ct_lock);
		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
		    "untimeout_executing:ID %lx", id);
		return (-1);
	}

	mutex_exit(&ct->ct_lock);
	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
	    "untimeout_bogus_id:ID %lx", id);

	/*
	 * We didn't find the specified callout ID.  This means either
	 * (1) the callout already fired, or (2) the caller passed us
	 * a bogus value.  Perform a sanity check to detect case (2).
	 */
	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
	    CALLOUT_COUNTER_HIGH)
		panic("untimeout: impossible timeout id %lx", id);

	return (-1);
}

/*
 * Do the actual work of executing callouts.  This routine is called either
 * by a taskq thread (normal case) or by softcall (realtime case).
 */
static void
callout_execute(callout_table_t *ct)
{
	callout_t *cp;
	callout_id_t xid;
	clock_t runtime;
	int64_t curhrtime;

	mutex_enter(&ct->ct_lock);

	/*
	 * The system time can be set forward or backward at any time.
	 * If it has been set backward, the c_runtime comparison catches
	 * the expired callouts; otherwise, we compare c_runhrtime
	 * against ct_curhrtime.
	 */
	curhrtime = ct->ct_curhrtime;
	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			xid = cp->c_xid;
			if ((cp->c_runtime != runtime &&
			    cp->c_runhrtime > curhrtime) ||
			    (xid & CALLOUT_EXECUTING))
				continue;
			cp->c_executor = curthread;
			cp->c_xid = xid |= CALLOUT_EXECUTING;
			mutex_exit(&ct->ct_lock);
			DTRACE_PROBE1(callout__start, callout_t *, cp);
			(*cp->c_func)(cp->c_arg);
			DTRACE_PROBE1(callout__end, callout_t *, cp);
			mutex_enter(&ct->ct_lock);

			/*
			 * Delete callout from hash tables, return to freelist,
			 * and tell anyone who cares that we're done.
			 * Even though we dropped and reacquired ct->ct_lock,
			 * it's OK to pick up where we left off because only
			 * newly-created timeouts can precede cp on ct_lbhash,
			 * and those timeouts cannot be due on this tick.
			 */
			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid,
			    runtime, curhrtime);
			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			cp->c_xid = 0;	/* Indicate completion for c_done */
			cv_broadcast(&cp->c_done);
		}

		/*
		 * We have completed all callouts that were scheduled to
		 * run at "runtime".  If the global run time still matches
		 * our local copy, then we advance the global run time;
		 * otherwise, another callout thread must have already done so.
		 */
		if (ct->ct_runtime == runtime)
			ct->ct_runtime = runtime + 1;
	}
	mutex_exit(&ct->ct_lock);
}
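/*
 * To summarize the dispatch path implemented by the routines above and
 * below (an explanatory sketch of existing behavior, not new behavior):
 * clock() invokes callout_schedule() once per tick, which walks every
 * callout table; callout_schedule_1() then hands any table with expired
 * entries to callout_execute(), through softcall() for realtime tables
 * (ct_taskq == NULL) or through taskq_dispatch() for normal tables:
 *
 *	clock()
 *	  callout_schedule()
 *	    callout_schedule_1(ct)			for each table
 *	      softcall(callout_execute, ct)		realtime tables
 *	      taskq_dispatch(..., callout_execute, ...)	normal tables
 */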
/*
 * Schedule any callouts that are due on or before this tick.
 */
static void
callout_schedule_1(callout_table_t *ct)
{
	callout_t *cp;
	clock_t curtime, runtime;
	timestruc_t now;
	int64_t curhrtime;

	gethrestime(&now);
	curhrtime = timespectohz64(&now);

	mutex_enter(&ct->ct_lock);
	ct->ct_curtime = curtime = lbolt;

	/*
	 * We use both conditions, cp->c_runtime == runtime and
	 * cp->c_runhrtime <= curhrtime, to decide whether a timeout is
	 * premature.  If the system time has been set backwards,
	 * cp->c_runtime == runtime becomes true first; otherwise we
	 * rely on cp->c_runhrtime <= curhrtime.
	 */
	ct->ct_curhrtime = curhrtime;

	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			if ((cp->c_runtime != runtime &&
			    cp->c_runhrtime > curhrtime) ||
			    (cp->c_xid & CALLOUT_EXECUTING))
				continue;
			mutex_exit(&ct->ct_lock);
			if (ct->ct_taskq == NULL)
				softcall((void (*)(void *))callout_execute,
				    ct);
			else
				(void) taskq_dispatch(ct->ct_taskq,
				    (task_func_t *)callout_execute, ct,
				    KM_NOSLEEP);
			return;
		}
		ct->ct_runtime++;
	}
	mutex_exit(&ct->ct_lock);
}

/*
 * Schedule callouts for all callout tables.  Called by clock() on each tick.
 */
void
callout_schedule(void)
{
	int f, t;

	if (cpr_stop_callout)
		return;

	for (t = 0; t < CALLOUT_NTYPES; t++)
		for (f = 0; f < callout_fanout; f++)
			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
}

/*
 * Callback handler used by CPR to stop and resume callouts.
 */
/*ARGSUSED*/
static boolean_t
callout_cpr_callb(void *arg, int code)
{
	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
	return (B_TRUE);
}

/*
 * Initialize all callout tables.  Called at boot time just before clkstart().
 */
void
callout_init(void)
{
	int f, t;
	int table_id;
	callout_table_t *ct;

	callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus);

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		for (f = 0; f < CALLOUT_FANOUT; f++) {
			table_id = CALLOUT_TABLE(t, f);
			if (f >= callout_fanout) {
				callout_table[table_id] =
				    callout_table[table_id - callout_fanout];
				continue;
			}
			ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP);
			callout_table[table_id] = ct;
			ct->ct_short_id = (callout_id_t)table_id |
			    CALLOUT_COUNTER_HIGH;
			ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM;
			ct->ct_curtime = ct->ct_runtime = lbolt;

			/*
			 * We cannot call gethrestime() at this point
			 * because the system time has not yet been
			 * validated, so set ct_curhrtime to zero.
			 */
			ct->ct_curhrtime = 0;

			if (t == CALLOUT_NORMAL) {
				/*
				 * Each callout thread consumes exactly one
				 * task structure while active.  Therefore,
				 * prepopulating with 2 * CALLOUT_THREADS tasks
				 * ensures that there's at least one task per
				 * thread that's either scheduled or on the
				 * freelist.  In turn, this guarantees that
				 * taskq_dispatch() will always either succeed
				 * (because there's a free task structure) or
				 * be unnecessary (because callout_execute(ct)
				 * has already been scheduled).
				 */
				ct->ct_taskq =
				    taskq_create_instance("callout_taskq", f,
				    CALLOUT_THREADS, maxclsyspri,
				    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
				    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
			}
		}
	}

	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout");
}
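/*
 * A brief orientation note on the layout callout_init() builds (a
 * summary of the code above, not additional behavior): callout_fanout
 * is capped at MIN(CALLOUT_FANOUT, max_ncpus).  For each callout type,
 * the first callout_fanout table slots receive real tables, and any
 * remaining slots simply alias a lower-numbered table.  Normal tables
 * each get their own "callout_taskq", so their handlers run in kernel
 * threads at maxclsyspri, while realtime tables leave ct_taskq NULL,
 * which makes callout_schedule_1() dispatch them through softcall()
 * instead.  timeout() and realtime_timeout() select a table with
 * CALLOUT_TABLE(type, CPU->cpu_seqid), spreading callout processing and
 * ct_lock contention across tables rather than funneling every timeout
 * through one global list.
 */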