/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/callo.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>

/*
 * Callout tables.  See timeout(9F) for details.
 */
static int cpr_stop_callout;
static int callout_fanout;
static int ncallout;
static callout_table_t *callout_table[CALLOUT_TABLES];

#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev)	\
{							\
	callout_t **headpp = &cthead;			\
	callout_t *headp = *headpp;			\
	cp->cnext = headp;				\
	cp->cprev = NULL;				\
	if (headp != NULL)				\
		headp->cprev = cp;			\
	*headpp = cp;					\
}

#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev)	\
{							\
	callout_t *nextp = cp->cnext;			\
	callout_t *prevp = cp->cprev;			\
	if (nextp != NULL)				\
		nextp->cprev = prevp;			\
	if (prevp != NULL)				\
		prevp->cnext = nextp;			\
	else						\
		cthead = nextp;				\
}

#define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime, runhrtime)	\
	ASSERT(MUTEX_HELD(&ct->ct_lock));				\
	ASSERT(cp->c_xid == id && ((cp->c_runtime == runtime) ||	\
	    (cp->c_runhrtime <= runhrtime)));				\
	CALLOUT_HASH_##INSDEL(ct->ct_idhash[CALLOUT_IDHASH(id)],	\
	    cp, c_idnext, c_idprev)					\
	CALLOUT_HASH_##INSDEL(ct->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
	    cp, c_lbnext, c_lbprev)

/*
 * Allocate a callout structure.  We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing all, we try a KM_PANIC allocation.
 */
static callout_t *
callout_alloc(callout_table_t *ct)
{
	size_t size = 0;
	callout_t *cp = NULL;

	mutex_exit(&ct->ct_lock);
	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
	    KM_NOSLEEP | KM_PANIC);
	bzero(cp, sizeof (callout_t));
	ncallout++;
	mutex_enter(&ct->ct_lock);
	return (cp);
}

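/*
 * Illustrative sketch only (not part of this implementation): a typical
 * driver-side consumer of the timeout(9F)/untimeout(9F) interface defined
 * below.  The foo_softc_t structure, the foo_*() functions and the
 * one-second interval are hypothetical names invented for the example.
 *
 *	typedef struct foo_softc {
 *		kmutex_t	foo_lock;
 *		timeout_id_t	foo_tid;
 *	} foo_softc_t;
 *
 *	static void
 *	foo_poll(void *arg)
 *	{
 *		foo_softc_t *sc = arg;
 *		(do the periodic work)
 *	}
 *
 *	static void
 *	foo_start(foo_softc_t *sc)
 *	{
 *		sc->foo_tid = timeout(foo_poll, sc, drv_usectohz(1000000));
 *	}
 *
 *	static void
 *	foo_stop(foo_softc_t *sc)
 *	{
 *		(untimeout() waits for a running foo_poll() to finish)
 *		(void) untimeout(sc->foo_tid);
 *	}
 */
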
/*
 * Arrange that func(arg) be called after delta clock ticks.
 */
static timeout_id_t
timeout_common(void (*func)(void *), void *arg, clock_t delta,
    callout_table_t *ct)
{
	callout_t *cp;
	callout_id_t id;
	clock_t runtime;
	timestruc_t start;
	int64_t runhrtime;

	gethrestime_lasttick(&start);

	mutex_enter(&ct->ct_lock);

	if ((cp = ct->ct_freelist) == NULL)
		cp = callout_alloc(ct);
	else
		ct->ct_freelist = cp->c_idnext;

	cp->c_func = func;
	cp->c_arg = arg;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	cp->c_runtime = runtime = lbolt + delta;
	cp->c_runhrtime = runhrtime = delta + timespectohz64(&start);

	/*
	 * Assign an ID to this callout.
	 */
	if (delta > CALLOUT_LONGTERM_TICKS)
		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
		    CALLOUT_COUNTER_HIGH;
	else
		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
		    CALLOUT_COUNTER_HIGH;

	cp->c_xid = id;

	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime, runhrtime);

	mutex_exit(&ct->ct_lock);

	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
	    "timeout:%K(%p) in %ld ticks, cp %p",
	    func, arg, delta, cp);

	return ((timeout_id_t)id);
}

timeout_id_t
timeout(void (*func)(void *), void *arg, clock_t delta)
{
	return (timeout_common(func, arg, delta,
	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
}

timeout_id_t
realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
{
	return (timeout_common(func, arg, delta,
	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
}

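/*
 * Cancel the callout with the given ID, if it is still pending.
 * Returns the number of ticks that remained before the callout would
 * have fired (0 if it was already due), or -1 if the callout has
 * already fired, could not be found, or is currently executing (in
 * which case we first wait for it to complete, unless the caller is
 * the callout handler itself).
 */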
clock_t
untimeout(timeout_id_t id_arg)
{
	callout_id_t id = (callout_id_t)id_arg;
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t xid;

	ct = callout_table[id & CALLOUT_TABLE_MASK];

	mutex_enter(&ct->ct_lock);

	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {

		if ((xid = cp->c_xid) == id) {
			clock_t runtime = cp->c_runtime;
			int64_t runhrtime = cp->c_runhrtime;
			clock_t time_left = runtime - lbolt;

			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id,
			    runtime, runhrtime);

			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			mutex_exit(&ct->ct_lock);
			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
			    "untimeout:ID %lx ticks_left %ld", id, time_left);
			return (time_left < 0 ? 0 : time_left);
		}

		if (xid != (id | CALLOUT_EXECUTING))
			continue;

		/*
		 * The callout we want to delete is currently executing.
		 * The DDI states that we must wait until the callout
		 * completes before returning, so we block on c_done until
		 * the callout ID changes (to zero if it's on the freelist,
		 * or to a new callout ID if it's in use).  This implicitly
		 * assumes that callout structures are persistent (they are).
		 */
		if (cp->c_executor == curthread) {
			/*
			 * The timeout handler called untimeout() on itself.
			 * Stupid, but legal.  We can't wait for the timeout
			 * to complete without deadlocking, so we just return.
			 */
			mutex_exit(&ct->ct_lock);
			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
			    "untimeout_self:ID %x", id);
			return (-1);
		}
		while (cp->c_xid == xid)
			cv_wait(&cp->c_done, &ct->ct_lock);
		mutex_exit(&ct->ct_lock);
		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
		    "untimeout_executing:ID %lx", id);
		return (-1);
	}

	mutex_exit(&ct->ct_lock);
	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
	    "untimeout_bogus_id:ID %lx", id);

	/*
	 * We didn't find the specified callout ID.  This means either
	 * (1) the callout already fired, or (2) the caller passed us
	 * a bogus value.  Perform a sanity check to detect case (2).
	 */
	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
	    CALLOUT_COUNTER_HIGH)
		panic("untimeout: impossible timeout id %lx", id);

	return (-1);
}

/*
 * Do the actual work of executing callouts.  This routine is called either
 * by a taskq_thread (normal case), or by softcall (realtime case).
 */
static void
callout_execute(callout_table_t *ct)
{
	callout_t *cp;
	callout_id_t xid;
	clock_t runtime;
	int64_t curhrtime;

	mutex_enter(&ct->ct_lock);

	/*
	 * The system time can be set forward or backward at any time.
	 * If it has been set backward, the lbolt-based c_runtime test
	 * expires a callout; otherwise, we compare c_runhrtime with
	 * ct_curhrtime.
	 */
	curhrtime = ct->ct_curhrtime;
	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			xid = cp->c_xid;
			if ((cp->c_runtime != runtime &&
			    cp->c_runhrtime > curhrtime) ||
			    (xid & CALLOUT_EXECUTING))
				continue;
			cp->c_executor = curthread;
			cp->c_xid = xid |= CALLOUT_EXECUTING;
			mutex_exit(&ct->ct_lock);
			DTRACE_PROBE1(callout__start, callout_t *, cp);
			(*cp->c_func)(cp->c_arg);
			DTRACE_PROBE1(callout__end, callout_t *, cp);
			mutex_enter(&ct->ct_lock);

			/*
			 * Delete callout from hash tables, return to freelist,
			 * and tell anyone who cares that we're done.
			 * Even though we dropped and reacquired ct->ct_lock,
			 * it's OK to pick up where we left off because only
			 * newly-created timeouts can precede cp on ct_lbhash,
			 * and those timeouts cannot be due on this tick.
			 */
			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid,
			    runtime, curhrtime);

			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			cp->c_xid = 0;	/* Indicate completion for c_done */
			cv_broadcast(&cp->c_done);
		}
		/*
		 * We have completed all callouts that were scheduled to
		 * run at "runtime".  If the global run time still matches
		 * our local copy, then we advance the global run time;
		 * otherwise, another callout thread must have already done so.
		 */
		if (ct->ct_runtime == runtime)
			ct->ct_runtime = runtime + 1;
	}
	mutex_exit(&ct->ct_lock);
}

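/*
 * A worked example of the dual expiry test used above and in
 * callout_schedule_1() (an illustrative sketch; the numbers are made up):
 * a 100-tick timeout created when lbolt == 1000 and hrestime converts to
 * 5000 ticks gets c_runtime == 1100 and c_runhrtime == 5100.  If the wall
 * clock is never adjusted, the callout expires when lbolt reaches 1100
 * (c_runtime == runtime).  If the clock is stepped backwards, the
 * hrestime-based test stays false but the monotonic lbolt-based test
 * still expires the callout on time.  If the clock is stepped forward,
 * ct_curhrtime jumps past 5100 and the c_runhrtime <= curhrtime test
 * allows the callout to expire early.
 */
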
/*
 * Schedule any callouts that are due on or before this tick.
 */
static void
callout_schedule_1(callout_table_t *ct)
{
	callout_t *cp;
	clock_t curtime, runtime;
	timestruc_t now;
	int64_t curhrtime;

	gethrestime(&now);
	curhrtime = timespectohz64(&now);

	mutex_enter(&ct->ct_lock);
	ct->ct_curtime = curtime = lbolt;

	/*
	 * We use both conditions, cp->c_runtime == runtime and
	 * cp->c_runhrtime <= curhrtime, to decide whether a timeout is
	 * premature.  If the system time has been set backwards, then
	 * cp->c_runtime == runtime becomes true first; otherwise, the
	 * cp->c_runhrtime <= curhrtime test expires the timeout.
	 */
	ct->ct_curhrtime = curhrtime;
	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			if ((cp->c_runtime != runtime &&
			    cp->c_runhrtime > curhrtime) ||
			    (cp->c_xid & CALLOUT_EXECUTING))
				continue;
			mutex_exit(&ct->ct_lock);
			if (ct->ct_taskq == NULL)
				softcall((void (*)(void *))callout_execute, ct);
			else
				(void) taskq_dispatch(ct->ct_taskq,
				    (task_func_t *)callout_execute, ct,
				    KM_NOSLEEP);
			return;
		}
		ct->ct_runtime++;
	}
	mutex_exit(&ct->ct_lock);
}

/*
 * Schedule callouts for all callout tables.  Called by clock() on each tick.
 */
void
callout_schedule(void)
{
	int f, t;

	if (cpr_stop_callout)
		return;

	for (t = 0; t < CALLOUT_NTYPES; t++)
		for (f = 0; f < callout_fanout; f++)
			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
}

/*
 * Callback handler used by CPR to stop and resume callouts.
 */
/*ARGSUSED*/
static boolean_t
callout_cpr_callb(void *arg, int code)
{
	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
	return (B_TRUE);
}

/*
 * Initialize all callout tables.  Called at boot time just before clkstart().
 */
void
callout_init(void)
{
	int f, t;
	int table_id;
	callout_table_t *ct;

	callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus);

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		for (f = 0; f < CALLOUT_FANOUT; f++) {
			table_id = CALLOUT_TABLE(t, f);
			if (f >= callout_fanout) {
				callout_table[table_id] =
				    callout_table[table_id - callout_fanout];
				continue;
			}
			ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP);
			callout_table[table_id] = ct;
			ct->ct_short_id = (callout_id_t)table_id |
			    CALLOUT_COUNTER_HIGH;
			ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM;
			ct->ct_curtime = ct->ct_runtime = lbolt;

			/*
			 * We cannot call gethrestime() at this point
			 * because the system time has not been validated
			 * yet, so set ct_curhrtime to zero.
			 */
			ct->ct_curhrtime = 0;

			if (t == CALLOUT_NORMAL) {
				/*
				 * Each callout thread consumes exactly one
				 * task structure while active.  Therefore,
				 * prepopulating with 2 * CALLOUT_THREADS tasks
				 * ensures that there's at least one task per
				 * thread that's either scheduled or on the
				 * freelist.  In turn, this guarantees that
				 * taskq_dispatch() will always either succeed
				 * (because there's a free task structure) or
				 * be unnecessary (because "callout_execute(ct)"
				 * has already been scheduled).
				 */
				ct->ct_taskq =
				    taskq_create_instance("callout_taskq", f,
				    CALLOUT_THREADS, maxclsyspri,
				    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
				    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
			}
		}
	}
	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout");
}