/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/callo.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>

/*
 * Callout tables.  See timeout(9F) for details.
 */
static int cpr_stop_callout;
static int callout_fanout;
static int ncallout;
static callout_table_t *callout_table[CALLOUT_TABLES];

#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev)	\
{							\
	callout_t **headpp = &cthead;			\
	callout_t *headp = *headpp;			\
	cp->cnext = headp;				\
	cp->cprev = NULL;				\
	if (headp != NULL)				\
		headp->cprev = cp;			\
	*headpp = cp;					\
}

#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev)	\
{							\
	callout_t *nextp = cp->cnext;			\
	callout_t *prevp = cp->cprev;			\
	if (nextp != NULL)				\
		nextp->cprev = prevp;			\
	if (prevp != NULL)				\
		prevp->cnext = nextp;			\
	else						\
		cthead = nextp;				\
}

#define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime)		\
	ASSERT(MUTEX_HELD(&ct->ct_lock));				\
	ASSERT(cp->c_xid == id && cp->c_runtime == runtime);		\
	CALLOUT_HASH_##INSDEL(ct->ct_idhash[CALLOUT_IDHASH(id)],	\
	cp, c_idnext, c_idprev)						\
	CALLOUT_HASH_##INSDEL(ct->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
	cp, c_lbnext, c_lbprev)

#define	CALLOUT_HRES_INSERT(ct, cp, cnext, cprev, hresms)	\
{								\
	callout_t *nextp = ct->ct_hresq;			\
	callout_t *prevp;					\
								\
	if (nextp == NULL || hresms <= nextp->c_hresms) {	\
		cp->cnext = ct->ct_hresq;			\
		ct->ct_hresq = cp;				\
		cp->cprev = NULL;				\
		if (cp->cnext != NULL)				\
			cp->cnext->cprev = cp;			\
	} else {						\
		do {						\
			prevp = nextp;				\
			nextp = nextp->cnext;			\
		} while (nextp != NULL && hresms > nextp->c_hresms); \
		prevp->cnext = cp;				\
		cp->cprev = prevp;				\
		cp->cnext = nextp;				\
		if (nextp != NULL)				\
			nextp->cprev = cp;			\
	}							\
}

#define	CALLOUT_HRES_DELETE(ct, cp, cnext, cprev, hresms)	\
{								\
	if (cp == ct->ct_hresq) {				\
		ct->ct_hresq = cp->cnext;			\
		if (cp->cnext != NULL)				\
			cp->cnext->cprev = NULL;		\
	} else {						\
		cp->cprev->cnext = cp->cnext;			\
		if (cp->cnext != NULL)				\
			cp->cnext->cprev = cp->cprev;		\
	}							\
}

#define	CALLOUT_HRES_UPDATE(INSDEL, ct, cp, id, hresms)		\
	ASSERT(MUTEX_HELD(&ct->ct_lock));			\
	ASSERT(cp->c_xid == id);				\
	CALLOUT_HRES_##INSDEL(ct, cp, c_hrnext,			\
	    c_hrprev, hresms)
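
/*
 * Editorial sketch (not part of the original file): after
 * CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime) and
 * CALLOUT_HRES_UPDATE(INSERT, ct, cp, id, hresms), the callout cp is
 * linked onto three lists at once, all protected by ct_lock:
 *
 *	ct->ct_idhash[CALLOUT_IDHASH(id)]	ID hash chain, walked by
 *						untimeout() to find cp by ID
 *	ct->ct_lbhash[CALLOUT_LBHASH(runtime)]	lbolt hash chain, walked per
 *						tick by callout_schedule_1()
 *						and callout_execute()
 *	ct->ct_hresq				hrestime queue, kept sorted in
 *						ascending c_hresms order
 *
 * The DELETE forms of the same macros unlink cp from those lists; both
 * directions assert that ct_lock is held.
 */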

/*
 * Allocate a callout structure.  We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing all, we try a KM_PANIC allocation.
 */
static callout_t *
callout_alloc(callout_table_t *ct)
{
	size_t size = 0;
	callout_t *cp = NULL;

	mutex_exit(&ct->ct_lock);
	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
	    KM_NOSLEEP | KM_PANIC);
	bzero(cp, sizeof (callout_t));
	ncallout++;
	mutex_enter(&ct->ct_lock);
	return (cp);
}

/*
 * Arrange that func(arg) be called after delta clock ticks.
 */
static timeout_id_t
timeout_common(void (*func)(void *), void *arg, clock_t delta,
    callout_table_t *ct)
{
	callout_t *cp;
	callout_id_t id;
	clock_t runtime;
	timestruc_t now;
	int64_t hresms;

	gethrestime(&now);

	mutex_enter(&ct->ct_lock);

	if ((cp = ct->ct_freelist) == NULL)
		cp = callout_alloc(ct);
	else
		ct->ct_freelist = cp->c_idnext;

	cp->c_func = func;
	cp->c_arg = arg;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	cp->c_runtime = runtime = lbolt + delta;

	/* Calculate the future time in milliseconds */
	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC +
	    TICK_TO_MSEC(delta);
	cp->c_hresms = hresms;

	/*
	 * Assign an ID to this callout.
	 */
	if (delta > CALLOUT_LONGTERM_TICKS)
		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
		    CALLOUT_COUNTER_HIGH;
	else
		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
		    CALLOUT_COUNTER_HIGH;

	cp->c_xid = id;

	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime);
	CALLOUT_HRES_UPDATE(INSERT, ct, cp, id, hresms);

	mutex_exit(&ct->ct_lock);

	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
	    "timeout:%K(%p) in %ld ticks, cp %p",
	    func, arg, delta, cp);

	return ((timeout_id_t)id);
}

timeout_id_t
timeout(void (*func)(void *), void *arg, clock_t delta)
{
	return (timeout_common(func, arg, delta,
	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
}

timeout_id_t
realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
{
	return (timeout_common(func, arg, delta,
	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
}
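
/*
 * Editorial note (not part of the original file): the ID arithmetic above
 * preserves the table index in the low bits of every ID it hands out.
 * ct_short_id and ct_long_id start out in callout_init() as
 * (table_id | CALLOUT_COUNTER_HIGH), and each allocation only subtracts
 * CALLOUT_COUNTER_LOW (which, judging from this usage, sits above the
 * table-index bits defined in <sys/callo.h>) and re-asserts
 * CALLOUT_COUNTER_HIGH.  That is what lets untimeout() below find the
 * owning table directly from the ID:
 *
 *	ct = callout_table[id & CALLOUT_TABLE_MASK];
 *
 * and sanity-check a bogus ID by requiring CALLOUT_COUNTER_HIGH to be set
 * and CALLOUT_EXECUTING to be clear.
 */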

clock_t
untimeout(timeout_id_t id_arg)
{
	callout_id_t id = (callout_id_t)id_arg;
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t xid;

	ct = callout_table[id & CALLOUT_TABLE_MASK];

	mutex_enter(&ct->ct_lock);

	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {

		if ((xid = cp->c_xid) == id) {
			clock_t runtime = cp->c_runtime;
			clock_t time_left = runtime - lbolt;

			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id, runtime);
			CALLOUT_HRES_UPDATE(DELETE, ct, cp, id, 0);
			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			mutex_exit(&ct->ct_lock);
			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
			    "untimeout:ID %lx ticks_left %ld", id, time_left);
			return (time_left < 0 ? 0 : time_left);
		}

		if (xid != (id | CALLOUT_EXECUTING))
			continue;

		/*
		 * The callout we want to delete is currently executing.
		 * The DDI states that we must wait until the callout
		 * completes before returning, so we block on c_done until
		 * the callout ID changes (to zero if it's on the freelist,
		 * or to a new callout ID if it's in use).  This implicitly
		 * assumes that callout structures are persistent (they are).
		 */
		if (cp->c_executor == curthread) {
			/*
			 * The timeout handler called untimeout() on itself.
			 * Stupid, but legal.  We can't wait for the timeout
			 * to complete without deadlocking, so we just return.
			 */
			mutex_exit(&ct->ct_lock);
			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
			    "untimeout_self:ID %x", id);
			return (-1);
		}
		while (cp->c_xid == xid)
			cv_wait(&cp->c_done, &ct->ct_lock);
		mutex_exit(&ct->ct_lock);
		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
		    "untimeout_executing:ID %lx", id);
		return (-1);
	}

	mutex_exit(&ct->ct_lock);
	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
	    "untimeout_bogus_id:ID %lx", id);

	/*
	 * We didn't find the specified callout ID.  This means either
	 * (1) the callout already fired, or (2) the caller passed us
	 * a bogus value.  Perform a sanity check to detect case (2).
	 */
	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
	    CALLOUT_COUNTER_HIGH)
		panic("untimeout: impossible timeout id %lx", id);

	return (-1);
}
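
/*
 * Example (editorial illustration, not part of the original file): a
 * typical timeout(9F)/untimeout(9F) client looks roughly like the sketch
 * below; the names xx_state_t, xx_poll and xsp->xx_timeout_id are
 * hypothetical.
 *
 *	static void
 *	xx_poll(void *arg)
 *	{
 *		xx_state_t *xsp = arg;
 *
 *		(periodic work goes here; timeouts are one-shot, so the
 *		handler re-arms itself if it wants to keep running)
 *		xsp->xx_timeout_id = timeout(xx_poll, xsp,
 *		    drv_usectohz(100000));
 *	}
 *
 *	arm:	xsp->xx_timeout_id = timeout(xx_poll, xsp,
 *		    drv_usectohz(100000));
 *	cancel:	(void) untimeout(xsp->xx_timeout_id);
 *
 * As implemented above, untimeout() returns the ticks that were left when
 * it cancels a pending callout (0 if it was overdue), and -1 when the
 * callout has already fired, when it had to wait for a running handler to
 * finish, or when a handler cancels itself.
 */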

/*
 * Do the actual work of executing callouts.  This routine is called either
 * by a taskq_thread (normal case), or by softcall (realtime case).
 */
static void
callout_execute(callout_table_t *ct)
{
	callout_t *cp;
	callout_id_t xid;
	clock_t runtime;
	timestruc_t now;
	int64_t hresms;

	mutex_enter(&ct->ct_lock);

	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			xid = cp->c_xid;
			if (cp->c_runtime != runtime ||
			    (xid & CALLOUT_EXECUTING))
				continue;
			cp->c_executor = curthread;
			cp->c_xid = xid |= CALLOUT_EXECUTING;
			mutex_exit(&ct->ct_lock);
			DTRACE_PROBE1(callout__start, callout_t *, cp);
			(*cp->c_func)(cp->c_arg);
			DTRACE_PROBE1(callout__end, callout_t *, cp);
			mutex_enter(&ct->ct_lock);

			/*
			 * Delete the callout from both the hash tables and
			 * the hres queue, return it to the freelist, and tell
			 * anyone who cares that we're done.
			 * Even though we dropped and reacquired ct->ct_lock,
			 * it's OK to pick up where we left off because only
			 * newly-created timeouts can precede cp on ct_lbhash,
			 * and those timeouts cannot be due on this tick.
			 */
			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
			CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			cp->c_xid = 0;	/* Indicate completion for c_done */
			cv_broadcast(&cp->c_done);
		}
		/*
		 * We have completed all callouts that were scheduled to
		 * run at "runtime".  If the global run time still matches
		 * our local copy, then we advance the global run time;
		 * otherwise, another callout thread must have already done so.
		 */
		if (ct->ct_runtime == runtime)
			ct->ct_runtime = runtime + 1;
	}

	gethrestime(&now);

	/* Calculate the current time in milliseconds */
	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;

	cp = ct->ct_hresq;
	while (cp != NULL && hresms >= cp->c_hresms) {
		xid = cp->c_xid;
		if (xid & CALLOUT_EXECUTING) {
			cp = cp->c_hrnext;
			continue;
		}
		cp->c_executor = curthread;
		cp->c_xid = xid |= CALLOUT_EXECUTING;
		runtime = cp->c_runtime;
		mutex_exit(&ct->ct_lock);
		DTRACE_PROBE1(callout__start, callout_t *, cp);
		(*cp->c_func)(cp->c_arg);
		DTRACE_PROBE1(callout__end, callout_t *, cp);
		mutex_enter(&ct->ct_lock);

		/*
		 * See the comments above.
		 */
		CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
		CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
		cp->c_idnext = ct->ct_freelist;
		ct->ct_freelist = cp;
		cp->c_xid = 0;	/* Indicate completion for c_done */
		cv_broadcast(&cp->c_done);

		/*
		 * Start over from the head of the list to see if any
		 * timeout bearing an earlier hres time is now due.
		 */
		cp = ct->ct_hresq;
	}
	mutex_exit(&ct->ct_lock);
}

/*
 * Schedule any callouts that are due on or before this tick.
 */
static void
callout_schedule_1(callout_table_t *ct)
{
	callout_t *cp;
	clock_t curtime, runtime;
	timestruc_t now;
	int64_t hresms;

	mutex_enter(&ct->ct_lock);
	ct->ct_curtime = curtime = lbolt;
	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			if (cp->c_runtime != runtime ||
			    (cp->c_xid & CALLOUT_EXECUTING))
				continue;
			mutex_exit(&ct->ct_lock);
			if (ct->ct_taskq == NULL)
				softcall((void (*)(void *))callout_execute, ct);
			else
				(void) taskq_dispatch(ct->ct_taskq,
				    (task_func_t *)callout_execute, ct,
				    KM_NOSLEEP);
			return;
		}
		ct->ct_runtime++;
	}

	gethrestime(&now);

	/* Calculate the current time in milliseconds */
	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;

	cp = ct->ct_hresq;
	while (cp != NULL && hresms >= cp->c_hresms) {
		if (cp->c_xid & CALLOUT_EXECUTING) {
			cp = cp->c_hrnext;
			continue;
		}
		mutex_exit(&ct->ct_lock);
		if (ct->ct_taskq == NULL)
			softcall((void (*)(void *))callout_execute, ct);
		else
			(void) taskq_dispatch(ct->ct_taskq,
			    (task_func_t *)callout_execute, ct, KM_NOSLEEP);
		return;
	}
	mutex_exit(&ct->ct_lock);
}

/*
 * Schedule callouts for all callout tables.  Called by clock() on each tick.
 */
void
callout_schedule(void)
{
	int f, t;

	if (cpr_stop_callout)
		return;

	for (t = 0; t < CALLOUT_NTYPES; t++)
		for (f = 0; f < callout_fanout; f++)
			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
}

/*
 * Callback handler used by CPR to stop and resume callouts.
 */
/*ARGSUSED*/
static boolean_t
callout_cpr_callb(void *arg, int code)
{
	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
	return (B_TRUE);
}
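
/*
 * Editorial summary (illustrative, derived from the routines above): on
 * every clock tick the flow is
 *
 *	clock() -> callout_schedule() -> callout_schedule_1(ct)
 *		-> softcall(callout_execute, ct)	realtime tables,
 *							ct_taskq == NULL
 *		-> taskq_dispatch(ct_taskq,		normal tables
 *		    callout_execute, ct, KM_NOSLEEP)
 *
 * so realtime callouts run from soft interrupt level while normal callouts
 * run in the per-table taskq threads created in callout_init() below.
 */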

/*
 * Initialize all callout tables.  Called at boot time just before clkstart().
 */
void
callout_init(void)
{
	int f, t;
	int table_id;
	callout_table_t *ct;

	callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus);

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		for (f = 0; f < CALLOUT_FANOUT; f++) {
			table_id = CALLOUT_TABLE(t, f);
			if (f >= callout_fanout) {
				callout_table[table_id] =
				    callout_table[table_id - callout_fanout];
				continue;
			}
			ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP);
			callout_table[table_id] = ct;
			ct->ct_short_id = (callout_id_t)table_id |
			    CALLOUT_COUNTER_HIGH;
			ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM;
			ct->ct_curtime = ct->ct_runtime = lbolt;
			if (t == CALLOUT_NORMAL) {
				/*
				 * Each callout thread consumes exactly one
				 * task structure while active.  Therefore,
				 * prepopulating with 2 * CALLOUT_THREADS tasks
				 * ensures that there's at least one task per
				 * thread that's either scheduled or on the
				 * freelist.  In turn, this guarantees that
				 * taskq_dispatch() will always either succeed
				 * (because there's a free task structure) or
				 * be unnecessary (because "callout_execute(ct)"
				 * has already been scheduled).
				 */
				ct->ct_taskq =
				    taskq_create_instance("callout_taskq", f,
				    CALLOUT_THREADS, maxclsyspri,
				    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
				    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
			}
		}
	}
	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout");
}