/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/callo.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>

/*
 * Callout tables.  See timeout(9F) for details.
 */
static int cpr_stop_callout;
static int callout_fanout;
static int ncallout;
static callout_table_t *callout_table[CALLOUT_TABLES];

#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev) \
{ \
	callout_t **headpp = &cthead; \
	callout_t *headp = *headpp; \
	cp->cnext = headp; \
	cp->cprev = NULL; \
	if (headp != NULL) \
		headp->cprev = cp; \
	*headpp = cp; \
}

#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev) \
{ \
	callout_t *nextp = cp->cnext; \
	callout_t *prevp = cp->cprev; \
	if (nextp != NULL) \
		nextp->cprev = prevp; \
	if (prevp != NULL) \
		prevp->cnext = nextp; \
	else \
		cthead = nextp; \
}

#define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime) \
	ASSERT(MUTEX_HELD(&ct->ct_lock)); \
	ASSERT(cp->c_xid == id && cp->c_runtime == runtime); \
	CALLOUT_HASH_##INSDEL(ct->ct_idhash[CALLOUT_IDHASH(id)], \
	    cp, c_idnext, c_idprev) \
	CALLOUT_HASH_##INSDEL(ct->ct_lbhash[CALLOUT_LBHASH(runtime)], \
	    cp, c_lbnext, c_lbprev)

#define	CALLOUT_HRES_INSERT(ct, cp, cnext, cprev, hresms) \
{ \
	callout_t *nextp = ct->ct_hresq; \
	callout_t *prevp; \
 \
	if (nextp == NULL || hresms <= nextp->c_hresms) { \
		cp->cnext = ct->ct_hresq; \
		ct->ct_hresq = cp; \
		cp->cprev = NULL; \
		if (cp->cnext != NULL) \
			cp->cnext->cprev = cp; \
	} else { \
		do { \
			prevp = nextp; \
			nextp = nextp->cnext; \
		} while (nextp != NULL && hresms > nextp->c_hresms); \
		prevp->cnext = cp; \
		cp->cprev = prevp; \
		cp->cnext = nextp; \
		if (nextp != NULL) \
			nextp->cprev = cp; \
	} \
}

#define	CALLOUT_HRES_DELETE(ct, cp, cnext, cprev, hresms) \
{ \
	if (cp == ct->ct_hresq) { \
		ct->ct_hresq = cp->cnext; \
		if (cp->cnext != NULL) \
			cp->cnext->cprev = NULL; \
	} else { \
		cp->cprev->cnext = cp->cnext; \
		if (cp->cnext != NULL) \
			cp->cnext->cprev = cp->cprev; \
	} \
}

#define	CALLOUT_HRES_UPDATE(INSDEL, ct, cp, id, hresms) \
	ASSERT(MUTEX_HELD(&ct->ct_lock)); \
	ASSERT(cp->c_xid == id); \
	CALLOUT_HRES_##INSDEL(ct, cp, c_hrnext, \
	    c_hrprev, hresms)

/*
 * Allocate a callout structure.  We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing all, we try a KM_PANIC allocation.
 */
static callout_t *
callout_alloc(callout_table_t *ct)
{
	size_t size = 0;
	callout_t *cp = NULL;

	mutex_exit(&ct->ct_lock);
	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
	    KM_NOSLEEP | KM_PANIC);
	bzero(cp, sizeof (callout_t));
	ncallout++;
	mutex_enter(&ct->ct_lock);
	return (cp);
}

/*
 * Arrange that func(arg) be called after delta clock ticks.
 */
static timeout_id_t
timeout_common(void (*func)(void *), void *arg, clock_t delta,
    callout_table_t *ct)
{
	callout_t *cp;
	callout_id_t id;
	clock_t runtime;
	timestruc_t now;
	int64_t hresms;

	gethrestime(&now);

	mutex_enter(&ct->ct_lock);

	if ((cp = ct->ct_freelist) == NULL)
		cp = callout_alloc(ct);
	else
		ct->ct_freelist = cp->c_idnext;

	cp->c_func = func;
	cp->c_arg = arg;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	cp->c_runtime = runtime = lbolt + delta;

	/*
	 * Calculate the future time in milliseconds.
	 * We must cast tv_sec and delta to 64-bit integers
	 * to avoid integer overflow on 32-bit platforms.
	 */
	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC +
	    TICK_TO_MSEC((int64_t)delta);

	cp->c_hresms = hresms;

	/*
	 * Assign an ID to this callout
	 */
	if (delta > CALLOUT_LONGTERM_TICKS)
		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
		    CALLOUT_COUNTER_HIGH;
	else
		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
		    CALLOUT_COUNTER_HIGH;

	cp->c_xid = id;

	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime);
	CALLOUT_HRES_UPDATE(INSERT, ct, cp, id, hresms);

	mutex_exit(&ct->ct_lock);

	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
	    "timeout:%K(%p) in %ld ticks, cp %p",
	    func, arg, delta, cp);

	return ((timeout_id_t)id);
}

timeout_id_t
timeout(void (*func)(void *), void *arg, clock_t delta)
{
	return (timeout_common(func, arg, delta,
	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
}

timeout_id_t
realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
{
	return (timeout_common(func, arg, delta,
	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
}

clock_t
untimeout(timeout_id_t id_arg)
{
	callout_id_t id = (callout_id_t)id_arg;
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t xid;

	ct = callout_table[id & CALLOUT_TABLE_MASK];

	mutex_enter(&ct->ct_lock);

	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {

		if ((xid = cp->c_xid) == id) {
			clock_t runtime = cp->c_runtime;
			clock_t time_left = runtime - lbolt;

			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id, runtime);
			CALLOUT_HRES_UPDATE(DELETE, ct, cp, id, 0);
			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			mutex_exit(&ct->ct_lock);
			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
			    "untimeout:ID %lx ticks_left %ld", id, time_left);
			return (time_left < 0 ? 0 : time_left);
		}

		if (xid != (id | CALLOUT_EXECUTING))
			continue;

		/*
		 * The callout we want to delete is currently executing.
		 * The DDI states that we must wait until the callout
		 * completes before returning, so we block on c_done until
		 * the callout ID changes (to zero if it's on the freelist,
		 * or to a new callout ID if it's in use).  This implicitly
		 * assumes that callout structures are persistent (they are).
		 */
		if (cp->c_executor == curthread) {
			/*
			 * The timeout handler called untimeout() on itself.
			 * Stupid, but legal.  We can't wait for the timeout
			 * to complete without deadlocking, so we just return.
			 */
			mutex_exit(&ct->ct_lock);
			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
			    "untimeout_self:ID %x", id);
			return (-1);
		}
		while (cp->c_xid == xid)
			cv_wait(&cp->c_done, &ct->ct_lock);
		mutex_exit(&ct->ct_lock);
		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
		    "untimeout_executing:ID %lx", id);
		return (-1);
	}

	mutex_exit(&ct->ct_lock);
	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
	    "untimeout_bogus_id:ID %lx", id);

	/*
	 * We didn't find the specified callout ID.  This means either
	 * (1) the callout already fired, or (2) the caller passed us
	 * a bogus value.  Perform a sanity check to detect case (2).
	 */
	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
	    CALLOUT_COUNTER_HIGH)
		panic("untimeout: impossible timeout id %lx", id);

	return (-1);
}

/*
 * Do the actual work of executing callouts.  This routine is called either
 * by a taskq_thread (normal case) or by softcall (realtime case).
 */
static void
callout_execute(callout_table_t *ct)
{
	callout_t *cp;
	callout_id_t xid;
	clock_t runtime;
	timestruc_t now;
	int64_t hresms;

	mutex_enter(&ct->ct_lock);

	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			xid = cp->c_xid;
			if (cp->c_runtime != runtime ||
			    (xid & CALLOUT_EXECUTING))
				continue;
			cp->c_executor = curthread;
			cp->c_xid = xid |= CALLOUT_EXECUTING;
			mutex_exit(&ct->ct_lock);
			DTRACE_PROBE1(callout__start, callout_t *, cp);
			(*cp->c_func)(cp->c_arg);
			DTRACE_PROBE1(callout__end, callout_t *, cp);
			mutex_enter(&ct->ct_lock);

			/*
			 * Delete the callout from both the hash tables and
			 * the hres queue, return it to the freelist, and tell
			 * anyone who cares that we're done.
			 * Even though we dropped and reacquired ct->ct_lock,
			 * it's OK to pick up where we left off because only
			 * newly-created timeouts can precede cp on ct_lbhash,
			 * and those timeouts cannot be due on this tick.
			 */
			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
			CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			cp->c_xid = 0;	/* Indicate completion for c_done */
			cv_broadcast(&cp->c_done);
		}
		/*
		 * We have completed all callouts that were scheduled to
		 * run at "runtime".  If the global run time still matches
		 * our local copy, then we advance the global run time;
		 * otherwise, another callout thread must have already done so.
		 */
		if (ct->ct_runtime == runtime)
			ct->ct_runtime = runtime + 1;
	}

	gethrestime(&now);

	/*
	 * Calculate the current time in milliseconds.
	 * We must cast tv_sec to a 64-bit integer
	 * to avoid integer overflow on 32-bit platforms.
	 */
	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;

	cp = ct->ct_hresq;
	while (cp != NULL && hresms >= cp->c_hresms) {
		xid = cp->c_xid;
		if (xid & CALLOUT_EXECUTING) {
			cp = cp->c_hrnext;
			continue;
		}
		cp->c_executor = curthread;
		cp->c_xid = xid |= CALLOUT_EXECUTING;
		runtime = cp->c_runtime;
		mutex_exit(&ct->ct_lock);
		DTRACE_PROBE1(callout__start, callout_t *, cp);
		(*cp->c_func)(cp->c_arg);
		DTRACE_PROBE1(callout__end, callout_t *, cp);
		mutex_enter(&ct->ct_lock);

		/*
		 * See comments above.
		 */
		CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
		CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
		cp->c_idnext = ct->ct_freelist;
		ct->ct_freelist = cp;
		cp->c_xid = 0;	/* Indicate completion for c_done */
		cv_broadcast(&cp->c_done);

		/*
		 * Start over from the head of the list to see if any
		 * timeout bearing an earlier hres time remains to be run.
		 */
		cp = ct->ct_hresq;
	}
	mutex_exit(&ct->ct_lock);
}

/*
 * Schedule any callouts that are due on or before this tick.
 */
static void
callout_schedule_1(callout_table_t *ct)
{
	callout_t *cp;
	clock_t curtime, runtime;
	timestruc_t now;
	int64_t hresms;

	mutex_enter(&ct->ct_lock);
	ct->ct_curtime = curtime = lbolt;
	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			if (cp->c_runtime != runtime ||
			    (cp->c_xid & CALLOUT_EXECUTING))
				continue;
			mutex_exit(&ct->ct_lock);
			if (ct->ct_taskq == NULL)
				softcall((void (*)(void *))callout_execute, ct);
			else
				(void) taskq_dispatch(ct->ct_taskq,
				    (task_func_t *)callout_execute, ct,
				    KM_NOSLEEP);
			return;
		}
		ct->ct_runtime++;
	}

	gethrestime(&now);

	/*
	 * Calculate the current time in milliseconds.
	 * We must cast tv_sec to a 64-bit integer
	 * to avoid integer overflow on 32-bit platforms.
	 */
	hresms = (int64_t)now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;

	cp = ct->ct_hresq;
	while (cp != NULL && hresms >= cp->c_hresms) {
		if (cp->c_xid & CALLOUT_EXECUTING) {
			cp = cp->c_hrnext;
			continue;
		}
		mutex_exit(&ct->ct_lock);
		if (ct->ct_taskq == NULL)
			softcall((void (*)(void *))callout_execute, ct);
		else
			(void) taskq_dispatch(ct->ct_taskq,
			    (task_func_t *)callout_execute, ct, KM_NOSLEEP);
		return;
	}
	mutex_exit(&ct->ct_lock);
}

/*
 * Schedule callouts for all callout tables.  Called by clock() on each tick.
 */
void
callout_schedule(void)
{
	int f, t;

	if (cpr_stop_callout)
		return;

	for (t = 0; t < CALLOUT_NTYPES; t++)
		for (f = 0; f < callout_fanout; f++)
			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
}

/*
 * Callback handler used by CPR to stop and resume callouts.
 */
/*ARGSUSED*/
static boolean_t
callout_cpr_callb(void *arg, int code)
{
	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
	return (B_TRUE);
}

/*
 * Initialize all callout tables.  Called at boot time just before clkstart().
485 */ 486 void 487 callout_init(void) 488 { 489 int f, t; 490 int table_id; 491 callout_table_t *ct; 492 493 callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus); 494 495 for (t = 0; t < CALLOUT_NTYPES; t++) { 496 for (f = 0; f < CALLOUT_FANOUT; f++) { 497 table_id = CALLOUT_TABLE(t, f); 498 if (f >= callout_fanout) { 499 callout_table[table_id] = 500 callout_table[table_id - callout_fanout]; 501 continue; 502 } 503 ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP); 504 callout_table[table_id] = ct; 505 ct->ct_short_id = (callout_id_t)table_id | 506 CALLOUT_COUNTER_HIGH; 507 ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM; 508 ct->ct_curtime = ct->ct_runtime = lbolt; 509 if (t == CALLOUT_NORMAL) { 510 /* 511 * Each callout thread consumes exactly one 512 * task structure while active. Therefore, 513 * prepopulating with 2 * CALLOUT_THREADS tasks 514 * ensures that there's at least one task per 515 * thread that's either scheduled or on the 516 * freelist. In turn, this guarantees that 517 * taskq_dispatch() will always either succeed 518 * (because there's a free task structure) or 519 * be unnecessary (because "callout_excute(ct)" 520 * has already scheduled). 521 */ 522 ct->ct_taskq = 523 taskq_create_instance("callout_taskq", f, 524 CALLOUT_THREADS, maxclsyspri, 525 2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS, 526 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 527 } 528 } 529 } 530 (void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout"); 531 } 532