1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/callo.h> 27 #include <sys/param.h> 28 #include <sys/types.h> 29 #include <sys/cpuvar.h> 30 #include <sys/thread.h> 31 #include <sys/kmem.h> 32 #include <sys/kmem_impl.h> 33 #include <sys/cmn_err.h> 34 #include <sys/callb.h> 35 #include <sys/debug.h> 36 #include <sys/vtrace.h> 37 #include <sys/sysmacros.h> 38 #include <sys/sdt.h> 39 40 /* 41 * Callout tables. See timeout(9F) for details. 
 */
static hrtime_t callout_debug_hrtime;	/* debugger entry time */
static int callout_min_resolution;	/* Minimum resolution */
static callout_table_t *callout_boot_ct;	/* Boot CPU's callout tables */
static clock_t callout_max_ticks;	/* max interval */
static hrtime_t callout_longterm;	/* longterm nanoseconds */
static ulong_t callout_counter_low;	/* callout ID increment */
static ulong_t callout_table_bits;	/* number of table bits in ID */
static ulong_t callout_table_mask;	/* mask for the table bits */
static callout_cache_t *callout_caches;	/* linked list of caches */
#pragma align 64(callout_table)
static callout_table_t *callout_table;	/* global callout table array */

/*
 * We run normal callouts from PIL 10. This means that no other handler that
 * runs at PIL 10 is allowed to wait for normal callouts directly or indirectly
 * as it will cause a deadlock. This has always been an unwritten rule.
 * We are making it explicit here.
 */
static int callout_realtime_level = CY_LOW_LEVEL;
static int callout_normal_level = CY_LOCK_LEVEL;

/*
 * Names for the per-table statistics below. NOTE(review): the order here
 * presumably must match the index order of the corresponding ct_* counters
 * when the kstats are created -- confirm against the kstat setup code,
 * which is not visible in this chunk.
 */
static char *callout_kstat_names[] = {
	"callout_timeouts",
	"callout_timeouts_pending",
	"callout_untimeouts_unexpired",
	"callout_untimeouts_executing",
	"callout_untimeouts_expired",
	"callout_expirations",
	"callout_allocations",
};

/*
 * Insert "cp" at the head of the doubly-linked hash list "hash". The
 * cnext/cprev arguments name the link fields to use, so the same macros
 * serve both the ID hash chains and the callout-list chains.
 *
 * NOTE(review): these macros expand to a bare brace block rather than
 * do { } while (0); they are safe only when used as a full statement,
 * which is how every caller in this file uses them.
 */
#define	CALLOUT_HASH_INSERT(hash, cp, cnext, cprev)	\
{	\
	callout_hash_t *hashp = &(hash);	\
	\
	cp->cprev = NULL;	\
	cp->cnext = hashp->ch_head;	\
	if (hashp->ch_head == NULL)	\
		hashp->ch_tail = cp;	\
	else	\
		cp->cnext->cprev = cp;	\
	hashp->ch_head = cp;	\
}

/*
 * Append "cp" at the tail of the hash list "hash".
 */
#define	CALLOUT_HASH_APPEND(hash, cp, cnext, cprev)	\
{	\
	callout_hash_t *hashp = &(hash);	\
	\
	cp->cnext = NULL;	\
	cp->cprev = hashp->ch_tail;	\
	if (hashp->ch_tail == NULL)	\
		hashp->ch_head = cp;	\
	else	\
		cp->cprev->cnext = cp;	\
	hashp->ch_tail = cp;	\
}

/*
 * Unlink "cp" from the hash list "hash". "cp" must currently be on it.
 */
#define	CALLOUT_HASH_DELETE(hash, cp, cnext, cprev)	\
{	\
	callout_hash_t *hashp = &(hash);	\
	\
	if (cp->cnext == NULL)	\
		hashp->ch_tail = cp->cprev;	\
	else	\
		cp->cnext->cprev = cp->cprev;	\
	if (cp->cprev == NULL)	\
		hashp->ch_head = cp->cnext;	\
	else	\
		cp->cprev->cnext = cp->cnext;	\
}

/*
 * These definitions help us queue callouts and callout lists. Here is
 * the queueing rationale:
 *
 *	- callouts are queued in a FIFO manner in the ID hash table.
 *	  TCP timers are typically cancelled in the same order that they
 *	  were issued. The FIFO queueing shortens the search for a callout
 *	  during untimeout().
 *
 *	- callouts are queued in a FIFO manner in their callout lists.
 *	  This ensures that the callouts are executed in the same order that
 *	  they were queued. This is fair. Plus, it helps to make each
 *	  callout expiration timely. It also favors cancellations.
 *
 *	- callout lists are queued in a LIFO manner in the callout list hash
 *	  table. This ensures that long term timers stay at the rear of the
 *	  hash lists.
 *
 *	- callout lists are queued in a FIFO manner in the expired callouts
 *	  list. This ensures that callout lists are executed in the order
 *	  of expiration.
 */
#define	CALLOUT_APPEND(ct, cp)	\
	CALLOUT_HASH_APPEND(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)],	\
	    cp, c_idnext, c_idprev);	\
	CALLOUT_HASH_APPEND(cp->c_list->cl_callouts, cp, c_clnext, c_clprev)

#define	CALLOUT_DELETE(ct, cp)	\
	CALLOUT_HASH_DELETE(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)],	\
	    cp, c_idnext, c_idprev);	\
	CALLOUT_HASH_DELETE(cp->c_list->cl_callouts, cp, c_clnext, c_clprev)

#define	CALLOUT_LIST_INSERT(hash, cl)	\
	CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev)

#define	CALLOUT_LIST_APPEND(hash, cl)	\
	CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev)

#define	CALLOUT_LIST_DELETE(hash, cl)	\
	CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev)

/*
 * For normal callouts, there is a deadlock scenario if two callouts that
 * have an inter-dependency end up on the same callout list. To break the
 * deadlock, you need two taskq threads running in parallel. We compute
 * the number of taskq threads here using a bunch of conditions to make
 * it optimal for the common case. This is an ugly hack, but one that is
 * necessary (sigh).
 *
 * CALLOUT_THRESHOLD is in nanoseconds (100ms); it defines "the near
 * future" for the heuristic below.
 */
#define	CALLOUT_THRESHOLD	100000000
#define	CALLOUT_EXEC_COMPUTE(ct, exec)	\
{	\
	callout_list_t *cl;	\
	\
	cl = ct->ct_expired.ch_head;	\
	if (cl == NULL) {	\
		/*	\
		 * If the expired list is NULL, there is nothing to	\
		 * process.	\
		 */	\
		exec = 0;	\
	} else if ((cl->cl_next == NULL) &&	\
	    (cl->cl_callouts.ch_head == cl->cl_callouts.ch_tail)) {	\
		/*	\
		 * If there is only one callout list and it contains	\
		 * only one callout, there is no need for two threads.	\
		 */	\
		exec = 1;	\
	} else if ((ct->ct_heap_num == 0) ||	\
	    (ct->ct_heap[0] > gethrtime() + CALLOUT_THRESHOLD)) {	\
		/*	\
		 * If the heap has become empty, we need two threads as	\
		 * there is no one to kick off the second thread in the	\
		 * future. If the heap is not empty and the top of the	\
		 * heap does not expire in the near future, we need two	\
		 * threads.	\
		 */	\
		exec = 2;	\
	} else {	\
		/*	\
		 * We have multiple callouts to process. But the cyclic	\
		 * will fire in the near future. So, we only need one	\
		 * thread for now.	\
		 */	\
		exec = 1;	\
	}	\
}

/*
 * Allocate a callout structure. We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing all, we try a KM_PANIC allocation. Note that we never
 * deallocate a callout. See untimeout() for the reasoning.
 *
 * Called with ct_mutex held; the lock is dropped across the allocation
 * (so we may migrate CPUs or sleep briefly) and reacquired before return.
 * Callers must therefore revalidate any state they cached under the lock.
 */
static callout_t *
callout_alloc(callout_table_t *ct)
{
	size_t size;
	callout_t *cp;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	mutex_exit(&ct->ct_mutex);

	cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP);
	if (cp == NULL) {
		/* Cache is exhausted; KM_PANIC means this cannot fail. */
		size = sizeof (callout_t);
		cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
	}
	cp->c_xid = 0;
	cp->c_executor = NULL;
	cv_init(&cp->c_done, NULL, CV_DEFAULT, NULL);
	cp->c_waiting = 0;

	mutex_enter(&ct->ct_mutex);
	ct->ct_allocations++;
	return (cp);
}

/*
 * Allocate a callout list structure. We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing all, we try a KM_PANIC allocation. Note that we never
 * deallocate a callout list.
 */
static void
callout_list_alloc(callout_table_t *ct)
{
	size_t size;
	callout_list_t *cl;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	mutex_exit(&ct->ct_mutex);

	cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP);
	if (cl == NULL) {
		/* Cache is exhausted; KM_PANIC means this cannot fail. */
		size = sizeof (callout_list_t);
		cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
	}
	bzero(cl, sizeof (callout_list_t));

	/*
	 * Note that we do not return the new list; it is placed on the
	 * table's free list for the caller to pick up after it re-checks
	 * state, since ct_mutex was dropped above.
	 */
	mutex_enter(&ct->ct_mutex);
	cl->cl_next = ct->ct_lfree;
	ct->ct_lfree = cl;
}

/*
 * Find a callout list that corresponds to an expiration and matching
 * flags. Returns NULL if no such list is present in the hash bucket.
 */
static callout_list_t *
callout_list_get(callout_table_t *ct, hrtime_t expiration, int flags, int hash)
{
	callout_list_t *cl;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) {
		if ((cl->cl_expiration == expiration) &&
		    (cl->cl_flags == flags))
			return (cl);
	}

	return (NULL);
}

/*
 * Find the callout list that corresponds to an expiration.
 * If the callout list is null, free it. Else, return it.
 *
 * Unlike callout_list_get(), this matches on expiration alone (flags are
 * not compared), and an empty matching list is unlinked and returned to
 * the table's free list as a side effect.
 */
static callout_list_t *
callout_list_check(callout_table_t *ct, hrtime_t expiration, int hash)
{
	callout_list_t *cl;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) {
		if (cl->cl_expiration == expiration) {
			if (cl->cl_callouts.ch_head != NULL) {
				/*
				 * Found a match.
				 */
				return (cl);
			}

			/* Empty list: recycle it onto the free list. */
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			cl->cl_next = ct->ct_lfree;
			ct->ct_lfree = cl;

			return (NULL);
		}
	}

	return (NULL);
}

/*
 * Initialize a callout table's heap, if necessary. Preallocate some free
 * entries so we don't have to check for NULL elsewhere.
 */
static void
callout_heap_init(callout_table_t *ct)
{
	size_t size;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap == NULL);

	/* The heap is a flat array of expiration times (hrtime_t). */
	ct->ct_heap_num = 0;
	ct->ct_heap_max = CALLOUT_CHUNK;
	size = sizeof (hrtime_t) * CALLOUT_CHUNK;
	ct->ct_heap = kmem_alloc(size, KM_SLEEP);
}

/*
 * Reallocate the heap. We try quite hard because we can't sleep, and if
 * we can't do the allocation, we're toast. Failing all, we try a KM_PANIC
 * allocation. Note that the heap only expands, it never contracts.
 *
 * ct_mutex is dropped across the allocation; the loop re-checks the
 * heap state afterwards, both to detect a racing expander and to handle
 * the heap having filled again while the lock was dropped.
 */
static void
callout_heap_expand(callout_table_t *ct)
{
	size_t max, size, osize;
	hrtime_t *heap;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num <= ct->ct_heap_max);

	while (ct->ct_heap_num == ct->ct_heap_max) {
		max = ct->ct_heap_max;
		mutex_exit(&ct->ct_mutex);

		osize = sizeof (hrtime_t) * max;
		size = sizeof (hrtime_t) * (max + CALLOUT_CHUNK);
		/* kmem_alloc_tryhard() updates "size" to the actual size. */
		heap = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);

		mutex_enter(&ct->ct_mutex);
		if (max < ct->ct_heap_max) {
			/*
			 * Someone beat us to the allocation. Free what we
			 * just allocated and proceed.
			 */
			kmem_free(heap, size);
			continue;
		}

		bcopy(ct->ct_heap, heap, osize);
		kmem_free(ct->ct_heap, osize);
		ct->ct_heap = heap;
		ct->ct_heap_max = size / sizeof (hrtime_t);
	}
}

/*
 * Move an expiration from the bottom of the heap to its correct place
 * in the heap. If we reached the root doing this, return 1. Else,
 * return 0.
 */
static int
callout_upheap(callout_table_t *ct)
{
	int current, parent;
	hrtime_t *heap, current_expiration, parent_expiration;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num >= 1);

	/* A single-element heap: the new entry is trivially the root. */
	if (ct->ct_heap_num == 1) {
		return (1);
	}

	heap = ct->ct_heap;
	current = ct->ct_heap_num - 1;

	for (;;) {
		parent = CALLOUT_HEAP_PARENT(current);
		current_expiration = heap[current];
		parent_expiration = heap[parent];

		/*
		 * We have an expiration later than our parent; we're done.
		 */
		if (current_expiration >= parent_expiration) {
			return (0);
		}

		/*
		 * We need to swap with our parent, and continue up the heap.
		 */
		heap[parent] = current_expiration;
		heap[current] = parent_expiration;

		/*
		 * If we just reached the root, we're done.
		 */
		if (parent == 0) {
			return (1);
		}

		current = parent;
	}
	/*NOTREACHED*/
}

/*
 * Insert a new expiration into a callout table's heap.
 *
 * The caller must have ensured there is room (see callout_heap_expand()).
 */
static void
callout_heap_insert(callout_table_t *ct, hrtime_t expiration)
{
	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num < ct->ct_heap_max);

	/*
	 * First, copy the expiration to the bottom of the heap.
	 */
	ct->ct_heap[ct->ct_heap_num] = expiration;
	ct->ct_heap_num++;

	/*
	 * Now, perform an upheap operation. If we reached the root, then
	 * the cyclic needs to be reprogrammed as we have an earlier
	 * expiration.
	 *
	 * Also, during the CPR suspend phase, do not reprogram the cyclic.
	 * We don't want any callout activity. When the CPR resume phase is
	 * entered, the cyclic will be programmed for the earliest expiration
	 * in the heap.
	 */
	if (callout_upheap(ct) && (ct->ct_suspend == 0))
		(void) cyclic_reprogram(ct->ct_cyclic, expiration);
}

/*
 * Move an expiration from the top of the heap to its correct place
 * in the heap.
 */
static void
callout_downheap(callout_table_t *ct)
{
	int left, right, current, nelems;
	hrtime_t *heap, left_expiration, right_expiration, current_expiration;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num >= 1);

	heap = ct->ct_heap;
	current = 0;
	nelems = ct->ct_heap_num;

	for (;;) {
		/*
		 * If we don't have a left child (i.e., we're a leaf), we're
		 * done.
		 */
		if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems)
			return;

		left_expiration = heap[left];
		current_expiration = heap[current];

		right = CALLOUT_HEAP_RIGHT(current);

		/*
		 * Even if we don't have a right child, we still need to compare
		 * our expiration against that of our left child.
		 */
		if (right >= nelems)
			goto comp_left;

		right_expiration = heap[right];

		/*
		 * We have both a left and a right child. We need to compare
		 * the expiration of the children to determine which
		 * expires earlier.
		 */
		if (right_expiration < left_expiration) {
			/*
			 * Our right child is the earlier of our children.
			 * We'll now compare our expiration to its expiration.
			 * If ours is the earlier one, we're done.
			 */
			if (current_expiration <= right_expiration)
				return;

			/*
			 * Our right child expires earlier than we do; swap
			 * with our right child, and descend right.
			 */
			heap[right] = current_expiration;
			heap[current] = right_expiration;
			current = right;
			continue;
		}

comp_left:
		/*
		 * Our left child is the earlier of our children (or we have
		 * no right child). We'll now compare our expiration
		 * to its expiration. If ours is the earlier one, we're done.
		 */
		if (current_expiration <= left_expiration)
			return;

		/*
		 * Our left child expires earlier than we do; swap with our
		 * left child, and descend left.
		 */
		heap[left] = current_expiration;
		heap[current] = left_expiration;
		current = left;
	}
}

/*
 * Delete and handle all past expirations in a callout table's heap.
 */
static void
callout_heap_delete(callout_table_t *ct)
{
	hrtime_t now, expiration;
	callout_list_t *cl;
	int hash;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	now = gethrtime();

	while (ct->ct_heap_num > 0) {
		expiration = ct->ct_heap[0];
		/*
		 * Find the callout list that corresponds to the expiration.
		 * If the callout list is empty, callout_list_check()
		 * will free the callout list and return NULL.
		 */
		hash = CALLOUT_CLHASH(expiration);
		cl = callout_list_check(ct, expiration, hash);
		if (cl != NULL) {
			/*
			 * If the root of the heap expires in the future, we are
			 * done. We are doing this check here instead of at the
			 * beginning because we want to first free all the
			 * empty callout lists at the top of the heap.
			 */
			if (expiration > now)
				break;

			/*
			 * Move the callout list for this expiration to the
			 * list of expired callout lists. It will be processed
			 * by the callout executor.
			 */
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
		}

		/*
		 * Now delete the root. This is done by swapping the root with
		 * the last item in the heap and downheaping the item.
		 */
		ct->ct_heap_num--;
		if (ct->ct_heap_num > 0) {
			ct->ct_heap[0] = ct->ct_heap[ct->ct_heap_num];
			callout_downheap(ct);
		}
	}

	/*
	 * If this callout table is empty or callouts have been suspended
	 * by CPR, just return. The cyclic has already been programmed to
	 * infinity by the cyclic subsystem.
 */
	if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0))
		return;

	/*
	 * Here "expiration" is still the (future) root of the heap, since
	 * the loop above can only reach this point via the break.
	 */
	(void) cyclic_reprogram(ct->ct_cyclic, expiration);
}

/*
 * Common function used to create normal and realtime callouts.
 *
 * Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So,
 * there is one restriction on a realtime callout handler - it should not
 * directly or indirectly acquire cpu_lock. CPU offline waits for pending
 * cyclic handlers to complete while holding cpu_lock. So, if a realtime
 * callout handler were to try to get cpu_lock, there would be a deadlock
 * during CPU offline.
 */
callout_id_t
timeout_generic(int type, void (*func)(void *), void *arg,
    hrtime_t expiration, hrtime_t resolution, int flags)
{
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t id;
	callout_list_t *cl;
	hrtime_t now, interval;
	int hash;

	ASSERT(resolution > 0);
	ASSERT(func != NULL);

	/*
	 * Please see comment about minimum resolution in callout_init().
	 */
	if (resolution < callout_min_resolution)
		resolution = callout_min_resolution;

	/*
	 * We disable kernel preemption so that we remain on the same CPU
	 * throughout. If we needed to reprogram the callout table's cyclic,
	 * we can avoid X-calls if we are on the same CPU.
	 *
	 * Note that callout_alloc() releases and reacquires the callout
	 * table mutex. While reacquiring the mutex, it is possible for us
	 * to go to sleep and later migrate to another CPU. This should be
	 * pretty rare, though.
	 */
	kpreempt_disable();

	ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)];
	mutex_enter(&ct->ct_mutex);

	if (ct->ct_cyclic == CYCLIC_NONE) {
		mutex_exit(&ct->ct_mutex);
		/*
		 * The callout table has not yet been initialized fully.
		 * So, put this one on the boot callout table which is
		 * always initialized.
		 */
		ct = &callout_boot_ct[type];
		mutex_enter(&ct->ct_mutex);
	}

	/* Grab a callout structure from the free list, or allocate one. */
	if ((cp = ct->ct_free) == NULL)
		cp = callout_alloc(ct);
	else
		ct->ct_free = cp->c_idnext;

	cp->c_func = func;
	cp->c_arg = arg;

	/*
	 * Compute the expiration hrtime, quantized to "resolution"
	 * (rounded up first if CALLOUT_FLAG_ROUNDUP was passed).
	 */
	now = gethrtime();
	if (flags & CALLOUT_FLAG_ABSOLUTE) {
		interval = expiration - now;
	} else {
		interval = expiration;
		expiration += now;
	}
	if (flags & CALLOUT_FLAG_ROUNDUP)
		expiration += resolution - 1;
	expiration = (expiration / resolution) * resolution;
	if (expiration <= 0) {
		/*
		 * expiration hrtime overflow has occurred. Just set the
		 * expiration to infinity.
		 */
		expiration = CY_INFINITY;
	}

	/*
	 * Assign an ID to this callout. Legacy (32-bit) IDs come from the
	 * short-term or long-term counter depending on the interval; full
	 * IDs come from the generation counter.
	 */
	if (flags & CALLOUT_FLAG_32BIT) {
		if (interval > callout_longterm) {
			id = (ct->ct_long_id - callout_counter_low);
			id |= CALLOUT_COUNTER_HIGH;
			ct->ct_long_id = id;
		} else {
			id = (ct->ct_short_id - callout_counter_low);
			id |= CALLOUT_COUNTER_HIGH;
			ct->ct_short_id = id;
		}
	} else {
		id = (ct->ct_gen_id - callout_counter_low);
		if ((id & CALLOUT_COUNTER_HIGH) == 0) {
			/* The counter wrapped; bump the generation. */
			id |= CALLOUT_COUNTER_HIGH;
			id += CALLOUT_GENERATION_LOW;
		}
		ct->ct_gen_id = id;
	}

	cp->c_xid = id;

	flags &= CALLOUT_LIST_FLAGS;
	hash = CALLOUT_CLHASH(expiration);

again:
	/*
	 * Try to see if a callout list already exists for this expiration.
	 * Most of the time, this will be the case.
	 */
	cl = callout_list_get(ct, expiration, flags, hash);
	if (cl == NULL) {
		/*
		 * Check if we have enough space in the heap to insert one
		 * expiration. If not, expand the heap.
		 */
		if (ct->ct_heap_num == ct->ct_heap_max) {
			callout_heap_expand(ct);
			/*
			 * In the above call, we drop the lock, allocate and
			 * reacquire the lock. So, we could have been away
			 * for a while. In the meantime, someone could have
			 * inserted a callout list with the same expiration.
			 * So, the best course is to repeat the steps. This
			 * should be an infrequent event.
			 */
			goto again;
		}

		/*
		 * Check the free list. If we don't find one, we have to
		 * take the slow path and allocate from kmem.
		 */
		if ((cl = ct->ct_lfree) == NULL) {
			callout_list_alloc(ct);
			/*
			 * In the above call, we drop the lock, allocate and
			 * reacquire the lock. So, we could have been away
			 * for a while. In the meantime, someone could have
			 * inserted a callout list with the same expiration.
			 * Plus, the heap could have become full. So, the best
			 * course is to repeat the steps. This should be an
			 * infrequent event.
			 */
			goto again;
		}
		ct->ct_lfree = cl->cl_next;
		cl->cl_expiration = expiration;
		cl->cl_flags = flags;

		CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);

		/*
		 * This is a new expiration. So, insert it into the heap.
		 * This will also reprogram the cyclic, if the expiration
		 * propagated to the root of the heap.
		 */
		callout_heap_insert(ct, expiration);
	}
	cp->c_list = cl;
	CALLOUT_APPEND(ct, cp);

	ct->ct_timeouts++;
	ct->ct_timeouts_pending++;

	mutex_exit(&ct->ct_mutex);

	kpreempt_enable();

	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
	    "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration,
	    cp);

	return (id);
}

/*
 * Legacy timeout(9F) entry point: schedule a normal callout "delta"
 * ticks in the future and return a legacy (32-bit) timeout ID.
 */
timeout_id_t
timeout(void (*func)(void *), void *arg, clock_t delta)
{
	ulong_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg,
	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);

	return ((timeout_id_t)id);
}

/*
 * Convenience function that creates a normal callout with default parameters
 * and returns a full ID.
 */
callout_id_t
timeout_default(void (*func)(void *), void *arg, clock_t delta)
{
	callout_id_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta),
	    nsec_per_tick, 0);

	return (id);
}

/*
 * Legacy realtime_timeout(9F) entry point: schedule a realtime callout
 * "delta" ticks in the future and return a legacy (32-bit) timeout ID.
 */
timeout_id_t
realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
{
	ulong_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg,
	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);

	return ((timeout_id_t)id);
}

/*
 * Convenience function that creates a realtime callout with default parameters
 * and returns a full ID.
 */
callout_id_t
realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta)
{
	callout_id_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta),
	    nsec_per_tick, 0);

	return (id);
}

/*
 * Cancel a callout by ID. Returns the remaining time (in nanoseconds)
 * if the callout had not yet expired, or -1 if the callout has already
 * expired, is currently executing, or the ID is stale. If "nowait" is
 * zero and the callout is executing, block until it completes.
 */
hrtime_t
untimeout_generic(callout_id_t id, int nowait)
{
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t xid;
	int hash;
	callout_id_t bogus;

	ct = &callout_table[CALLOUT_ID_TO_TABLE(id)];
	hash = CALLOUT_IDHASH(id);

	mutex_enter(&ct->ct_mutex);

	/*
	 * Search the ID hash table for the callout.
	 */
	for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) {

		xid = cp->c_xid;

		/*
		 * Match the ID and generation number.
		 */
		if ((xid & CALLOUT_ID_MASK) != id)
			continue;

		if ((xid & CALLOUT_EXECUTING) == 0) {
			hrtime_t expiration;

			/*
			 * Delete the callout. If the callout list becomes
			 * NULL, we don't remove it from the table. This is
			 * so it can be reused. If the empty callout list
			 * corresponds to the top of the the callout heap, we
			 * don't reprogram the table cyclic here. This is in
			 * order to avoid lots of X-calls to the CPU associated
			 * with the callout table.
			 */
			expiration = cp->c_list->cl_expiration;
			CALLOUT_DELETE(ct, cp);
			cp->c_idnext = ct->ct_free;
			ct->ct_free = cp;
			ct->ct_untimeouts_unexpired++;
			ct->ct_timeouts_pending--;
			mutex_exit(&ct->ct_mutex);

			expiration -= gethrtime();
			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
			    "untimeout:ID %lx hrtime left %llx", id,
			    expiration);
			return (expiration < 0 ? 0 : expiration);
		}

		ct->ct_untimeouts_executing++;
		/*
		 * The callout we want to delete is currently executing.
		 * The DDI states that we must wait until the callout
		 * completes before returning, so we block on c_done until the
		 * callout ID changes (to the old ID if it's on the freelist,
		 * or to a new callout ID if it's in use). This implicitly
		 * assumes that callout structures are persistent (they are).
		 */
		if (cp->c_executor == curthread) {
			/*
			 * The timeout handler called untimeout() on itself.
			 * Stupid, but legal. We can't wait for the timeout
			 * to complete without deadlocking, so we just return.
			 */
			mutex_exit(&ct->ct_mutex);
			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
			    "untimeout_self:ID %x", id);
			return (-1);
		}
		if (nowait == 0) {
			/*
			 * We need to wait. Indicate that we are waiting by
			 * incrementing c_waiting. This prevents the executor
			 * from doing a wakeup on c_done if there are no
			 * waiters.
			 */
			while (cp->c_xid == xid) {
				cp->c_waiting = 1;
				cv_wait(&cp->c_done, &ct->ct_mutex);
			}
		}
		mutex_exit(&ct->ct_mutex);
		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
		    "untimeout_executing:ID %lx", id);
		return (-1);
	}
	ct->ct_untimeouts_expired++;

	mutex_exit(&ct->ct_mutex);
	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
	    "untimeout_bogus_id:ID %lx", id);

	/*
	 * We didn't find the specified callout ID. This means either
	 * (1) the callout already fired, or (2) the caller passed us
	 * a bogus value. Perform a sanity check to detect case (2).
	 */
	bogus = (CALLOUT_EXECUTING | CALLOUT_COUNTER_HIGH);
	if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0))
		panic("untimeout: impossible timeout id %llx",
		    (unsigned long long)id);

	return (-1);
}

/*
 * Legacy untimeout(9F): cancel a legacy-ID callout, returning the time
 * remaining in ticks (0 if expired with no time left, -1 otherwise).
 */
clock_t
untimeout(timeout_id_t id_arg)
{
	hrtime_t hleft;
	clock_t tleft;
	callout_id_t id;

	id = (ulong_t)id_arg;
	hleft = untimeout_generic(id, 0);
	if (hleft < 0)
		tleft = -1;
	else if (hleft == 0)
		tleft = 0;
	else
		tleft = NSEC_TO_TICK(hleft);

	return (tleft);
}

/*
 * Convenience function to untimeout a timeout with a full ID with default
 * parameters.
 */
clock_t
untimeout_default(callout_id_t id, int nowait)
{
	hrtime_t hleft;
	clock_t tleft;

	hleft = untimeout_generic(id, nowait);
	if (hleft < 0)
		tleft = -1;
	else if (hleft == 0)
		tleft = 0;
	else
		tleft = NSEC_TO_TICK(hleft);

	return (tleft);
}

/*
 * Expire all the callouts queued in the specified callout list.
 */
static void
callout_list_expire(callout_table_t *ct, callout_list_t *cl)
{
	callout_t *cp, *cnext;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(cl != NULL);

	for (cp = cl->cl_callouts.ch_head; cp != NULL; cp = cnext) {
		/*
		 * Multiple executor threads could be running at the same
		 * time. If this callout is already being executed,
		 * go on to the next one.
		 */
		if (cp->c_xid & CALLOUT_EXECUTING) {
			cnext = cp->c_clnext;
			continue;
		}

		/*
		 * Indicate to untimeout() that a callout is
		 * being expired by the executor.
 */
		cp->c_xid |= CALLOUT_EXECUTING;
		cp->c_executor = curthread;
		/* ct_mutex is dropped while the handler runs. */
		mutex_exit(&ct->ct_mutex);

		DTRACE_PROBE1(callout__start, callout_t *, cp);
		(*cp->c_func)(cp->c_arg);
		DTRACE_PROBE1(callout__end, callout_t *, cp);

		mutex_enter(&ct->ct_mutex);

		ct->ct_expirations++;
		ct->ct_timeouts_pending--;
		/*
		 * Indicate completion for c_done.
		 */
		cp->c_xid &= ~CALLOUT_EXECUTING;
		cp->c_executor = NULL;
		cnext = cp->c_clnext;

		/*
		 * Delete callout from ID hash table and the callout
		 * list, return to freelist, and tell any untimeout() that
		 * cares that we're done.
		 */
		CALLOUT_DELETE(ct, cp);
		cp->c_idnext = ct->ct_free;
		ct->ct_free = cp;

		if (cp->c_waiting) {
			cp->c_waiting = 0;
			cv_broadcast(&cp->c_done);
		}
	}
}

/*
 * Execute all expired callout lists for a callout table.
 */
static void
callout_expire(callout_table_t *ct)
{
	callout_list_t *cl, *clnext;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) {
		/*
		 * Expire all the callouts in this callout list.
		 */
		callout_list_expire(ct, cl);

		/*
		 * clnext is fetched after callout_list_expire() because
		 * that call drops and reacquires ct_mutex, so the list
		 * links may have changed in the meantime.
		 */
		clnext = cl->cl_next;
		if (cl->cl_callouts.ch_head == NULL) {
			/*
			 * Free the callout list.
			 */
			CALLOUT_LIST_DELETE(ct->ct_expired, cl);
			cl->cl_next = ct->ct_lfree;
			ct->ct_lfree = cl;
		}
	}
}

/*
 * The cyclic handlers below process callouts in two steps:
 *
 *	1. Find all expired callout lists and queue them in a separate
 *	   list of expired callouts.
 *	2. Execute the expired callout lists.
 *
 * This is done for two reasons:
 *
 *	1. We want to quickly find the next earliest expiration to program
 *	   the cyclic to and reprogram it. We can do this right at the end
 *	   of step 1.
 *	2.
The realtime cyclic handler expires callouts in place. However,
 *	   for normal callouts, callouts are expired by a taskq thread.
 *	   So, it is simpler and more robust to have the taskq thread just
 *	   do step 2.
 */

/*
 * Realtime callout cyclic handler.
 */
void
callout_realtime(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	callout_heap_delete(ct);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}

/*
 * Taskq handler for normal callouts: performs step 2 (execution) only.
 * Dispatched from callout_normal() below.
 */
void
callout_execute(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}

/*
 * Normal callout cyclic handler.
 */
void
callout_normal(callout_table_t *ct)
{
	int i, exec;

	mutex_enter(&ct->ct_mutex);
	callout_heap_delete(ct);
	/* Decide how many executor threads to dispatch (0, 1 or 2). */
	CALLOUT_EXEC_COMPUTE(ct, exec);
	mutex_exit(&ct->ct_mutex);

	for (i = 0; i < exec; i++) {
		ASSERT(ct->ct_taskq != NULL);
		(void) taskq_dispatch(ct->ct_taskq,
		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
	}
}

/*
 * Suspend callout processing.
 */
static void
callout_suspend(void)
{
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and suspend callout
	 * processing.
	 *
	 * We need to suspend all the tables (including the inactive ones)
	 * so that if a table is made active while the suspend is still on,
	 * the table remains suspended.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];

			mutex_enter(&ct->ct_mutex);
			/*
			 * ct_suspend is a count so that nested
			 * suspend/resume pairs balance correctly.
			 */
			ct->ct_suspend++;
			if (ct->ct_cyclic == CYCLIC_NONE) {
				mutex_exit(&ct->ct_mutex);
				continue;
			}
			if (ct->ct_suspend == 1)
				(void) cyclic_reprogram(ct->ct_cyclic,
				    CY_INFINITY);
			mutex_exit(&ct->ct_mutex);
		}
	}
}

/*
 * Shift every pending expiration in the table by "delta" nanoseconds
 * (used after time spent in CPR or the debugger, which should not count
 * against callouts). Called with ct_mutex held and the table suspended.
 */
static void
callout_adjust(callout_table_t *ct, hrtime_t delta)
{
	int hash, newhash;
	hrtime_t expiration;
	callout_list_t *cl;
	callout_hash_t list;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	/*
	 * In order to adjust the expirations, we null out the heap. Then,
	 * we reinsert adjusted expirations in the heap. Keeps it simple.
	 * Note that since the CALLOUT_TABLE_SUSPENDED flag is set by the
	 * caller, the heap insert does not result in cyclic reprogramming.
	 */
	ct->ct_heap_num = 0;

	/*
	 * First, remove all the callout lists from the table and string them
	 * in a list.
	 */
	list.ch_head = list.ch_tail = NULL;
	for (hash = 0; hash < CALLOUT_BUCKETS; hash++) {
		while ((cl = ct->ct_clhash[hash].ch_head) != NULL) {
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			CALLOUT_LIST_APPEND(list, cl);
		}
	}

	/*
	 * Now, traverse the callout lists and adjust their expirations.
	 */
	while ((cl = list.ch_head) != NULL) {
		CALLOUT_LIST_DELETE(list, cl);
		/*
		 * Set the new expiration and reinsert in the right
		 * hash bucket.
		 */
		expiration = cl->cl_expiration;
		expiration += delta;
		cl->cl_expiration = expiration;
		newhash = CALLOUT_CLHASH(expiration);
		CALLOUT_LIST_INSERT(ct->ct_clhash[newhash], cl);
		callout_heap_insert(ct, expiration);
	}
}

/*
 * Resume callout processing.
 */
static void
callout_resume(hrtime_t delta)
{
	hrtime_t exp;
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and resume callout
	 * processing. For active tables, perform any hrtime adjustments
	 * necessary.
	 *
	 * ct_suspend nests with callout_suspend(); only the outermost
	 * resume (count reaching 0) reprograms the cyclic.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];

			mutex_enter(&ct->ct_mutex);
			if (ct->ct_cyclic == CYCLIC_NONE) {
				ct->ct_suspend--;
				mutex_exit(&ct->ct_mutex);
				continue;
			}

			/* Compensate for time spent in the debugger. */
			if (delta)
				callout_adjust(ct, delta);

			ct->ct_suspend--;
			if (ct->ct_suspend == 0) {
				/*
				 * If the expired list is non-empty, then have
				 * the cyclic expire immediately. Else, program
				 * the cyclic based on the heap.
				 */
				if (ct->ct_expired.ch_head != NULL)
					exp = gethrtime();
				else if (ct->ct_heap_num > 0)
					exp = ct->ct_heap[0];
				else
					exp = 0;
				if (exp != 0)
					(void) cyclic_reprogram(ct->ct_cyclic,
					    exp);
			}
			mutex_exit(&ct->ct_mutex);
		}
	}
}

/*
 * Callback handler used by CPR to stop and resume callouts.
 */
/*ARGSUSED*/
static boolean_t
callout_cpr_callb(void *arg, int code)
{
	if (code == CB_CODE_CPR_CHKPT)
		callout_suspend();
	else
		callout_resume(0);

	return (B_TRUE);
}

/*
 * Callback handler invoked when the debugger is entered or exited.
 */
/*ARGSUSED*/
static boolean_t
callout_debug_callb(void *arg, int code)
{
	hrtime_t delta;

	/*
	 * When the system enters the debugger, make a note of the hrtime.
	 * When it is resumed, compute how long the system was in the
	 * debugger. This interval should not be counted for callouts.
	 */
	if (code == 0) {
		callout_suspend();
		callout_debug_hrtime = gethrtime();
	} else {
		delta = gethrtime() - callout_debug_hrtime;
		callout_resume(delta);
	}

	return (B_TRUE);
}

/*
 * Move the absolute hrestime callouts to the expired list. Then program the
 * table's cyclic to expire immediately so that the callouts can be executed
 * immediately.
 */
static void
callout_hrestime_one(callout_table_t *ct)
{
	callout_list_t *cl, *clnext;
	int hash, flags;

	mutex_enter(&ct->ct_mutex);
	/* Nothing pending in this table; nothing to move. */
	if (ct->ct_heap_num == 0) {
		mutex_exit(&ct->ct_mutex);
		return;
	}

	flags = CALLOUT_LIST_FLAGS;
	for (hash = 0; hash < CALLOUT_BUCKETS; hash++) {
		for (cl = ct->ct_clhash[hash].ch_head; cl; cl = clnext) {
			/* Save the link before the list is relocated. */
			clnext = cl->cl_next;
			if (cl->cl_flags == flags) {
				CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
				CALLOUT_LIST_APPEND(ct->ct_expired, cl);
			}
		}
	}

	/* Only fire the cyclic now if the table is not suspended. */
	if ((ct->ct_expired.ch_head != NULL) && (ct->ct_suspend == 0))
		(void) cyclic_reprogram(ct->ct_cyclic, gethrtime());

	mutex_exit(&ct->ct_mutex);
}

/*
 * This function is called whenever system time (hrestime) is changed
 * explicitly. All the HRESTIME callouts must be expired at once.
 */
/*ARGSUSED*/
void
callout_hrestime(void)
{
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and process the hrestime
	 * callouts therein.
	 *
	 * We look at all the tables because we don't know which ones were
	 * onlined and offlined in the past. The offlined tables may still
	 * have active cyclics processing timers somewhere.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];
			callout_hrestime_one(ct);
		}
	}
}

/*
 * Create the hash tables for this callout table.
 */
static void
callout_hash_init(callout_table_t *ct)
{
	size_t size;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL));

	size = sizeof (callout_hash_t) * CALLOUT_BUCKETS;
	ct->ct_idhash = kmem_zalloc(size, KM_SLEEP);
	ct->ct_clhash = kmem_zalloc(size, KM_SLEEP);
}

/*
 * Create per-callout table kstats.
 */
static void
callout_kstat_init(callout_table_t *ct)
{
	callout_stat_type_t stat;
	kstat_t *ct_kstats;
	int ndx;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_kstats == NULL);

	/* The table's index in callout_table is used as the kstat instance. */
	ndx = ct - callout_table;
	ct_kstats = kstat_create("unix", ndx, "callout",
	    "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL);

	if (ct_kstats == NULL) {
		cmn_err(CE_WARN, "kstat_create for callout table %p failed",
		    (void *)ct);
	} else {
		ct_kstats->ks_data = ct->ct_kstat_data;
		for (stat = 0; stat < CALLOUT_NUM_STATS; stat++)
			kstat_named_init(&ct->ct_kstat_data[stat],
			    callout_kstat_names[stat], KSTAT_DATA_INT64);
		ct->ct_kstats = ct_kstats;
		kstat_install(ct_kstats);
	}
}

/*
 * Create the cyclic (and, for normal tables, the taskq) for a callout
 * table. Called once per table from callout_cpu_online().
 */
static void
callout_cyclic_init(callout_table_t *ct)
{
	cyc_handler_t hdlr;
	cyc_time_t when;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	t = CALLOUT_TABLE_TYPE(ct);
	seqid = CALLOUT_TABLE_SEQID(ct);

	/*
	 * Create the taskq thread if the table type is normal.
	 * Realtime tables are handled at PIL1 by a softint
	 * handler.
	 */
	if (t == CALLOUT_NORMAL) {
		ASSERT(ct->ct_taskq == NULL);
		/*
		 * Each callout thread consumes exactly one
		 * task structure while active.  Therefore,
		 * prepopulating with 2 * CALLOUT_THREADS tasks
		 * ensures that there's at least one task per
		 * thread that's either scheduled or on the
		 * freelist.  In turn, this guarantees that
		 * taskq_dispatch() will always either succeed
		 * (because there's a free task structure) or
		 * be unnecessary (because "callout_execute(ct)"
		 * has already scheduled).
		 */
		ct->ct_taskq =
		    taskq_create_instance("callout_taskq", seqid,
		    CALLOUT_THREADS, maxclsyspri,
		    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
		    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	}

	/*
	 * callouts can only be created in a table whose
	 * cyclic has been initialized.
	 */
	ASSERT(ct->ct_heap_num == 0);

	/*
	 * Create the callout table cyclics.
	 *
	 * The realtime cyclic handler executes at low PIL. The normal cyclic
	 * handler executes at lock PIL. This is because there are cases
	 * where code can block at PIL > 1 waiting for a normal callout handler
	 * to unblock it directly or indirectly. If the normal cyclic were to
	 * be executed at low PIL, it could get blocked out by the waiter
	 * and cause a deadlock.
	 */
	ASSERT(ct->ct_cyclic == CYCLIC_NONE);

	hdlr.cyh_func = (cyc_func_t)CALLOUT_CYCLIC_HANDLER(t);
	if (ct->ct_type == CALLOUT_REALTIME)
		hdlr.cyh_level = callout_realtime_level;
	else
		hdlr.cyh_level = callout_normal_level;
	hdlr.cyh_arg = ct;
	/* Program to infinity; real expirations reprogram it later. */
	when.cyt_when = CY_INFINITY;
	when.cyt_interval = CY_INFINITY;

	ct->ct_cyclic = cyclic_add(&hdlr, &when);
}

/*
 * Per-CPU callout initialization, invoked when a CPU is onlined (and for
 * the boot CPU from callout_init(), for slave CPUs from callout_mp_init()).
 * Sets up the CPU's tables on first use and binds their cyclics to the CPU.
 */
void
callout_cpu_online(cpu_t *cp)
{
	lgrp_handle_t hand;
	callout_cache_t *cache;
	char s[KMEM_CACHE_NAMELEN];
	callout_table_t *ct;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Locate the cache corresponding to the onlined CPU's lgroup.
	 * Note that access to callout_caches is protected by cpu_lock.
	 */
	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
	for (cache = callout_caches; cache != NULL; cache = cache->cc_next) {
		if (cache->cc_hand == hand)
			break;
	}

	/*
	 * If not found, create one. The caches are never destroyed.
	 */
	if (cache == NULL) {
		cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP);
		cache->cc_hand = hand;
		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx",
		    (long)hand);
		cache->cc_cache = kmem_cache_create(s, sizeof (callout_t),
		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx",
		    (long)hand);
		cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t),
		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
		cache->cc_next = callout_caches;
		callout_caches = cache;
	}

	seqid = cp->cpu_seqid;

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		ct = &callout_table[CALLOUT_TABLE(t, seqid)];

		mutex_enter(&ct->ct_mutex);
		/*
		 * Store convenience pointers to the kmem caches
		 * in the callout table. These assignments should always be
		 * done as callout tables can map to different physical
		 * CPUs each time.
		 */
		ct->ct_cache = cache->cc_cache;
		ct->ct_lcache = cache->cc_lcache;

		/*
		 * We use the heap pointer to check if stuff has been
		 * initialized for this callout table.
		 */
		if (ct->ct_heap == NULL) {
			callout_heap_init(ct);
			callout_hash_init(ct);
			callout_kstat_init(ct);
			callout_cyclic_init(ct);
		}

		mutex_exit(&ct->ct_mutex);

		/*
		 * Move the cyclic to this CPU by doing a bind.
		 */
		cyclic_bind(ct->ct_cyclic, cp, NULL);
	}
}

/*
 * Per-CPU teardown counterpart of callout_cpu_online(); only unbinds the
 * cyclics, leaving the tables themselves intact.
 */
void
callout_cpu_offline(cpu_t *cp)
{
	callout_table_t *ct;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&cpu_lock));

	seqid = cp->cpu_seqid;

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		ct = &callout_table[CALLOUT_TABLE(t, seqid)];

		/*
		 * Unbind the cyclic. This will allow the cyclic subsystem
		 * to juggle the cyclic during CPU offline.
		 */
		cyclic_bind(ct->ct_cyclic, NULL, NULL);
	}
}

/*
 * This is called to perform per-CPU initialization for slave CPUs at
 * boot time.
 */
void
callout_mp_init(void)
{
	cpu_t *cp;

	mutex_enter(&cpu_lock);

	/* Walk the circular list of active CPUs. */
	cp = cpu_active;
	do {
		callout_cpu_online(cp);
	} while ((cp = cp->cpu_next_onln) != cpu_active);

	mutex_exit(&cpu_lock);
}

/*
 * Initialize all callout tables. Called at boot time just before clkstart().
 */
void
callout_init(void)
{
	int f, t;
	size_t size;
	int table_id;
	callout_table_t *ct;
	long bits, fanout;
	uintptr_t buf;

	/*
	 * Initialize callout globals. 'bits' is the number of bits needed
	 * to encode max_ncpus table fanouts in a callout ID.
	 */
	bits = 0;
	for (fanout = 1; (fanout < max_ncpus); fanout <<= 1)
		bits++;
	callout_table_bits = CALLOUT_TYPE_BITS + bits;
	callout_table_mask = (1 << callout_table_bits) - 1;
	callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT;
	callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS);
	callout_max_ticks = CALLOUT_MAX_TICKS;

	/*
	 * Because of the variability in timing behavior across systems with
	 * different architectures, we cannot allow arbitrarily low
	 * resolutions. The minimum resolution has to be determined in a
	 * platform-specific way. Until then, we define a blanket minimum
	 * resolution for callouts of CALLOUT_MIN_RESOLUTION.
	 *
	 * If, in the future, someone requires lower resolution timers, they
	 * can do one of two things:
	 *
	 *	- Define a lower value for callout_min_resolution. This would
	 *	  affect all clients of the callout subsystem. If this done
	 *	  via /etc/system, then no code changes are required and it
	 *	  would affect only that customer.
	 *
	 *	- Define a flag to be passed to timeout creation that allows
	 *	  the lower resolution. This involves code changes. But it
	 *	  would affect only the calling module. It is the developer's
	 *	  responsibility to test on all systems and make sure that
	 *	  everything works.
	 */
	if (callout_min_resolution <= 0)
		callout_min_resolution = CALLOUT_MIN_RESOLUTION;

	/*
	 * Allocate all the callout tables based on max_ncpus. We have chosen
	 * to do boot-time allocation instead of dynamic allocation because:
	 *
	 *	- the size of the callout tables is not too large.
	 *	- there are race conditions involved in making this dynamic.
	 *	- the hash tables that go with the callout tables consume
	 *	  most of the memory and they are only allocated in
	 *	  callout_cpu_online().
	 *
	 * Each CPU has two tables that are consecutive in the array. The first
	 * one is for realtime callouts and the second one is for normal ones.
	 *
	 * We do this alignment dance to make sure that callout table
	 * structures will always be on a cache line boundary.
	 */
	size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus;
	size += CALLOUT_ALIGN;
	buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP);
	callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN);

	/* 'size' is reused below for the per-table kstat data. */
	size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS;
	/*
	 * Now, initialize the tables for all the CPUs.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			table_id = CALLOUT_TABLE(t, f);
			ct = &callout_table[table_id];
			ct->ct_type = t;
			mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
			/*
			 * Precompute the base IDs for long and short-term
			 * legacy IDs. This makes ID generation during
			 * timeout() fast.
			 */
			ct->ct_short_id = CALLOUT_SHORT_ID(table_id);
			ct->ct_long_id = CALLOUT_LONG_ID(table_id);
			/*
			 * Precompute the base ID for generation-based IDs.
			 * Note that when the first ID gets allocated, the
			 * ID will wrap. This will cause the generation
			 * number to be incremented to 1.
			 */
			ct->ct_gen_id = CALLOUT_SHORT_ID(table_id);
			/*
			 * Initialize the cyclic as NONE. This will get set
			 * during CPU online. This is so that partially
			 * populated systems will only have the required
			 * number of cyclics, not more.
			 */
			ct->ct_cyclic = CYCLIC_NONE;
			ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP);
		}
	}

	/*
	 * Add the callback for CPR. This is called during checkpoint
	 * resume to suspend and resume callouts.
	 */
	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT,
	    "callout_cpr");
	(void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER,
	    "callout_debug");

	/*
	 * Call the per-CPU initialization function for the boot CPU. This
	 * is done here because the function is not called automatically for
	 * the boot CPU from the CPU online/offline hooks. Note that the
	 * CPU lock is taken here because of convention.
	 */
	mutex_enter(&cpu_lock);
	callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)];
	callout_cpu_online(CPU);
	mutex_exit(&cpu_lock);
}