1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/callo.h> 27 #include <sys/param.h> 28 #include <sys/types.h> 29 #include <sys/cpuvar.h> 30 #include <sys/thread.h> 31 #include <sys/kmem.h> 32 #include <sys/kmem_impl.h> 33 #include <sys/cmn_err.h> 34 #include <sys/callb.h> 35 #include <sys/debug.h> 36 #include <sys/vtrace.h> 37 #include <sys/sysmacros.h> 38 #include <sys/sdt.h> 39 40 /* 41 * Callout tables. See timeout(9F) for details. 42 */ 43 static int callout_threads; /* callout normal threads */ 44 static hrtime_t callout_debug_hrtime; /* debugger entry time */ 45 static int callout_chunk; /* callout heap chunk size */ 46 static int callout_min_reap; /* callout minimum reap count */ 47 static int callout_tolerance; /* callout hires tolerance */ 48 static callout_table_t *callout_boot_ct; /* Boot CPU's callout tables */ 49 static clock_t callout_max_ticks; /* max interval */ 50 static hrtime_t callout_longterm; /* longterm nanoseconds */ 51 static ulong_t callout_counter_low; /* callout ID increment */ 52 static ulong_t callout_table_bits; /* number of table bits in ID */ 53 static ulong_t callout_table_mask; /* mask for the table bits */ 54 static callout_cache_t *callout_caches; /* linked list of caches */ 55 #pragma align 64(callout_table) 56 static callout_table_t *callout_table; /* global callout table array */ 57 58 /* 59 * We run 'realtime' callouts at PIL 1 (CY_LOW_LEVEL). For 'normal' 60 * callouts, from PIL 10 (CY_LOCK_LEVEL) we dispatch the callout, 61 * via taskq, to a thread that executes at PIL 0 - so we end up running 62 * 'normal' callouts at PIL 0. 
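 * (This choice is also why realtime callout handlers must not acquire
 * cpu_lock, directly or indirectly; see the comment above
 * timeout_generic() below.)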
63 */ 64 static volatile int callout_realtime_level = CY_LOW_LEVEL; 65 static volatile int callout_normal_level = CY_LOCK_LEVEL; 66 67 static char *callout_kstat_names[] = { 68 "callout_timeouts", 69 "callout_timeouts_pending", 70 "callout_untimeouts_unexpired", 71 "callout_untimeouts_executing", 72 "callout_untimeouts_expired", 73 "callout_expirations", 74 "callout_allocations", 75 "callout_cleanups", 76 }; 77 78 static hrtime_t callout_heap_process(callout_table_t *, hrtime_t, int); 79 80 #define CALLOUT_HASH_INSERT(hash, cp, cnext, cprev) \ 81 { \ 82 callout_hash_t *hashp = &(hash); \ 83 \ 84 cp->cprev = NULL; \ 85 cp->cnext = hashp->ch_head; \ 86 if (hashp->ch_head == NULL) \ 87 hashp->ch_tail = cp; \ 88 else \ 89 cp->cnext->cprev = cp; \ 90 hashp->ch_head = cp; \ 91 } 92 93 #define CALLOUT_HASH_APPEND(hash, cp, cnext, cprev) \ 94 { \ 95 callout_hash_t *hashp = &(hash); \ 96 \ 97 cp->cnext = NULL; \ 98 cp->cprev = hashp->ch_tail; \ 99 if (hashp->ch_tail == NULL) \ 100 hashp->ch_head = cp; \ 101 else \ 102 cp->cprev->cnext = cp; \ 103 hashp->ch_tail = cp; \ 104 } 105 106 #define CALLOUT_HASH_DELETE(hash, cp, cnext, cprev) \ 107 { \ 108 callout_hash_t *hashp = &(hash); \ 109 \ 110 if (cp->cnext == NULL) \ 111 hashp->ch_tail = cp->cprev; \ 112 else \ 113 cp->cnext->cprev = cp->cprev; \ 114 if (cp->cprev == NULL) \ 115 hashp->ch_head = cp->cnext; \ 116 else \ 117 cp->cprev->cnext = cp->cnext; \ 118 } 119 120 /* 121 * These definitions help us queue callouts and callout lists. Here is 122 * the queueing rationale: 123 * 124 * - callouts are queued in a FIFO manner in the ID hash table. 125 * TCP timers are typically cancelled in the same order that they 126 * were issued. The FIFO queueing shortens the search for a callout 127 * during untimeout(). 128 * 129 * - callouts are queued in a FIFO manner in their callout lists. 130 * This ensures that the callouts are executed in the same order that 131 * they were queued. This is fair. Plus, it helps to make each 132 * callout expiration timely. It also favors cancellations. 133 * 134 * - callout lists are queued in the following manner in the callout 135 * hash table buckets: 136 * 137 * - appended, if the callout list is a 1-nanosecond resolution 138 * callout list. When a callout is created, we first look for 139 * a callout list that has the same expiration so we can avoid 140 * allocating a callout list and inserting the expiration into 141 * the heap. However, we do not want to look at 1-nanosecond 142 * resolution callout lists as we will seldom find a match in 143 * them. Keeping these callout lists in the rear of the hash 144 * buckets allows us to skip these during the lookup. 145 * 146 * - inserted at the beginning, if the callout list is not a 147 * 1-nanosecond resolution callout list. This also has the 148 * side-effect of keeping the long term timers away from the 149 * front of the buckets. 150 * 151 * - callout lists are queued in a FIFO manner in the expired callouts 152 * list. This ensures that callout lists are executed in the order 153 * of expiration. 
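 *
 * For example, if a hash bucket receives, in order, a 10-millisecond
 * resolution callout list A, a 1-nanosecond resolution list B and
 * another 10-millisecond resolution list C, the bucket ends up ordered
 * C -> A -> B: coarse lists are inserted at the head, 1-nanosecond
 * lists are appended at the tail, and callout_list_get() can stop
 * searching as soon as it reaches B.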
154 */ 155 #define CALLOUT_APPEND(ct, cp) \ 156 CALLOUT_HASH_APPEND(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)], \ 157 cp, c_idnext, c_idprev); \ 158 CALLOUT_HASH_APPEND(cp->c_list->cl_callouts, cp, c_clnext, c_clprev) 159 160 #define CALLOUT_DELETE(ct, cp) \ 161 CALLOUT_HASH_DELETE(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)], \ 162 cp, c_idnext, c_idprev); \ 163 CALLOUT_HASH_DELETE(cp->c_list->cl_callouts, cp, c_clnext, c_clprev) 164 165 #define CALLOUT_LIST_INSERT(hash, cl) \ 166 CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev) 167 168 #define CALLOUT_LIST_APPEND(hash, cl) \ 169 CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev) 170 171 #define CALLOUT_LIST_DELETE(hash, cl) \ 172 CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev) 173 174 #define CALLOUT_LIST_BEFORE(cl, nextcl) \ 175 { \ 176 (cl)->cl_prev = (nextcl)->cl_prev; \ 177 (cl)->cl_next = (nextcl); \ 178 (nextcl)->cl_prev = (cl); \ 179 if (cl->cl_prev != NULL) \ 180 cl->cl_prev->cl_next = cl; \ 181 } 182 183 /* 184 * For normal callouts, there is a deadlock scenario if two callouts that 185 * have an inter-dependency end up on the same callout list. To break the 186 * deadlock, you need two taskq threads running in parallel. We compute 187 * the number of taskq threads here using a bunch of conditions to make 188 * it optimal for the common case. This is an ugly hack, but one that is 189 * necessary (sigh). 190 */ 191 #define CALLOUT_THRESHOLD 100000000 192 #define CALLOUT_EXEC_COMPUTE(ct, nextexp, exec) \ 193 { \ 194 callout_list_t *cl; \ 195 \ 196 cl = ct->ct_expired.ch_head; \ 197 if (cl == NULL) { \ 198 /* \ 199 * If the expired list is NULL, there is nothing to \ 200 * process. \ 201 */ \ 202 exec = 0; \ 203 } else if ((cl->cl_next == NULL) && \ 204 (cl->cl_callouts.ch_head == cl->cl_callouts.ch_tail)) { \ 205 /* \ 206 * If there is only one callout list and it contains \ 207 * only one callout, there is no need for two threads. \ 208 */ \ 209 exec = 1; \ 210 } else if ((nextexp) > (gethrtime() + CALLOUT_THRESHOLD)) { \ 211 /* \ 212 * If the next expiration of the cyclic is way out into \ 213 * the future, we need two threads. \ 214 */ \ 215 exec = 2; \ 216 } else { \ 217 /* \ 218 * We have multiple callouts to process. But the cyclic \ 219 * will fire in the near future. So, we only need one \ 220 * thread for now. \ 221 */ \ 222 exec = 1; \ 223 } \ 224 } 225 226 /* 227 * Macro to swap two heap items. 228 */ 229 #define CALLOUT_SWAP(h1, h2) \ 230 { \ 231 callout_heap_t tmp; \ 232 \ 233 tmp = *h1; \ 234 *h1 = *h2; \ 235 *h2 = tmp; \ 236 } 237 238 /* 239 * Macro to free a callout list. 240 */ 241 #define CALLOUT_LIST_FREE(ct, cl) \ 242 { \ 243 cl->cl_next = ct->ct_lfree; \ 244 ct->ct_lfree = cl; \ 245 cl->cl_flags |= CALLOUT_LIST_FLAG_FREE; \ 246 } 247 248 /* 249 * Macro to free a callout. 250 */ 251 #define CALLOUT_FREE(ct, cl) \ 252 { \ 253 cp->c_idnext = ct->ct_free; \ 254 ct->ct_free = cp; \ 255 cp->c_xid |= CALLOUT_ID_FREE; \ 256 } 257 258 /* 259 * Allocate a callout structure. We try quite hard because we 260 * can't sleep, and if we can't do the allocation, we're toast. 261 * Failing all, we try a KM_PANIC allocation. Note that we never 262 * deallocate a callout. See untimeout() for the reasoning. 
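 * (In short: untimeout_generic() may examine c_xid and block on c_done
 * for a callout that has since been freed to the freelist or reused;
 * that is only safe because the underlying memory is never returned to
 * the system.)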
263 */ 264 static callout_t * 265 callout_alloc(callout_table_t *ct) 266 { 267 size_t size; 268 callout_t *cp; 269 270 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 271 mutex_exit(&ct->ct_mutex); 272 273 cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP); 274 if (cp == NULL) { 275 size = sizeof (callout_t); 276 cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 277 } 278 cp->c_xid = 0; 279 cp->c_executor = NULL; 280 cv_init(&cp->c_done, NULL, CV_DEFAULT, NULL); 281 cp->c_waiting = 0; 282 283 mutex_enter(&ct->ct_mutex); 284 ct->ct_allocations++; 285 return (cp); 286 } 287 288 /* 289 * Allocate a callout list structure. We try quite hard because we 290 * can't sleep, and if we can't do the allocation, we're toast. 291 * Failing all, we try a KM_PANIC allocation. Note that we never 292 * deallocate a callout list. 293 */ 294 static void 295 callout_list_alloc(callout_table_t *ct) 296 { 297 size_t size; 298 callout_list_t *cl; 299 300 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 301 mutex_exit(&ct->ct_mutex); 302 303 cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP); 304 if (cl == NULL) { 305 size = sizeof (callout_list_t); 306 cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 307 } 308 bzero(cl, sizeof (callout_list_t)); 309 310 mutex_enter(&ct->ct_mutex); 311 CALLOUT_LIST_FREE(ct, cl); 312 } 313 314 /* 315 * Find a callout list that corresponds to an expiration and matching flags. 316 */ 317 static callout_list_t * 318 callout_list_get(callout_table_t *ct, hrtime_t expiration, int flags, int hash) 319 { 320 callout_list_t *cl; 321 int clflags; 322 323 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 324 325 if (flags & CALLOUT_LIST_FLAG_NANO) { 326 /* 327 * This is a 1-nanosecond resolution callout. We will rarely 328 * find a match for this. So, bail out. 329 */ 330 return (NULL); 331 } 332 333 clflags = (CALLOUT_LIST_FLAG_ABSOLUTE | CALLOUT_LIST_FLAG_HRESTIME); 334 for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) { 335 /* 336 * If we have reached a 1-nanosecond resolution callout list, 337 * we don't have much hope of finding a match in this hash 338 * bucket. So, just bail out. 339 */ 340 if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) 341 return (NULL); 342 343 if ((cl->cl_expiration == expiration) && 344 ((cl->cl_flags & clflags) == (flags & clflags))) 345 return (cl); 346 } 347 348 return (NULL); 349 } 350 351 /* 352 * Add a new callout list into a callout table's queue in sorted order by 353 * expiration. 354 */ 355 static int 356 callout_queue_add(callout_table_t *ct, callout_list_t *cl) 357 { 358 callout_list_t *nextcl; 359 hrtime_t expiration; 360 361 expiration = cl->cl_expiration; 362 nextcl = ct->ct_queue.ch_head; 363 if ((nextcl == NULL) || (expiration < nextcl->cl_expiration)) { 364 CALLOUT_LIST_INSERT(ct->ct_queue, cl); 365 return (1); 366 } 367 368 while (nextcl != NULL) { 369 if (expiration < nextcl->cl_expiration) { 370 CALLOUT_LIST_BEFORE(cl, nextcl); 371 return (0); 372 } 373 nextcl = nextcl->cl_next; 374 } 375 CALLOUT_LIST_APPEND(ct->ct_queue, cl); 376 377 return (0); 378 } 379 380 /* 381 * Insert a callout list into a callout table's queue and reprogram the queue 382 * cyclic if needed. 383 */ 384 static void 385 callout_queue_insert(callout_table_t *ct, callout_list_t *cl) 386 { 387 cl->cl_flags |= CALLOUT_LIST_FLAG_QUEUED; 388 389 /* 390 * Add the callout to the callout queue. If it ends up at the head, 391 * the cyclic needs to be reprogrammed as we have an earlier 392 * expiration. 393 * 394 * Also, during the CPR suspend phase, do not reprogram the cyclic. 
395 * We don't want any callout activity. When the CPR resume phase is 396 * entered, the cyclic will be programmed for the earliest expiration 397 * in the queue. 398 */ 399 if (callout_queue_add(ct, cl) && (ct->ct_suspend == 0)) 400 (void) cyclic_reprogram(ct->ct_qcyclic, cl->cl_expiration); 401 } 402 403 /* 404 * Delete and handle all past expirations in a callout table's queue. 405 */ 406 static hrtime_t 407 callout_queue_delete(callout_table_t *ct) 408 { 409 callout_list_t *cl; 410 hrtime_t now; 411 412 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 413 414 now = gethrtime(); 415 while ((cl = ct->ct_queue.ch_head) != NULL) { 416 if (cl->cl_expiration > now) 417 break; 418 cl->cl_flags &= ~CALLOUT_LIST_FLAG_QUEUED; 419 CALLOUT_LIST_DELETE(ct->ct_queue, cl); 420 CALLOUT_LIST_APPEND(ct->ct_expired, cl); 421 } 422 423 /* 424 * If this callout queue is empty or callouts have been suspended, 425 * just return. 426 */ 427 if ((cl == NULL) || (ct->ct_suspend > 0)) 428 return (CY_INFINITY); 429 430 (void) cyclic_reprogram(ct->ct_qcyclic, cl->cl_expiration); 431 432 return (cl->cl_expiration); 433 } 434 435 static hrtime_t 436 callout_queue_process(callout_table_t *ct, hrtime_t delta, int timechange) 437 { 438 callout_list_t *firstcl, *cl; 439 hrtime_t expiration, now; 440 int clflags; 441 callout_hash_t temp; 442 443 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 444 445 firstcl = ct->ct_queue.ch_head; 446 if (firstcl == NULL) 447 return (CY_INFINITY); 448 449 /* 450 * We walk the callout queue. If we encounter a hrestime entry that 451 * must be removed, we clean it out. Otherwise, we apply any 452 * adjustments needed to it. Because of the latter, we need to 453 * recreate the list as we go along. 454 */ 455 temp = ct->ct_queue; 456 ct->ct_queue.ch_head = NULL; 457 ct->ct_queue.ch_tail = NULL; 458 459 clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE); 460 now = gethrtime(); 461 while ((cl = temp.ch_head) != NULL) { 462 CALLOUT_LIST_DELETE(temp, cl); 463 464 /* 465 * Delete the callout and expire it, if one of the following 466 * is true: 467 * - the callout has expired 468 * - the callout is an absolute hrestime one and 469 * there has been a system time change 470 */ 471 if ((cl->cl_expiration <= now) || 472 (timechange && ((cl->cl_flags & clflags) == clflags))) { 473 cl->cl_flags &= ~CALLOUT_LIST_FLAG_QUEUED; 474 CALLOUT_LIST_APPEND(ct->ct_expired, cl); 475 continue; 476 } 477 478 /* 479 * Apply adjustments, if any. Adjustments are applied after 480 * the system returns from KMDB or OBP. They are only applied 481 * to relative callout lists. 482 */ 483 if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) { 484 expiration = cl->cl_expiration + delta; 485 if (expiration <= 0) 486 expiration = CY_INFINITY; 487 cl->cl_expiration = expiration; 488 } 489 490 (void) callout_queue_add(ct, cl); 491 } 492 493 /* 494 * We need to return the expiration to help program the cyclic. 495 * If there are expired callouts, the cyclic needs to go off 496 * immediately. If the queue has become empty, then we return infinity. 497 * Else, we return the expiration of the earliest callout in the queue. 498 */ 499 if (ct->ct_expired.ch_head != NULL) 500 return (gethrtime()); 501 502 cl = ct->ct_queue.ch_head; 503 if (cl == NULL) 504 return (CY_INFINITY); 505 506 return (cl->cl_expiration); 507 } 508 509 /* 510 * Initialize a callout table's heap, if necessary. Preallocate some free 511 * entries so we don't have to check for NULL elsewhere. 
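 *
 * The heap is a complete binary min-heap of expirations kept in the
 * ct_heap array, with the earliest expiration always at index 0. The
 * index macros used below (CALLOUT_HEAP_PARENT, CALLOUT_HEAP_LEFT and
 * CALLOUT_HEAP_RIGHT, defined in the callout header) are assumed to
 * follow the usual 0-based array convention:
 *
 *	parent(i) = (i - 1) / 2
 *	left(i)   = 2 * i + 1
 *	right(i)  = 2 * i + 2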
512 */ 513 static void 514 callout_heap_init(callout_table_t *ct) 515 { 516 size_t size; 517 518 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 519 ASSERT(ct->ct_heap == NULL); 520 521 ct->ct_heap_num = 0; 522 ct->ct_heap_max = callout_chunk; 523 size = sizeof (callout_heap_t) * callout_chunk; 524 ct->ct_heap = kmem_alloc(size, KM_SLEEP); 525 } 526 527 /* 528 * Reallocate the heap. Return 0 if the heap is still full at the end of it. 529 * Return 1 otherwise. Note that the heap only expands, it never contracts. 530 */ 531 static int 532 callout_heap_expand(callout_table_t *ct) 533 { 534 size_t max, size, osize; 535 callout_heap_t *heap; 536 537 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 538 ASSERT(ct->ct_heap_num <= ct->ct_heap_max); 539 540 while (ct->ct_heap_num == ct->ct_heap_max) { 541 max = ct->ct_heap_max; 542 mutex_exit(&ct->ct_mutex); 543 544 osize = sizeof (callout_heap_t) * max; 545 size = sizeof (callout_heap_t) * (max + callout_chunk); 546 heap = kmem_alloc(size, KM_NOSLEEP); 547 548 mutex_enter(&ct->ct_mutex); 549 if (heap == NULL) { 550 /* 551 * We could not allocate memory. If we can free up 552 * some entries, that would be great. 553 */ 554 if (ct->ct_nreap > 0) 555 (void) callout_heap_process(ct, 0, 0); 556 /* 557 * If we still have no space in the heap, inform the 558 * caller. 559 */ 560 if (ct->ct_heap_num == ct->ct_heap_max) 561 return (0); 562 return (1); 563 } 564 if (max < ct->ct_heap_max) { 565 /* 566 * Someone beat us to the allocation. Free what we 567 * just allocated and proceed. 568 */ 569 kmem_free(heap, size); 570 continue; 571 } 572 573 bcopy(ct->ct_heap, heap, osize); 574 kmem_free(ct->ct_heap, osize); 575 ct->ct_heap = heap; 576 ct->ct_heap_max = size / sizeof (callout_heap_t); 577 } 578 579 return (1); 580 } 581 582 /* 583 * Move an expiration from the bottom of the heap to its correct place 584 * in the heap. If we reached the root doing this, return 1. Else, 585 * return 0. 586 */ 587 static int 588 callout_upheap(callout_table_t *ct) 589 { 590 int current, parent; 591 callout_heap_t *heap, *hcurrent, *hparent; 592 593 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 594 ASSERT(ct->ct_heap_num >= 1); 595 596 if (ct->ct_heap_num == 1) { 597 return (1); 598 } 599 600 heap = ct->ct_heap; 601 current = ct->ct_heap_num - 1; 602 603 for (;;) { 604 parent = CALLOUT_HEAP_PARENT(current); 605 hparent = &heap[parent]; 606 hcurrent = &heap[current]; 607 608 /* 609 * We have an expiration later than our parent; we're done. 610 */ 611 if (hcurrent->ch_expiration >= hparent->ch_expiration) { 612 return (0); 613 } 614 615 /* 616 * We need to swap with our parent, and continue up the heap. 617 */ 618 CALLOUT_SWAP(hparent, hcurrent); 619 620 /* 621 * If we just reached the root, we're done. 622 */ 623 if (parent == 0) { 624 return (1); 625 } 626 627 current = parent; 628 } 629 /*NOTREACHED*/ 630 } 631 632 /* 633 * Insert a new heap item into a callout table's heap. 634 */ 635 static void 636 callout_heap_insert(callout_table_t *ct, callout_list_t *cl) 637 { 638 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 639 ASSERT(ct->ct_heap_num < ct->ct_heap_max); 640 641 cl->cl_flags |= CALLOUT_LIST_FLAG_HEAPED; 642 /* 643 * First, copy the expiration and callout list pointer to the bottom 644 * of the heap. 645 */ 646 ct->ct_heap[ct->ct_heap_num].ch_expiration = cl->cl_expiration; 647 ct->ct_heap[ct->ct_heap_num].ch_list = cl; 648 ct->ct_heap_num++; 649 650 /* 651 * Now, perform an upheap operation. If we reached the root, then 652 * the cyclic needs to be reprogrammed as we have an earlier 653 * expiration. 
654 * 655 * Also, during the CPR suspend phase, do not reprogram the cyclic. 656 * We don't want any callout activity. When the CPR resume phase is 657 * entered, the cyclic will be programmed for the earliest expiration 658 * in the heap. 659 */ 660 if (callout_upheap(ct) && (ct->ct_suspend == 0)) 661 (void) cyclic_reprogram(ct->ct_cyclic, cl->cl_expiration); 662 } 663 664 /* 665 * Move an expiration from the top of the heap to its correct place 666 * in the heap. 667 */ 668 static void 669 callout_downheap(callout_table_t *ct) 670 { 671 int current, left, right, nelems; 672 callout_heap_t *heap, *hleft, *hright, *hcurrent; 673 674 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 675 ASSERT(ct->ct_heap_num >= 1); 676 677 heap = ct->ct_heap; 678 current = 0; 679 nelems = ct->ct_heap_num; 680 681 for (;;) { 682 /* 683 * If we don't have a left child (i.e., we're a leaf), we're 684 * done. 685 */ 686 if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems) 687 return; 688 689 hleft = &heap[left]; 690 hcurrent = &heap[current]; 691 692 right = CALLOUT_HEAP_RIGHT(current); 693 694 /* 695 * Even if we don't have a right child, we still need to compare 696 * our expiration against that of our left child. 697 */ 698 if (right >= nelems) 699 goto comp_left; 700 701 hright = &heap[right]; 702 703 /* 704 * We have both a left and a right child. We need to compare 705 * the expiration of the children to determine which 706 * expires earlier. 707 */ 708 if (hright->ch_expiration < hleft->ch_expiration) { 709 /* 710 * Our right child is the earlier of our children. 711 * We'll now compare our expiration to its expiration. 712 * If ours is the earlier one, we're done. 713 */ 714 if (hcurrent->ch_expiration <= hright->ch_expiration) 715 return; 716 717 /* 718 * Our right child expires earlier than we do; swap 719 * with our right child, and descend right. 720 */ 721 CALLOUT_SWAP(hright, hcurrent); 722 current = right; 723 continue; 724 } 725 726 comp_left: 727 /* 728 * Our left child is the earlier of our children (or we have 729 * no right child). We'll now compare our expiration 730 * to its expiration. If ours is the earlier one, we're done. 731 */ 732 if (hcurrent->ch_expiration <= hleft->ch_expiration) 733 return; 734 735 /* 736 * Our left child expires earlier than we do; swap with our 737 * left child, and descend left. 738 */ 739 CALLOUT_SWAP(hleft, hcurrent); 740 current = left; 741 } 742 } 743 744 /* 745 * Delete and handle all past expirations in a callout table's heap. 746 */ 747 static hrtime_t 748 callout_heap_delete(callout_table_t *ct) 749 { 750 hrtime_t now, expiration, next; 751 callout_list_t *cl; 752 callout_heap_t *heap; 753 int hash; 754 755 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 756 757 if (CALLOUT_CLEANUP(ct)) { 758 /* 759 * There are too many heap elements pointing to empty callout 760 * lists. Clean them out. 761 */ 762 (void) callout_heap_process(ct, 0, 0); 763 } 764 765 now = gethrtime(); 766 heap = ct->ct_heap; 767 768 while (ct->ct_heap_num > 0) { 769 expiration = heap->ch_expiration; 770 hash = CALLOUT_CLHASH(expiration); 771 cl = heap->ch_list; 772 ASSERT(expiration == cl->cl_expiration); 773 774 if (cl->cl_callouts.ch_head == NULL) { 775 /* 776 * If the callout list is empty, reap it. 777 * Decrement the reap count. 778 */ 779 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 780 CALLOUT_LIST_FREE(ct, cl); 781 ct->ct_nreap--; 782 } else { 783 /* 784 * If the root of the heap expires in the future, 785 * bail out. 
786 */ 787 if (expiration > now) 788 break; 789 790 /* 791 * Move the callout list for this expiration to the 792 * list of expired callout lists. It will be processed 793 * by the callout executor. 794 */ 795 cl->cl_flags &= ~CALLOUT_LIST_FLAG_HEAPED; 796 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 797 CALLOUT_LIST_APPEND(ct->ct_expired, cl); 798 } 799 800 /* 801 * Now delete the root. This is done by swapping the root with 802 * the last item in the heap and downheaping the item. 803 */ 804 ct->ct_heap_num--; 805 if (ct->ct_heap_num > 0) { 806 heap[0] = heap[ct->ct_heap_num]; 807 callout_downheap(ct); 808 } 809 } 810 811 /* 812 * If this callout table is empty or callouts have been suspended, 813 * just return. The cyclic has already been programmed to 814 * infinity by the cyclic subsystem. 815 */ 816 if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0)) 817 return (CY_INFINITY); 818 819 /* 820 * If the top expirations are within callout_tolerance of each other, 821 * delay the cyclic expire so that they can be processed together. 822 * This is to prevent high resolution timers from swamping the system 823 * with cyclic activity. 824 */ 825 if (ct->ct_heap_num > 2) { 826 next = expiration + callout_tolerance; 827 if ((heap[1].ch_expiration < next) || 828 (heap[2].ch_expiration < next)) 829 expiration = next; 830 } 831 832 (void) cyclic_reprogram(ct->ct_cyclic, expiration); 833 834 return (expiration); 835 } 836 837 /* 838 * There are some situations when the entire heap is walked and processed. 839 * This function is called to do the processing. These are the situations: 840 * 841 * 1. When the reap count reaches its threshold, the heap has to be cleared 842 * of all empty callout lists. 843 * 844 * 2. When the system enters and exits KMDB/OBP, all entries in the heap 845 * need to be adjusted by the interval spent in KMDB/OBP. 846 * 847 * 3. When system time is changed, the heap has to be scanned for 848 * absolute hrestime timers. These need to be removed from the heap 849 * and expired immediately. 850 * 851 * In cases 2 and 3, it is a good idea to do 1 as well since we are 852 * scanning the heap anyway. 853 * 854 * If the root gets changed and/or callout lists are expired, return the 855 * new expiration to the caller so he can reprogram the cyclic accordingly. 856 */ 857 static hrtime_t 858 callout_heap_process(callout_table_t *ct, hrtime_t delta, int timechange) 859 { 860 callout_heap_t *heap; 861 callout_list_t *cl; 862 hrtime_t expiration, now; 863 int i, hash, clflags; 864 ulong_t num; 865 866 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 867 868 if (ct->ct_heap_num == 0) 869 return (CY_INFINITY); 870 871 if (ct->ct_nreap > 0) 872 ct->ct_cleanups++; 873 874 heap = ct->ct_heap; 875 876 /* 877 * We walk the heap from the top to the bottom. If we encounter 878 * a heap item that points to an empty callout list, we clean 879 * it out. If we encounter a hrestime entry that must be removed, 880 * again we clean it out. Otherwise, we apply any adjustments needed 881 * to an element. 882 * 883 * During the walk, we also compact the heap from the bottom and 884 * reconstruct the heap using upheap operations. This is very 885 * efficient if the number of elements to be cleaned is greater than 886 * or equal to half the heap. This is the common case. 887 * 888 * Even in the non-common case, the upheap operations should be short 889 * as the entries below generally tend to be bigger than the entries 890 * above. 
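	 *
	 * For instance, if num is 6 and the entries at slots 1, 3 and 4
	 * point to empty callout lists, the survivors at slots 0, 2 and 5
	 * are copied down to slots 0, 1 and 2 in that order and each is
	 * sifted up with callout_upheap(), leaving a valid 3-element heap
	 * without a separate heapify pass.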
891 */ 892 num = ct->ct_heap_num; 893 ct->ct_heap_num = 0; 894 clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE); 895 now = gethrtime(); 896 for (i = 0; i < num; i++) { 897 cl = heap[i].ch_list; 898 /* 899 * If the callout list is empty, delete the heap element and 900 * free the callout list. 901 */ 902 if (cl->cl_callouts.ch_head == NULL) { 903 hash = CALLOUT_CLHASH(cl->cl_expiration); 904 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 905 CALLOUT_LIST_FREE(ct, cl); 906 continue; 907 } 908 909 /* 910 * Delete the heap element and expire the callout list, if 911 * one of the following is true: 912 * - the callout list has expired 913 * - the callout list is an absolute hrestime one and 914 * there has been a system time change 915 */ 916 if ((cl->cl_expiration <= now) || 917 (timechange && ((cl->cl_flags & clflags) == clflags))) { 918 hash = CALLOUT_CLHASH(cl->cl_expiration); 919 cl->cl_flags &= ~CALLOUT_LIST_FLAG_HEAPED; 920 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 921 CALLOUT_LIST_APPEND(ct->ct_expired, cl); 922 continue; 923 } 924 925 /* 926 * Apply adjustments, if any. Adjustments are applied after 927 * the system returns from KMDB or OBP. They are only applied 928 * to relative callout lists. 929 */ 930 if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) { 931 hash = CALLOUT_CLHASH(cl->cl_expiration); 932 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 933 expiration = cl->cl_expiration + delta; 934 if (expiration <= 0) 935 expiration = CY_INFINITY; 936 heap[i].ch_expiration = expiration; 937 cl->cl_expiration = expiration; 938 hash = CALLOUT_CLHASH(cl->cl_expiration); 939 if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) { 940 CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl); 941 } else { 942 CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl); 943 } 944 } 945 946 heap[ct->ct_heap_num] = heap[i]; 947 ct->ct_heap_num++; 948 (void) callout_upheap(ct); 949 } 950 951 ct->ct_nreap = 0; 952 953 /* 954 * We need to return the expiration to help program the cyclic. 955 * If there are expired callouts, the cyclic needs to go off 956 * immediately. If the heap has become empty, then we return infinity. 957 * Else, return the expiration of the earliest callout in the heap. 958 */ 959 if (ct->ct_expired.ch_head != NULL) 960 return (gethrtime()); 961 962 if (ct->ct_heap_num == 0) 963 return (CY_INFINITY); 964 965 return (heap->ch_expiration); 966 } 967 968 /* 969 * Common function used to create normal and realtime callouts. 970 * 971 * Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So, 972 * there is one restriction on a realtime callout handler - it should not 973 * directly or indirectly acquire cpu_lock. CPU offline waits for pending 974 * cyclic handlers to complete while holding cpu_lock. So, if a realtime 975 * callout handler were to try to get cpu_lock, there would be a deadlock 976 * during CPU offline. 977 */ 978 callout_id_t 979 timeout_generic(int type, void (*func)(void *), void *arg, 980 hrtime_t expiration, hrtime_t resolution, int flags) 981 { 982 callout_table_t *ct; 983 callout_t *cp; 984 callout_id_t id; 985 callout_list_t *cl; 986 hrtime_t now, interval; 987 int hash, clflags; 988 989 ASSERT(resolution > 0); 990 ASSERT(func != NULL); 991 992 /* 993 * We get the current hrtime right upfront so that latencies in 994 * this function do not affect the accuracy of the callout. 995 */ 996 now = gethrtime(); 997 998 /* 999 * We disable kernel preemption so that we remain on the same CPU 1000 * throughout. 
If we needed to reprogram the callout table's cyclic, 1001 * we can avoid X-calls if we are on the same CPU. 1002 * 1003 * Note that callout_alloc() releases and reacquires the callout 1004 * table mutex. While reacquiring the mutex, it is possible for us 1005 * to go to sleep and later migrate to another CPU. This should be 1006 * pretty rare, though. 1007 */ 1008 kpreempt_disable(); 1009 1010 ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)]; 1011 mutex_enter(&ct->ct_mutex); 1012 1013 if (ct->ct_cyclic == CYCLIC_NONE) { 1014 mutex_exit(&ct->ct_mutex); 1015 /* 1016 * The callout table has not yet been initialized fully. 1017 * So, put this one on the boot callout table which is 1018 * always initialized. 1019 */ 1020 ct = &callout_boot_ct[type]; 1021 mutex_enter(&ct->ct_mutex); 1022 } 1023 1024 if (CALLOUT_CLEANUP(ct)) { 1025 /* 1026 * There are too many heap elements pointing to empty callout 1027 * lists. Clean them out. Since cleanup is only done once 1028 * in a while, no need to reprogram the cyclic if the root 1029 * of the heap gets cleaned out. 1030 */ 1031 (void) callout_heap_process(ct, 0, 0); 1032 } 1033 1034 if ((cp = ct->ct_free) == NULL) 1035 cp = callout_alloc(ct); 1036 else 1037 ct->ct_free = cp->c_idnext; 1038 1039 cp->c_func = func; 1040 cp->c_arg = arg; 1041 1042 /* 1043 * Compute the expiration hrtime. 1044 */ 1045 if (flags & CALLOUT_FLAG_ABSOLUTE) { 1046 interval = expiration - now; 1047 } else { 1048 interval = expiration; 1049 expiration += now; 1050 } 1051 1052 if (resolution > 1) { 1053 /* 1054 * Align expiration to the specified resolution. 1055 */ 1056 if (flags & CALLOUT_FLAG_ROUNDUP) 1057 expiration += resolution - 1; 1058 expiration = (expiration / resolution) * resolution; 1059 } 1060 1061 if (expiration <= 0) { 1062 /* 1063 * expiration hrtime overflow has occurred. Just set the 1064 * expiration to infinity. 1065 */ 1066 expiration = CY_INFINITY; 1067 } 1068 1069 /* 1070 * Assign an ID to this callout 1071 */ 1072 if (flags & CALLOUT_FLAG_32BIT) { 1073 if (interval > callout_longterm) { 1074 id = (ct->ct_long_id - callout_counter_low); 1075 id |= CALLOUT_COUNTER_HIGH; 1076 ct->ct_long_id = id; 1077 } else { 1078 id = (ct->ct_short_id - callout_counter_low); 1079 id |= CALLOUT_COUNTER_HIGH; 1080 ct->ct_short_id = id; 1081 } 1082 } else { 1083 id = (ct->ct_gen_id - callout_counter_low); 1084 if ((id & CALLOUT_COUNTER_HIGH) == 0) { 1085 id |= CALLOUT_COUNTER_HIGH; 1086 id += CALLOUT_GENERATION_LOW; 1087 } 1088 ct->ct_gen_id = id; 1089 } 1090 1091 cp->c_xid = id; 1092 1093 clflags = 0; 1094 if (flags & CALLOUT_FLAG_ABSOLUTE) 1095 clflags |= CALLOUT_LIST_FLAG_ABSOLUTE; 1096 if (flags & CALLOUT_FLAG_HRESTIME) 1097 clflags |= CALLOUT_LIST_FLAG_HRESTIME; 1098 if (resolution == 1) 1099 clflags |= CALLOUT_LIST_FLAG_NANO; 1100 hash = CALLOUT_CLHASH(expiration); 1101 1102 again: 1103 /* 1104 * Try to see if a callout list already exists for this expiration. 1105 */ 1106 cl = callout_list_get(ct, expiration, clflags, hash); 1107 if (cl == NULL) { 1108 /* 1109 * Check the free list. If we don't find one, we have to 1110 * take the slow path and allocate from kmem. 1111 */ 1112 if ((cl = ct->ct_lfree) == NULL) { 1113 callout_list_alloc(ct); 1114 /* 1115 * In the above call, we drop the lock, allocate and 1116 * reacquire the lock. So, we could have been away 1117 * for a while. In the meantime, someone could have 1118 * inserted a callout list with the same expiration. 1119 * Plus, the heap could have become full. 
So, the best 1120 * course is to repeat the steps. This should be an 1121 * infrequent event. 1122 */ 1123 goto again; 1124 } 1125 ct->ct_lfree = cl->cl_next; 1126 cl->cl_expiration = expiration; 1127 cl->cl_flags = clflags; 1128 1129 /* 1130 * Check if we have enough space in the heap to insert one 1131 * expiration. If not, expand the heap. 1132 */ 1133 if (ct->ct_heap_num == ct->ct_heap_max) { 1134 if (callout_heap_expand(ct) == 0) { 1135 /* 1136 * Could not expand the heap. Just queue it. 1137 */ 1138 callout_queue_insert(ct, cl); 1139 goto out; 1140 } 1141 1142 /* 1143 * In the above call, we drop the lock, allocate and 1144 * reacquire the lock. So, we could have been away 1145 * for a while. In the meantime, someone could have 1146 * inserted a callout list with the same expiration. 1147 * But we will not go back and check for it as this 1148 * should be a really infrequent event. There is no 1149 * point. 1150 */ 1151 } 1152 1153 if (clflags & CALLOUT_LIST_FLAG_NANO) { 1154 CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl); 1155 } else { 1156 CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl); 1157 } 1158 1159 /* 1160 * This is a new expiration. So, insert it into the heap. 1161 * This will also reprogram the cyclic, if the expiration 1162 * propagated to the root of the heap. 1163 */ 1164 callout_heap_insert(ct, cl); 1165 } else { 1166 /* 1167 * If the callout list was empty, untimeout_generic() would 1168 * have incremented a reap count. Decrement the reap count 1169 * as we are going to insert a callout into this list. 1170 */ 1171 if (cl->cl_callouts.ch_head == NULL) 1172 ct->ct_nreap--; 1173 } 1174 out: 1175 cp->c_list = cl; 1176 CALLOUT_APPEND(ct, cp); 1177 1178 ct->ct_timeouts++; 1179 ct->ct_timeouts_pending++; 1180 1181 mutex_exit(&ct->ct_mutex); 1182 1183 kpreempt_enable(); 1184 1185 TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT, 1186 "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration, 1187 cp); 1188 1189 return (id); 1190 } 1191 1192 timeout_id_t 1193 timeout(void (*func)(void *), void *arg, clock_t delta) 1194 { 1195 ulong_t id; 1196 1197 /* 1198 * Make sure the callout runs at least 1 tick in the future. 1199 */ 1200 if (delta <= 0) 1201 delta = 1; 1202 else if (delta > callout_max_ticks) 1203 delta = callout_max_ticks; 1204 1205 id = (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg, 1206 TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY); 1207 1208 return ((timeout_id_t)id); 1209 } 1210 1211 /* 1212 * Convenience function that creates a normal callout with default parameters 1213 * and returns a full ID. 1214 */ 1215 callout_id_t 1216 timeout_default(void (*func)(void *), void *arg, clock_t delta) 1217 { 1218 callout_id_t id; 1219 1220 /* 1221 * Make sure the callout runs at least 1 tick in the future. 1222 */ 1223 if (delta <= 0) 1224 delta = 1; 1225 else if (delta > callout_max_ticks) 1226 delta = callout_max_ticks; 1227 1228 id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta), 1229 nsec_per_tick, 0); 1230 1231 return (id); 1232 } 1233 1234 timeout_id_t 1235 realtime_timeout(void (*func)(void *), void *arg, clock_t delta) 1236 { 1237 ulong_t id; 1238 1239 /* 1240 * Make sure the callout runs at least 1 tick in the future. 
1241 */ 1242 if (delta <= 0) 1243 delta = 1; 1244 else if (delta > callout_max_ticks) 1245 delta = callout_max_ticks; 1246 1247 id = (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg, 1248 TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY); 1249 1250 return ((timeout_id_t)id); 1251 } 1252 1253 /* 1254 * Convenience function that creates a realtime callout with default parameters 1255 * and returns a full ID. 1256 */ 1257 callout_id_t 1258 realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta) 1259 { 1260 callout_id_t id; 1261 1262 /* 1263 * Make sure the callout runs at least 1 tick in the future. 1264 */ 1265 if (delta <= 0) 1266 delta = 1; 1267 else if (delta > callout_max_ticks) 1268 delta = callout_max_ticks; 1269 1270 id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta), 1271 nsec_per_tick, 0); 1272 1273 return (id); 1274 } 1275 1276 hrtime_t 1277 untimeout_generic(callout_id_t id, int nowait) 1278 { 1279 callout_table_t *ct; 1280 callout_t *cp; 1281 callout_id_t xid; 1282 callout_list_t *cl; 1283 int hash, flags; 1284 callout_id_t bogus; 1285 1286 ct = &callout_table[CALLOUT_ID_TO_TABLE(id)]; 1287 hash = CALLOUT_IDHASH(id); 1288 1289 mutex_enter(&ct->ct_mutex); 1290 1291 /* 1292 * Search the ID hash table for the callout. 1293 */ 1294 for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) { 1295 1296 xid = cp->c_xid; 1297 1298 /* 1299 * Match the ID and generation number. 1300 */ 1301 if ((xid & CALLOUT_ID_MASK) != id) 1302 continue; 1303 1304 if ((xid & CALLOUT_EXECUTING) == 0) { 1305 hrtime_t expiration; 1306 1307 /* 1308 * Delete the callout. If the callout list becomes 1309 * NULL, we don't remove it from the table. This is 1310 * so it can be reused. If the empty callout list 1311 * corresponds to the top of the the callout heap, we 1312 * don't reprogram the table cyclic here. This is in 1313 * order to avoid lots of X-calls to the CPU associated 1314 * with the callout table. 1315 */ 1316 cl = cp->c_list; 1317 expiration = cl->cl_expiration; 1318 CALLOUT_DELETE(ct, cp); 1319 CALLOUT_FREE(ct, cp); 1320 ct->ct_untimeouts_unexpired++; 1321 ct->ct_timeouts_pending--; 1322 1323 /* 1324 * If the callout list has become empty, there are 3 1325 * possibilities. If it is present: 1326 * - in the heap, it needs to be cleaned along 1327 * with its heap entry. Increment a reap count. 1328 * - in the callout queue, free it. 1329 * - in the expired list, free it. 1330 */ 1331 if (cl->cl_callouts.ch_head == NULL) { 1332 flags = cl->cl_flags; 1333 if (flags & CALLOUT_LIST_FLAG_HEAPED) { 1334 ct->ct_nreap++; 1335 } else if (flags & CALLOUT_LIST_FLAG_QUEUED) { 1336 CALLOUT_LIST_DELETE(ct->ct_queue, cl); 1337 CALLOUT_LIST_FREE(ct, cl); 1338 } else { 1339 CALLOUT_LIST_DELETE(ct->ct_expired, cl); 1340 CALLOUT_LIST_FREE(ct, cl); 1341 } 1342 } 1343 mutex_exit(&ct->ct_mutex); 1344 1345 expiration -= gethrtime(); 1346 TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT, 1347 "untimeout:ID %lx hrtime left %llx", id, 1348 expiration); 1349 return (expiration < 0 ? 0 : expiration); 1350 } 1351 1352 ct->ct_untimeouts_executing++; 1353 /* 1354 * The callout we want to delete is currently executing. 1355 * The DDI states that we must wait until the callout 1356 * completes before returning, so we block on c_done until the 1357 * callout ID changes (to the old ID if it's on the freelist, 1358 * or to a new callout ID if it's in use). This implicitly 1359 * assumes that callout structures are persistent (they are). 
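		 *
		 * A minimal caller-side sketch of the guarantee this
		 * provides, using hypothetical names (sp, s_tid,
		 * my_handler) that are not part of this file: once
		 * untimeout() returns, my_handler() has either never run
		 * or has completed, so freeing its state is safe.
		 *
		 *	(void) untimeout(sp->s_tid);
		 *	kmem_free(sp, sizeof (*sp));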
1360 */ 1361 if (cp->c_executor == curthread) { 1362 /* 1363 * The timeout handler called untimeout() on itself. 1364 * Stupid, but legal. We can't wait for the timeout 1365 * to complete without deadlocking, so we just return. 1366 */ 1367 mutex_exit(&ct->ct_mutex); 1368 TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF, 1369 "untimeout_self:ID %x", id); 1370 return (-1); 1371 } 1372 if (nowait == 0) { 1373 /* 1374 * We need to wait. Indicate that we are waiting by 1375 * incrementing c_waiting. This prevents the executor 1376 * from doing a wakeup on c_done if there are no 1377 * waiters. 1378 */ 1379 while (cp->c_xid == xid) { 1380 cp->c_waiting = 1; 1381 cv_wait(&cp->c_done, &ct->ct_mutex); 1382 } 1383 } 1384 mutex_exit(&ct->ct_mutex); 1385 TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING, 1386 "untimeout_executing:ID %lx", id); 1387 return (-1); 1388 } 1389 ct->ct_untimeouts_expired++; 1390 1391 mutex_exit(&ct->ct_mutex); 1392 TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID, 1393 "untimeout_bogus_id:ID %lx", id); 1394 1395 /* 1396 * We didn't find the specified callout ID. This means either 1397 * (1) the callout already fired, or (2) the caller passed us 1398 * a bogus value. Perform a sanity check to detect case (2). 1399 */ 1400 bogus = (CALLOUT_ID_FLAGS | CALLOUT_COUNTER_HIGH); 1401 if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0)) 1402 panic("untimeout: impossible timeout id %llx", 1403 (unsigned long long)id); 1404 1405 return (-1); 1406 } 1407 1408 clock_t 1409 untimeout(timeout_id_t id_arg) 1410 { 1411 hrtime_t hleft; 1412 clock_t tleft; 1413 callout_id_t id; 1414 1415 id = (ulong_t)id_arg; 1416 hleft = untimeout_generic(id, 0); 1417 if (hleft < 0) 1418 tleft = -1; 1419 else if (hleft == 0) 1420 tleft = 0; 1421 else 1422 tleft = NSEC_TO_TICK(hleft); 1423 1424 return (tleft); 1425 } 1426 1427 /* 1428 * Convenience function to untimeout a timeout with a full ID with default 1429 * parameters. 1430 */ 1431 clock_t 1432 untimeout_default(callout_id_t id, int nowait) 1433 { 1434 hrtime_t hleft; 1435 clock_t tleft; 1436 1437 hleft = untimeout_generic(id, nowait); 1438 if (hleft < 0) 1439 tleft = -1; 1440 else if (hleft == 0) 1441 tleft = 0; 1442 else 1443 tleft = NSEC_TO_TICK(hleft); 1444 1445 return (tleft); 1446 } 1447 1448 /* 1449 * Expire all the callouts queued in the specified callout list. 1450 */ 1451 static void 1452 callout_list_expire(callout_table_t *ct, callout_list_t *cl) 1453 { 1454 callout_t *cp, *cnext; 1455 1456 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1457 ASSERT(cl != NULL); 1458 1459 for (cp = cl->cl_callouts.ch_head; cp != NULL; cp = cnext) { 1460 /* 1461 * Multiple executor threads could be running at the same 1462 * time. If this callout is already being executed, 1463 * go on to the next one. 1464 */ 1465 if (cp->c_xid & CALLOUT_EXECUTING) { 1466 cnext = cp->c_clnext; 1467 continue; 1468 } 1469 1470 /* 1471 * Indicate to untimeout() that a callout is 1472 * being expired by the executor. 1473 */ 1474 cp->c_xid |= CALLOUT_EXECUTING; 1475 cp->c_executor = curthread; 1476 mutex_exit(&ct->ct_mutex); 1477 1478 DTRACE_PROBE1(callout__start, callout_t *, cp); 1479 (*cp->c_func)(cp->c_arg); 1480 DTRACE_PROBE1(callout__end, callout_t *, cp); 1481 1482 mutex_enter(&ct->ct_mutex); 1483 1484 ct->ct_expirations++; 1485 ct->ct_timeouts_pending--; 1486 /* 1487 * Indicate completion for c_done. 
1488 */ 1489 cp->c_xid &= ~CALLOUT_EXECUTING; 1490 cp->c_executor = NULL; 1491 cnext = cp->c_clnext; 1492 1493 /* 1494 * Delete callout from ID hash table and the callout 1495 * list, return to freelist, and tell any untimeout() that 1496 * cares that we're done. 1497 */ 1498 CALLOUT_DELETE(ct, cp); 1499 CALLOUT_FREE(ct, cp); 1500 1501 if (cp->c_waiting) { 1502 cp->c_waiting = 0; 1503 cv_broadcast(&cp->c_done); 1504 } 1505 } 1506 } 1507 1508 /* 1509 * Execute all expired callout lists for a callout table. 1510 */ 1511 static void 1512 callout_expire(callout_table_t *ct) 1513 { 1514 callout_list_t *cl, *clnext; 1515 1516 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1517 1518 for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) { 1519 /* 1520 * Expire all the callouts in this callout list. 1521 */ 1522 callout_list_expire(ct, cl); 1523 1524 clnext = cl->cl_next; 1525 if (cl->cl_callouts.ch_head == NULL) { 1526 /* 1527 * Free the callout list. 1528 */ 1529 CALLOUT_LIST_DELETE(ct->ct_expired, cl); 1530 CALLOUT_LIST_FREE(ct, cl); 1531 } 1532 } 1533 } 1534 1535 /* 1536 * The cyclic handlers below process callouts in two steps: 1537 * 1538 * 1. Find all expired callout lists and queue them in a separate 1539 * list of expired callouts. 1540 * 2. Execute the expired callout lists. 1541 * 1542 * This is done for two reasons: 1543 * 1544 * 1. We want to quickly find the next earliest expiration to program 1545 * the cyclic to and reprogram it. We can do this right at the end 1546 * of step 1. 1547 * 2. The realtime cyclic handler expires callouts in place. However, 1548 * for normal callouts, callouts are expired by a taskq thread. 1549 * So, it is simpler and more robust to have the taskq thread just 1550 * do step 2. 1551 */ 1552 1553 /* 1554 * Realtime callout cyclic handlers. 1555 */ 1556 void 1557 callout_realtime(callout_table_t *ct) 1558 { 1559 mutex_enter(&ct->ct_mutex); 1560 (void) callout_heap_delete(ct); 1561 callout_expire(ct); 1562 mutex_exit(&ct->ct_mutex); 1563 } 1564 1565 void 1566 callout_queue_realtime(callout_table_t *ct) 1567 { 1568 mutex_enter(&ct->ct_mutex); 1569 (void) callout_queue_delete(ct); 1570 callout_expire(ct); 1571 mutex_exit(&ct->ct_mutex); 1572 } 1573 1574 void 1575 callout_execute(callout_table_t *ct) 1576 { 1577 mutex_enter(&ct->ct_mutex); 1578 callout_expire(ct); 1579 mutex_exit(&ct->ct_mutex); 1580 } 1581 1582 /* 1583 * Normal callout cyclic handlers. 1584 */ 1585 void 1586 callout_normal(callout_table_t *ct) 1587 { 1588 int i, exec; 1589 hrtime_t exp; 1590 1591 mutex_enter(&ct->ct_mutex); 1592 exp = callout_heap_delete(ct); 1593 CALLOUT_EXEC_COMPUTE(ct, exp, exec); 1594 mutex_exit(&ct->ct_mutex); 1595 1596 for (i = 0; i < exec; i++) { 1597 ASSERT(ct->ct_taskq != NULL); 1598 (void) taskq_dispatch(ct->ct_taskq, 1599 (task_func_t *)callout_execute, ct, TQ_NOSLEEP); 1600 } 1601 } 1602 1603 void 1604 callout_queue_normal(callout_table_t *ct) 1605 { 1606 int i, exec; 1607 hrtime_t exp; 1608 1609 mutex_enter(&ct->ct_mutex); 1610 exp = callout_queue_delete(ct); 1611 CALLOUT_EXEC_COMPUTE(ct, exp, exec); 1612 mutex_exit(&ct->ct_mutex); 1613 1614 for (i = 0; i < exec; i++) { 1615 ASSERT(ct->ct_taskq != NULL); 1616 (void) taskq_dispatch(ct->ct_taskq, 1617 (task_func_t *)callout_execute, ct, TQ_NOSLEEP); 1618 } 1619 } 1620 1621 /* 1622 * Suspend callout processing. 1623 */ 1624 static void 1625 callout_suspend(void) 1626 { 1627 int t, f; 1628 callout_table_t *ct; 1629 1630 /* 1631 * Traverse every callout table in the system and suspend callout 1632 * processing. 
1633 * 1634 * We need to suspend all the tables (including the inactive ones) 1635 * so that if a table is made active while the suspend is still on, 1636 * the table remains suspended. 1637 */ 1638 for (f = 0; f < max_ncpus; f++) { 1639 for (t = 0; t < CALLOUT_NTYPES; t++) { 1640 ct = &callout_table[CALLOUT_TABLE(t, f)]; 1641 1642 mutex_enter(&ct->ct_mutex); 1643 ct->ct_suspend++; 1644 if (ct->ct_cyclic == CYCLIC_NONE) { 1645 mutex_exit(&ct->ct_mutex); 1646 continue; 1647 } 1648 if (ct->ct_suspend == 1) { 1649 (void) cyclic_reprogram(ct->ct_cyclic, 1650 CY_INFINITY); 1651 (void) cyclic_reprogram(ct->ct_qcyclic, 1652 CY_INFINITY); 1653 } 1654 mutex_exit(&ct->ct_mutex); 1655 } 1656 } 1657 } 1658 1659 /* 1660 * Resume callout processing. 1661 */ 1662 static void 1663 callout_resume(hrtime_t delta, int timechange) 1664 { 1665 hrtime_t hexp, qexp; 1666 int t, f; 1667 callout_table_t *ct; 1668 1669 /* 1670 * Traverse every callout table in the system and resume callout 1671 * processing. For active tables, perform any hrtime adjustments 1672 * necessary. 1673 */ 1674 for (f = 0; f < max_ncpus; f++) { 1675 for (t = 0; t < CALLOUT_NTYPES; t++) { 1676 ct = &callout_table[CALLOUT_TABLE(t, f)]; 1677 1678 mutex_enter(&ct->ct_mutex); 1679 if (ct->ct_cyclic == CYCLIC_NONE) { 1680 ct->ct_suspend--; 1681 mutex_exit(&ct->ct_mutex); 1682 continue; 1683 } 1684 1685 /* 1686 * If a delta is specified, adjust the expirations in 1687 * the heap by delta. Also, if the caller indicates 1688 * a timechange, process that. This step also cleans 1689 * out any empty callout lists that might happen to 1690 * be there. 1691 */ 1692 hexp = callout_heap_process(ct, delta, timechange); 1693 qexp = callout_queue_process(ct, delta, timechange); 1694 1695 ct->ct_suspend--; 1696 if (ct->ct_suspend == 0) { 1697 (void) cyclic_reprogram(ct->ct_cyclic, hexp); 1698 (void) cyclic_reprogram(ct->ct_qcyclic, qexp); 1699 } 1700 1701 mutex_exit(&ct->ct_mutex); 1702 } 1703 } 1704 } 1705 1706 /* 1707 * Callback handler used by CPR to stop and resume callouts. 1708 * The cyclic subsystem saves and restores hrtime during CPR. 1709 * That is why callout_resume() is called with a 0 delta. 1710 * Although hrtime is the same, hrestime (system time) has 1711 * progressed during CPR. So, we have to indicate a time change 1712 * to expire the absolute hrestime timers. 1713 */ 1714 /*ARGSUSED*/ 1715 static boolean_t 1716 callout_cpr_callb(void *arg, int code) 1717 { 1718 if (code == CB_CODE_CPR_CHKPT) 1719 callout_suspend(); 1720 else 1721 callout_resume(0, 1); 1722 1723 return (B_TRUE); 1724 } 1725 1726 /* 1727 * Callback handler invoked when the debugger is entered or exited. 1728 */ 1729 /*ARGSUSED*/ 1730 static boolean_t 1731 callout_debug_callb(void *arg, int code) 1732 { 1733 hrtime_t delta; 1734 1735 /* 1736 * When the system enters the debugger. make a note of the hrtime. 1737 * When it is resumed, compute how long the system was in the 1738 * debugger. This interval should not be counted for callouts. 1739 */ 1740 if (code == 0) { 1741 callout_suspend(); 1742 callout_debug_hrtime = gethrtime(); 1743 } else { 1744 delta = gethrtime() - callout_debug_hrtime; 1745 callout_resume(delta, 0); 1746 } 1747 1748 return (B_TRUE); 1749 } 1750 1751 /* 1752 * Move the absolute hrestime callouts to the expired list. Then program the 1753 * table's cyclic to expire immediately so that the callouts can be executed 1754 * immediately. 
1755 */ 1756 static void 1757 callout_hrestime_one(callout_table_t *ct) 1758 { 1759 hrtime_t hexp, qexp; 1760 1761 mutex_enter(&ct->ct_mutex); 1762 if (ct->ct_cyclic == CYCLIC_NONE) { 1763 mutex_exit(&ct->ct_mutex); 1764 return; 1765 } 1766 1767 /* 1768 * Walk the heap and process all the absolute hrestime entries. 1769 */ 1770 hexp = callout_heap_process(ct, 0, 1); 1771 qexp = callout_queue_process(ct, 0, 1); 1772 1773 if (ct->ct_suspend == 0) { 1774 (void) cyclic_reprogram(ct->ct_cyclic, hexp); 1775 (void) cyclic_reprogram(ct->ct_qcyclic, qexp); 1776 } 1777 1778 mutex_exit(&ct->ct_mutex); 1779 } 1780 1781 /* 1782 * This function is called whenever system time (hrestime) is changed 1783 * explicitly. All the HRESTIME callouts must be expired at once. 1784 */ 1785 /*ARGSUSED*/ 1786 void 1787 callout_hrestime(void) 1788 { 1789 int t, f; 1790 callout_table_t *ct; 1791 1792 /* 1793 * Traverse every callout table in the system and process the hrestime 1794 * callouts therein. 1795 * 1796 * We look at all the tables because we don't know which ones were 1797 * onlined and offlined in the past. The offlined tables may still 1798 * have active cyclics processing timers somewhere. 1799 */ 1800 for (f = 0; f < max_ncpus; f++) { 1801 for (t = 0; t < CALLOUT_NTYPES; t++) { 1802 ct = &callout_table[CALLOUT_TABLE(t, f)]; 1803 callout_hrestime_one(ct); 1804 } 1805 } 1806 } 1807 1808 /* 1809 * Create the hash tables for this callout table. 1810 */ 1811 static void 1812 callout_hash_init(callout_table_t *ct) 1813 { 1814 size_t size; 1815 1816 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1817 ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL)); 1818 1819 size = sizeof (callout_hash_t) * CALLOUT_BUCKETS; 1820 ct->ct_idhash = kmem_zalloc(size, KM_SLEEP); 1821 ct->ct_clhash = kmem_zalloc(size, KM_SLEEP); 1822 } 1823 1824 /* 1825 * Create per-callout table kstats. 1826 */ 1827 static void 1828 callout_kstat_init(callout_table_t *ct) 1829 { 1830 callout_stat_type_t stat; 1831 kstat_t *ct_kstats; 1832 int ndx; 1833 1834 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1835 ASSERT(ct->ct_kstats == NULL); 1836 1837 ndx = ct - callout_table; 1838 ct_kstats = kstat_create("unix", ndx, "callout", 1839 "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL); 1840 1841 if (ct_kstats == NULL) { 1842 cmn_err(CE_WARN, "kstat_create for callout table %p failed", 1843 (void *)ct); 1844 } else { 1845 ct_kstats->ks_data = ct->ct_kstat_data; 1846 for (stat = 0; stat < CALLOUT_NUM_STATS; stat++) 1847 kstat_named_init(&ct->ct_kstat_data[stat], 1848 callout_kstat_names[stat], KSTAT_DATA_INT64); 1849 ct->ct_kstats = ct_kstats; 1850 kstat_install(ct_kstats); 1851 } 1852 } 1853 1854 static void 1855 callout_cyclic_init(callout_table_t *ct) 1856 { 1857 cyc_handler_t hdlr; 1858 cyc_time_t when; 1859 processorid_t seqid; 1860 int t; 1861 cyclic_id_t cyclic, qcyclic; 1862 1863 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1864 1865 t = ct->ct_type; 1866 seqid = CALLOUT_TABLE_SEQID(ct); 1867 1868 /* 1869 * Create the taskq thread if the table type is normal. 1870 * Realtime tables are handled at PIL1 by a softint 1871 * handler. 1872 */ 1873 if (t == CALLOUT_NORMAL) { 1874 ASSERT(ct->ct_taskq == NULL); 1875 /* 1876 * Each callout thread consumes exactly one 1877 * task structure while active. Therefore, 1878 * prepopulating with 2 * callout_threads tasks 1879 * ensures that there's at least one task per 1880 * thread that's either scheduled or on the 1881 * freelist. 
		 * In turn, this guarantees that
		 * taskq_dispatch() will always either succeed
		 * (because there's a free task structure) or
		 * be unnecessary (because "callout_execute(ct)"
		 * has already scheduled).
		 */
		ct->ct_taskq =
		    taskq_create_instance("callout_taskq", seqid,
		    callout_threads, maxclsyspri,
		    2 * callout_threads, 2 * callout_threads,
		    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	}

	/*
	 * callouts can only be created in a table whose
	 * cyclic has been initialized.
	 */
	ASSERT(ct->ct_heap_num == 0);

	/*
	 * Drop the mutex before creating the callout cyclics. cyclic_add()
	 * could potentially expand the cyclic heap. We don't want to be
	 * holding the callout table mutex in that case. Note that this
	 * function is called during CPU online. cpu_lock is held at this
	 * point. So, only one thread can be executing the cyclic add logic
	 * below at any time.
	 */
	mutex_exit(&ct->ct_mutex);

	/*
	 * Create the callout table cyclics.
	 *
	 * The realtime cyclic handler executes at low PIL. The normal cyclic
	 * handler executes at lock PIL. This is because there are cases
	 * where code can block at PIL > 1 waiting for a normal callout handler
	 * to unblock it directly or indirectly. If the normal cyclic were to
	 * be executed at low PIL, it could get blocked out by the waiter
	 * and cause a deadlock.
	 */
	ASSERT(ct->ct_cyclic == CYCLIC_NONE);

	if (t == CALLOUT_REALTIME) {
		hdlr.cyh_level = callout_realtime_level;
		hdlr.cyh_func = (cyc_func_t)callout_realtime;
	} else {
		hdlr.cyh_level = callout_normal_level;
		hdlr.cyh_func = (cyc_func_t)callout_normal;
	}
	hdlr.cyh_arg = ct;
	when.cyt_when = CY_INFINITY;
	when.cyt_interval = CY_INFINITY;

	cyclic = cyclic_add(&hdlr, &when);

	if (t == CALLOUT_REALTIME)
		hdlr.cyh_func = (cyc_func_t)callout_queue_realtime;
	else
		hdlr.cyh_func = (cyc_func_t)callout_queue_normal;

	qcyclic = cyclic_add(&hdlr, &when);

	mutex_enter(&ct->ct_mutex);
	ct->ct_cyclic = cyclic;
	ct->ct_qcyclic = qcyclic;
}

void
callout_cpu_online(cpu_t *cp)
{
	lgrp_handle_t hand;
	callout_cache_t *cache;
	char s[KMEM_CACHE_NAMELEN];
	callout_table_t *ct;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Locate the cache corresponding to the onlined CPU's lgroup.
	 * Note that access to callout_caches is protected by cpu_lock.
	 */
	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
	for (cache = callout_caches; cache != NULL; cache = cache->cc_next) {
		if (cache->cc_hand == hand)
			break;
	}

	/*
	 * If not found, create one. The caches are never destroyed.
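	 * All CPUs that share an lgroup handle therefore share the same
	 * pair of kmem caches for their callout_t and callout_list_t
	 * allocations.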
	 */
	if (cache == NULL) {
		cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP);
		cache->cc_hand = hand;
		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx",
		    (long)hand);
		cache->cc_cache = kmem_cache_create(s, sizeof (callout_t),
		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx",
		    (long)hand);
		cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t),
		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
		cache->cc_next = callout_caches;
		callout_caches = cache;
	}

	seqid = cp->cpu_seqid;

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		ct = &callout_table[CALLOUT_TABLE(t, seqid)];

		mutex_enter(&ct->ct_mutex);
		/*
		 * Store convenience pointers to the kmem caches
		 * in the callout table. These assignments should always be
		 * done as callout tables can map to different physical
		 * CPUs each time.
		 */
		ct->ct_cache = cache->cc_cache;
		ct->ct_lcache = cache->cc_lcache;

		/*
		 * We use the heap pointer to check if stuff has been
		 * initialized for this callout table.
		 */
		if (ct->ct_heap == NULL) {
			callout_heap_init(ct);
			callout_hash_init(ct);
			callout_kstat_init(ct);
			callout_cyclic_init(ct);
		}

		mutex_exit(&ct->ct_mutex);

		/*
		 * Move the cyclics to this CPU by doing a bind.
		 */
		cyclic_bind(ct->ct_cyclic, cp, NULL);
		cyclic_bind(ct->ct_qcyclic, cp, NULL);
	}
}

void
callout_cpu_offline(cpu_t *cp)
{
	callout_table_t *ct;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&cpu_lock));

	seqid = cp->cpu_seqid;

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		ct = &callout_table[CALLOUT_TABLE(t, seqid)];

		/*
		 * Unbind the cyclics. This will allow the cyclic subsystem
		 * to juggle the cyclics during CPU offline.
		 */
		cyclic_bind(ct->ct_cyclic, NULL, NULL);
		cyclic_bind(ct->ct_qcyclic, NULL, NULL);
	}
}

/*
 * This is called to perform per-CPU initialization for slave CPUs at
 * boot time.
 */
void
callout_mp_init(void)
{
	cpu_t *cp;
	size_t min, max;

	if (callout_chunk == CALLOUT_CHUNK) {
		/*
		 * No one has specified a chunk in /etc/system. We need to
		 * compute it here based on the number of online CPUs and
		 * available physical memory.
		 */
		min = CALLOUT_MIN_HEAP_SIZE;
		max = ptob(physmem / CALLOUT_MEM_FRACTION);
		if (min > max)
			min = max;
		callout_chunk = min / sizeof (callout_heap_t);
		callout_chunk /= ncpus_online;
		callout_chunk = P2ROUNDUP(callout_chunk, CALLOUT_CHUNK);
	}

	mutex_enter(&cpu_lock);

	cp = cpu_active;
	do {
		callout_cpu_online(cp);
	} while ((cp = cp->cpu_next_onln) != cpu_active);

	mutex_exit(&cpu_lock);
}

/*
 * Initialize all callout tables. Called at boot time just before clkstart().
 */
void
callout_init(void)
{
	int f, t;
	size_t size;
	int table_id;
	callout_table_t *ct;
	long bits, fanout;
	uintptr_t buf;

	/*
	 * Initialize callout globals.
2096 */ 2097 bits = 0; 2098 for (fanout = 1; (fanout < max_ncpus); fanout <<= 1) 2099 bits++; 2100 callout_table_bits = CALLOUT_TYPE_BITS + bits; 2101 callout_table_mask = (1 << callout_table_bits) - 1; 2102 callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT; 2103 callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS); 2104 callout_max_ticks = CALLOUT_MAX_TICKS; 2105 if (callout_min_reap == 0) 2106 callout_min_reap = CALLOUT_MIN_REAP; 2107 2108 if (callout_tolerance <= 0) 2109 callout_tolerance = CALLOUT_TOLERANCE; 2110 if (callout_threads <= 0) 2111 callout_threads = CALLOUT_THREADS; 2112 if (callout_chunk <= 0) 2113 callout_chunk = CALLOUT_CHUNK; 2114 else 2115 callout_chunk = P2ROUNDUP(callout_chunk, CALLOUT_CHUNK); 2116 2117 /* 2118 * Allocate all the callout tables based on max_ncpus. We have chosen 2119 * to do boot-time allocation instead of dynamic allocation because: 2120 * 2121 * - the size of the callout tables is not too large. 2122 * - there are race conditions involved in making this dynamic. 2123 * - the hash tables that go with the callout tables consume 2124 * most of the memory and they are only allocated in 2125 * callout_cpu_online(). 2126 * 2127 * Each CPU has two tables that are consecutive in the array. The first 2128 * one is for realtime callouts and the second one is for normal ones. 2129 * 2130 * We do this alignment dance to make sure that callout table 2131 * structures will always be on a cache line boundary. 2132 */ 2133 size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus; 2134 size += CALLOUT_ALIGN; 2135 buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP); 2136 callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN); 2137 2138 size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS; 2139 /* 2140 * Now, initialize the tables for all the CPUs. 2141 */ 2142 for (f = 0; f < max_ncpus; f++) { 2143 for (t = 0; t < CALLOUT_NTYPES; t++) { 2144 table_id = CALLOUT_TABLE(t, f); 2145 ct = &callout_table[table_id]; 2146 ct->ct_type = t; 2147 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 2148 /* 2149 * Precompute the base IDs for long and short-term 2150 * legacy IDs. This makes ID generation during 2151 * timeout() fast. 2152 */ 2153 ct->ct_short_id = CALLOUT_SHORT_ID(table_id); 2154 ct->ct_long_id = CALLOUT_LONG_ID(table_id); 2155 /* 2156 * Precompute the base ID for generation-based IDs. 2157 * Note that when the first ID gets allocated, the 2158 * ID will wrap. This will cause the generation 2159 * number to be incremented to 1. 2160 */ 2161 ct->ct_gen_id = CALLOUT_SHORT_ID(table_id); 2162 /* 2163 * Initialize the cyclics as NONE. This will get set 2164 * during CPU online. This is so that partially 2165 * populated systems will only have the required 2166 * number of cyclics, not more. 2167 */ 2168 ct->ct_cyclic = CYCLIC_NONE; 2169 ct->ct_qcyclic = CYCLIC_NONE; 2170 ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP); 2171 } 2172 } 2173 2174 /* 2175 * Add the callback for CPR. This is called during checkpoint 2176 * resume to suspend and resume callouts. 2177 */ 2178 (void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, 2179 "callout_cpr"); 2180 (void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER, 2181 "callout_debug"); 2182 2183 /* 2184 * Call the per-CPU initialization function for the boot CPU. This 2185 * is done here because the function is not called automatically for 2186 * the boot CPU from the CPU online/offline hooks. Note that the 2187 * CPU lock is taken here because of convention. 
2188 */ 2189 mutex_enter(&cpu_lock); 2190 callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)]; 2191 callout_cpu_online(CPU); 2192 mutex_exit(&cpu_lock); 2193 } 2194
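
/*
 * Illustrative usage sketch (not part of the original implementation;
 * my_handler, my_state_t and sp are hypothetical names). The legacy
 * tick-based interfaces wrap timeout_generic():
 *
 *	timeout_id_t tid;
 *
 *	tid = timeout(my_handler, sp, drv_usectohz(100000));
 *	...
 *	(void) untimeout(tid);
 *
 * Callers that need sub-tick resolution can use timeout_generic()
 * directly; for example, a relative 5ms callout rounded up to 1ms
 * resolution:
 *
 *	callout_id_t id;
 *
 *	id = timeout_generic(CALLOUT_NORMAL, my_handler, sp,
 *	    5 * (hrtime_t)NANOSEC / 1000, (hrtime_t)NANOSEC / 1000,
 *	    CALLOUT_FLAG_ROUNDUP);
 *	...
 *	(void) untimeout_generic(id, 0);
 */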