1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/callo.h> 26 #include <sys/param.h> 27 #include <sys/types.h> 28 #include <sys/cpuvar.h> 29 #include <sys/thread.h> 30 #include <sys/kmem.h> 31 #include <sys/kmem_impl.h> 32 #include <sys/cmn_err.h> 33 #include <sys/callb.h> 34 #include <sys/debug.h> 35 #include <sys/vtrace.h> 36 #include <sys/sysmacros.h> 37 #include <sys/sdt.h> 38 39 int callout_init_done; /* useful during boot */ 40 41 /* 42 * Callout tables. See timeout(9F) for details. 43 */ 44 static int callout_threads; /* callout normal threads */ 45 static hrtime_t callout_debug_hrtime; /* debugger entry time */ 46 static int callout_chunk; /* callout heap chunk size */ 47 static int callout_min_reap; /* callout minimum reap count */ 48 static int callout_tolerance; /* callout hires tolerance */ 49 static callout_table_t *callout_boot_ct; /* Boot CPU's callout tables */ 50 static clock_t callout_max_ticks; /* max interval */ 51 static hrtime_t callout_longterm; /* longterm nanoseconds */ 52 static ulong_t callout_counter_low; /* callout ID increment */ 53 static ulong_t callout_table_bits; /* number of table bits in ID */ 54 static ulong_t callout_table_mask; /* mask for the table bits */ 55 static callout_cache_t *callout_caches; /* linked list of caches */ 56 #pragma align 64(callout_table) 57 static callout_table_t *callout_table; /* global callout table array */ 58 59 /* 60 * We run 'realtime' callouts at PIL 1 (CY_LOW_LEVEL). For 'normal' 61 * callouts, from PIL 10 (CY_LOCK_LEVEL) we dispatch the callout, 62 * via taskq, to a thread that executes at PIL 0 - so we end up running 63 * 'normal' callouts at PIL 0. 64 */ 65 static volatile int callout_realtime_level = CY_LOW_LEVEL; 66 static volatile int callout_normal_level = CY_LOCK_LEVEL; 67 68 static char *callout_kstat_names[] = { 69 "callout_timeouts", 70 "callout_timeouts_pending", 71 "callout_untimeouts_unexpired", 72 "callout_untimeouts_executing", 73 "callout_untimeouts_expired", 74 "callout_expirations", 75 "callout_allocations", 76 "callout_cleanups", 77 }; 78 79 static hrtime_t callout_heap_process(callout_table_t *, hrtime_t, int); 80 81 #define CALLOUT_HASH_INSERT(hash, cp, cnext, cprev) \ 82 { \ 83 callout_hash_t *hashp = &(hash); \ 84 \ 85 cp->cprev = NULL; \ 86 cp->cnext = hashp->ch_head; \ 87 if (hashp->ch_head == NULL) \ 88 hashp->ch_tail = cp; \ 89 else \ 90 cp->cnext->cprev = cp; \ 91 hashp->ch_head = cp; \ 92 } 93 94 #define CALLOUT_HASH_APPEND(hash, cp, cnext, cprev) \ 95 { \ 96 callout_hash_t *hashp = &(hash); \ 97 \ 98 cp->cnext = NULL; \ 99 cp->cprev = hashp->ch_tail; \ 100 if (hashp->ch_tail == NULL) \ 101 hashp->ch_head = cp; \ 102 else \ 103 cp->cprev->cnext = cp; \ 104 hashp->ch_tail = cp; \ 105 } 106 107 #define CALLOUT_HASH_DELETE(hash, cp, cnext, cprev) \ 108 { \ 109 callout_hash_t *hashp = &(hash); \ 110 \ 111 if (cp->cnext == NULL) \ 112 hashp->ch_tail = cp->cprev; \ 113 else \ 114 cp->cnext->cprev = cp->cprev; \ 115 if (cp->cprev == NULL) \ 116 hashp->ch_head = cp->cnext; \ 117 else \ 118 cp->cprev->cnext = cp->cnext; \ 119 } 120 121 /* 122 * These definitions help us queue callouts and callout lists. Here is 123 * the queueing rationale: 124 * 125 * - callouts are queued in a FIFO manner in the ID hash table. 126 * TCP timers are typically cancelled in the same order that they 127 * were issued. The FIFO queueing shortens the search for a callout 128 * during untimeout(). 129 * 130 * - callouts are queued in a FIFO manner in their callout lists. 131 * This ensures that the callouts are executed in the same order that 132 * they were queued. This is fair. Plus, it helps to make each 133 * callout expiration timely. It also favors cancellations. 134 * 135 * - callout lists are queued in the following manner in the callout 136 * hash table buckets: 137 * 138 * - appended, if the callout list is a 1-nanosecond resolution 139 * callout list. When a callout is created, we first look for 140 * a callout list that has the same expiration so we can avoid 141 * allocating a callout list and inserting the expiration into 142 * the heap. However, we do not want to look at 1-nanosecond 143 * resolution callout lists as we will seldom find a match in 144 * them. Keeping these callout lists in the rear of the hash 145 * buckets allows us to skip these during the lookup. 146 * 147 * - inserted at the beginning, if the callout list is not a 148 * 1-nanosecond resolution callout list. This also has the 149 * side-effect of keeping the long term timers away from the 150 * front of the buckets. 151 * 152 * - callout lists are queued in a FIFO manner in the expired callouts 153 * list. This ensures that callout lists are executed in the order 154 * of expiration. 155 */ 156 #define CALLOUT_APPEND(ct, cp) \ 157 CALLOUT_HASH_APPEND(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)], \ 158 cp, c_idnext, c_idprev); \ 159 CALLOUT_HASH_APPEND(cp->c_list->cl_callouts, cp, c_clnext, c_clprev) 160 161 #define CALLOUT_DELETE(ct, cp) \ 162 CALLOUT_HASH_DELETE(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)], \ 163 cp, c_idnext, c_idprev); \ 164 CALLOUT_HASH_DELETE(cp->c_list->cl_callouts, cp, c_clnext, c_clprev) 165 166 #define CALLOUT_LIST_INSERT(hash, cl) \ 167 CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev) 168 169 #define CALLOUT_LIST_APPEND(hash, cl) \ 170 CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev) 171 172 #define CALLOUT_LIST_DELETE(hash, cl) \ 173 CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev) 174 175 #define CALLOUT_LIST_BEFORE(cl, nextcl) \ 176 { \ 177 (cl)->cl_prev = (nextcl)->cl_prev; \ 178 (cl)->cl_next = (nextcl); \ 179 (nextcl)->cl_prev = (cl); \ 180 if (cl->cl_prev != NULL) \ 181 cl->cl_prev->cl_next = cl; \ 182 } 183 184 /* 185 * For normal callouts, there is a deadlock scenario if two callouts that 186 * have an inter-dependency end up on the same callout list. To break the 187 * deadlock, you need two taskq threads running in parallel. We compute 188 * the number of taskq threads here using a bunch of conditions to make 189 * it optimal for the common case. This is an ugly hack, but one that is 190 * necessary (sigh). 191 */ 192 #define CALLOUT_THRESHOLD 100000000 193 #define CALLOUT_EXEC_COMPUTE(ct, nextexp, exec) \ 194 { \ 195 callout_list_t *cl; \ 196 \ 197 cl = ct->ct_expired.ch_head; \ 198 if (cl == NULL) { \ 199 /* \ 200 * If the expired list is NULL, there is nothing to \ 201 * process. \ 202 */ \ 203 exec = 0; \ 204 } else if ((cl->cl_next == NULL) && \ 205 (cl->cl_callouts.ch_head == cl->cl_callouts.ch_tail)) { \ 206 /* \ 207 * If there is only one callout list and it contains \ 208 * only one callout, there is no need for two threads. \ 209 */ \ 210 exec = 1; \ 211 } else if ((nextexp) > (gethrtime() + CALLOUT_THRESHOLD)) { \ 212 /* \ 213 * If the next expiration of the cyclic is way out into \ 214 * the future, we need two threads. \ 215 */ \ 216 exec = 2; \ 217 } else { \ 218 /* \ 219 * We have multiple callouts to process. But the cyclic \ 220 * will fire in the near future. So, we only need one \ 221 * thread for now. \ 222 */ \ 223 exec = 1; \ 224 } \ 225 } 226 227 /* 228 * Macro to swap two heap items. 229 */ 230 #define CALLOUT_SWAP(h1, h2) \ 231 { \ 232 callout_heap_t tmp; \ 233 \ 234 tmp = *h1; \ 235 *h1 = *h2; \ 236 *h2 = tmp; \ 237 } 238 239 /* 240 * Macro to free a callout list. 241 */ 242 #define CALLOUT_LIST_FREE(ct, cl) \ 243 { \ 244 cl->cl_next = ct->ct_lfree; \ 245 ct->ct_lfree = cl; \ 246 cl->cl_flags |= CALLOUT_LIST_FLAG_FREE; \ 247 } 248 249 /* 250 * Macro to free a callout. 251 */ 252 #define CALLOUT_FREE(ct, cl) \ 253 { \ 254 cp->c_idnext = ct->ct_free; \ 255 ct->ct_free = cp; \ 256 cp->c_xid |= CALLOUT_ID_FREE; \ 257 } 258 259 /* 260 * Allocate a callout structure. We try quite hard because we 261 * can't sleep, and if we can't do the allocation, we're toast. 262 * Failing all, we try a KM_PANIC allocation. Note that we never 263 * deallocate a callout. See untimeout() for the reasoning. 264 */ 265 static callout_t * 266 callout_alloc(callout_table_t *ct) 267 { 268 size_t size; 269 callout_t *cp; 270 271 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 272 mutex_exit(&ct->ct_mutex); 273 274 cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP); 275 if (cp == NULL) { 276 size = sizeof (callout_t); 277 cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 278 } 279 cp->c_xid = 0; 280 cp->c_executor = NULL; 281 cv_init(&cp->c_done, NULL, CV_DEFAULT, NULL); 282 cp->c_waiting = 0; 283 284 mutex_enter(&ct->ct_mutex); 285 ct->ct_allocations++; 286 return (cp); 287 } 288 289 /* 290 * Allocate a callout list structure. We try quite hard because we 291 * can't sleep, and if we can't do the allocation, we're toast. 292 * Failing all, we try a KM_PANIC allocation. Note that we never 293 * deallocate a callout list. 294 */ 295 static void 296 callout_list_alloc(callout_table_t *ct) 297 { 298 size_t size; 299 callout_list_t *cl; 300 301 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 302 mutex_exit(&ct->ct_mutex); 303 304 cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP); 305 if (cl == NULL) { 306 size = sizeof (callout_list_t); 307 cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 308 } 309 bzero(cl, sizeof (callout_list_t)); 310 311 mutex_enter(&ct->ct_mutex); 312 CALLOUT_LIST_FREE(ct, cl); 313 } 314 315 /* 316 * Find a callout list that corresponds to an expiration and matching flags. 317 */ 318 static callout_list_t * 319 callout_list_get(callout_table_t *ct, hrtime_t expiration, int flags, int hash) 320 { 321 callout_list_t *cl; 322 int clflags; 323 324 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 325 326 if (flags & CALLOUT_LIST_FLAG_NANO) { 327 /* 328 * This is a 1-nanosecond resolution callout. We will rarely 329 * find a match for this. So, bail out. 330 */ 331 return (NULL); 332 } 333 334 clflags = (CALLOUT_LIST_FLAG_ABSOLUTE | CALLOUT_LIST_FLAG_HRESTIME); 335 for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) { 336 /* 337 * If we have reached a 1-nanosecond resolution callout list, 338 * we don't have much hope of finding a match in this hash 339 * bucket. So, just bail out. 340 */ 341 if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) 342 return (NULL); 343 344 if ((cl->cl_expiration == expiration) && 345 ((cl->cl_flags & clflags) == (flags & clflags))) 346 return (cl); 347 } 348 349 return (NULL); 350 } 351 352 /* 353 * Add a new callout list into a callout table's queue in sorted order by 354 * expiration. 355 */ 356 static int 357 callout_queue_add(callout_table_t *ct, callout_list_t *cl) 358 { 359 callout_list_t *nextcl; 360 hrtime_t expiration; 361 362 expiration = cl->cl_expiration; 363 nextcl = ct->ct_queue.ch_head; 364 if ((nextcl == NULL) || (expiration < nextcl->cl_expiration)) { 365 CALLOUT_LIST_INSERT(ct->ct_queue, cl); 366 return (1); 367 } 368 369 while (nextcl != NULL) { 370 if (expiration < nextcl->cl_expiration) { 371 CALLOUT_LIST_BEFORE(cl, nextcl); 372 return (0); 373 } 374 nextcl = nextcl->cl_next; 375 } 376 CALLOUT_LIST_APPEND(ct->ct_queue, cl); 377 378 return (0); 379 } 380 381 /* 382 * Insert a callout list into a callout table's queue and reprogram the queue 383 * cyclic if needed. 384 */ 385 static void 386 callout_queue_insert(callout_table_t *ct, callout_list_t *cl) 387 { 388 cl->cl_flags |= CALLOUT_LIST_FLAG_QUEUED; 389 390 /* 391 * Add the callout to the callout queue. If it ends up at the head, 392 * the cyclic needs to be reprogrammed as we have an earlier 393 * expiration. 394 * 395 * Also, during the CPR suspend phase, do not reprogram the cyclic. 396 * We don't want any callout activity. When the CPR resume phase is 397 * entered, the cyclic will be programmed for the earliest expiration 398 * in the queue. 399 */ 400 if (callout_queue_add(ct, cl) && (ct->ct_suspend == 0)) 401 (void) cyclic_reprogram(ct->ct_qcyclic, cl->cl_expiration); 402 } 403 404 /* 405 * Delete and handle all past expirations in a callout table's queue. 406 */ 407 static hrtime_t 408 callout_queue_delete(callout_table_t *ct) 409 { 410 callout_list_t *cl; 411 hrtime_t now; 412 413 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 414 415 now = gethrtime(); 416 while ((cl = ct->ct_queue.ch_head) != NULL) { 417 if (cl->cl_expiration > now) 418 break; 419 cl->cl_flags &= ~CALLOUT_LIST_FLAG_QUEUED; 420 CALLOUT_LIST_DELETE(ct->ct_queue, cl); 421 CALLOUT_LIST_APPEND(ct->ct_expired, cl); 422 } 423 424 /* 425 * If this callout queue is empty or callouts have been suspended, 426 * just return. 427 */ 428 if ((cl == NULL) || (ct->ct_suspend > 0)) 429 return (CY_INFINITY); 430 431 (void) cyclic_reprogram(ct->ct_qcyclic, cl->cl_expiration); 432 433 return (cl->cl_expiration); 434 } 435 436 static hrtime_t 437 callout_queue_process(callout_table_t *ct, hrtime_t delta, int timechange) 438 { 439 callout_list_t *firstcl, *cl; 440 hrtime_t expiration, now; 441 int clflags; 442 callout_hash_t temp; 443 444 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 445 446 firstcl = ct->ct_queue.ch_head; 447 if (firstcl == NULL) 448 return (CY_INFINITY); 449 450 /* 451 * We walk the callout queue. If we encounter a hrestime entry that 452 * must be removed, we clean it out. Otherwise, we apply any 453 * adjustments needed to it. Because of the latter, we need to 454 * recreate the list as we go along. 455 */ 456 temp = ct->ct_queue; 457 ct->ct_queue.ch_head = NULL; 458 ct->ct_queue.ch_tail = NULL; 459 460 clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE); 461 now = gethrtime(); 462 while ((cl = temp.ch_head) != NULL) { 463 CALLOUT_LIST_DELETE(temp, cl); 464 465 /* 466 * Delete the callout and expire it, if one of the following 467 * is true: 468 * - the callout has expired 469 * - the callout is an absolute hrestime one and 470 * there has been a system time change 471 */ 472 if ((cl->cl_expiration <= now) || 473 (timechange && ((cl->cl_flags & clflags) == clflags))) { 474 cl->cl_flags &= ~CALLOUT_LIST_FLAG_QUEUED; 475 CALLOUT_LIST_APPEND(ct->ct_expired, cl); 476 continue; 477 } 478 479 /* 480 * Apply adjustments, if any. Adjustments are applied after 481 * the system returns from KMDB or OBP. They are only applied 482 * to relative callout lists. 483 */ 484 if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) { 485 expiration = cl->cl_expiration + delta; 486 if (expiration <= 0) 487 expiration = CY_INFINITY; 488 cl->cl_expiration = expiration; 489 } 490 491 (void) callout_queue_add(ct, cl); 492 } 493 494 /* 495 * We need to return the expiration to help program the cyclic. 496 * If there are expired callouts, the cyclic needs to go off 497 * immediately. If the queue has become empty, then we return infinity. 498 * Else, we return the expiration of the earliest callout in the queue. 499 */ 500 if (ct->ct_expired.ch_head != NULL) 501 return (gethrtime()); 502 503 cl = ct->ct_queue.ch_head; 504 if (cl == NULL) 505 return (CY_INFINITY); 506 507 return (cl->cl_expiration); 508 } 509 510 /* 511 * Initialize a callout table's heap, if necessary. Preallocate some free 512 * entries so we don't have to check for NULL elsewhere. 513 */ 514 static void 515 callout_heap_init(callout_table_t *ct) 516 { 517 size_t size; 518 519 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 520 ASSERT(ct->ct_heap == NULL); 521 522 ct->ct_heap_num = 0; 523 ct->ct_heap_max = callout_chunk; 524 size = sizeof (callout_heap_t) * callout_chunk; 525 ct->ct_heap = kmem_alloc(size, KM_SLEEP); 526 } 527 528 /* 529 * Reallocate the heap. Return 0 if the heap is still full at the end of it. 530 * Return 1 otherwise. Note that the heap only expands, it never contracts. 531 */ 532 static int 533 callout_heap_expand(callout_table_t *ct) 534 { 535 size_t max, size, osize; 536 callout_heap_t *heap; 537 538 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 539 ASSERT(ct->ct_heap_num <= ct->ct_heap_max); 540 541 while (ct->ct_heap_num == ct->ct_heap_max) { 542 max = ct->ct_heap_max; 543 mutex_exit(&ct->ct_mutex); 544 545 osize = sizeof (callout_heap_t) * max; 546 size = sizeof (callout_heap_t) * (max + callout_chunk); 547 heap = kmem_alloc(size, KM_NOSLEEP); 548 549 mutex_enter(&ct->ct_mutex); 550 if (heap == NULL) { 551 /* 552 * We could not allocate memory. If we can free up 553 * some entries, that would be great. 554 */ 555 if (ct->ct_nreap > 0) 556 (void) callout_heap_process(ct, 0, 0); 557 /* 558 * If we still have no space in the heap, inform the 559 * caller. 560 */ 561 if (ct->ct_heap_num == ct->ct_heap_max) 562 return (0); 563 return (1); 564 } 565 if (max < ct->ct_heap_max) { 566 /* 567 * Someone beat us to the allocation. Free what we 568 * just allocated and proceed. 569 */ 570 kmem_free(heap, size); 571 continue; 572 } 573 574 bcopy(ct->ct_heap, heap, osize); 575 kmem_free(ct->ct_heap, osize); 576 ct->ct_heap = heap; 577 ct->ct_heap_max = size / sizeof (callout_heap_t); 578 } 579 580 return (1); 581 } 582 583 /* 584 * Move an expiration from the bottom of the heap to its correct place 585 * in the heap. If we reached the root doing this, return 1. Else, 586 * return 0. 587 */ 588 static int 589 callout_upheap(callout_table_t *ct) 590 { 591 int current, parent; 592 callout_heap_t *heap, *hcurrent, *hparent; 593 594 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 595 ASSERT(ct->ct_heap_num >= 1); 596 597 if (ct->ct_heap_num == 1) { 598 return (1); 599 } 600 601 heap = ct->ct_heap; 602 current = ct->ct_heap_num - 1; 603 604 for (;;) { 605 parent = CALLOUT_HEAP_PARENT(current); 606 hparent = &heap[parent]; 607 hcurrent = &heap[current]; 608 609 /* 610 * We have an expiration later than our parent; we're done. 611 */ 612 if (hcurrent->ch_expiration >= hparent->ch_expiration) { 613 return (0); 614 } 615 616 /* 617 * We need to swap with our parent, and continue up the heap. 618 */ 619 CALLOUT_SWAP(hparent, hcurrent); 620 621 /* 622 * If we just reached the root, we're done. 623 */ 624 if (parent == 0) { 625 return (1); 626 } 627 628 current = parent; 629 } 630 /*NOTREACHED*/ 631 } 632 633 /* 634 * Insert a new heap item into a callout table's heap. 635 */ 636 static void 637 callout_heap_insert(callout_table_t *ct, callout_list_t *cl) 638 { 639 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 640 ASSERT(ct->ct_heap_num < ct->ct_heap_max); 641 642 cl->cl_flags |= CALLOUT_LIST_FLAG_HEAPED; 643 /* 644 * First, copy the expiration and callout list pointer to the bottom 645 * of the heap. 646 */ 647 ct->ct_heap[ct->ct_heap_num].ch_expiration = cl->cl_expiration; 648 ct->ct_heap[ct->ct_heap_num].ch_list = cl; 649 ct->ct_heap_num++; 650 651 /* 652 * Now, perform an upheap operation. If we reached the root, then 653 * the cyclic needs to be reprogrammed as we have an earlier 654 * expiration. 655 * 656 * Also, during the CPR suspend phase, do not reprogram the cyclic. 657 * We don't want any callout activity. When the CPR resume phase is 658 * entered, the cyclic will be programmed for the earliest expiration 659 * in the heap. 660 */ 661 if (callout_upheap(ct) && (ct->ct_suspend == 0)) 662 (void) cyclic_reprogram(ct->ct_cyclic, cl->cl_expiration); 663 } 664 665 /* 666 * Move an expiration from the top of the heap to its correct place 667 * in the heap. 668 */ 669 static void 670 callout_downheap(callout_table_t *ct) 671 { 672 int current, left, right, nelems; 673 callout_heap_t *heap, *hleft, *hright, *hcurrent; 674 675 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 676 ASSERT(ct->ct_heap_num >= 1); 677 678 heap = ct->ct_heap; 679 current = 0; 680 nelems = ct->ct_heap_num; 681 682 for (;;) { 683 /* 684 * If we don't have a left child (i.e., we're a leaf), we're 685 * done. 686 */ 687 if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems) 688 return; 689 690 hleft = &heap[left]; 691 hcurrent = &heap[current]; 692 693 right = CALLOUT_HEAP_RIGHT(current); 694 695 /* 696 * Even if we don't have a right child, we still need to compare 697 * our expiration against that of our left child. 698 */ 699 if (right >= nelems) 700 goto comp_left; 701 702 hright = &heap[right]; 703 704 /* 705 * We have both a left and a right child. We need to compare 706 * the expiration of the children to determine which 707 * expires earlier. 708 */ 709 if (hright->ch_expiration < hleft->ch_expiration) { 710 /* 711 * Our right child is the earlier of our children. 712 * We'll now compare our expiration to its expiration. 713 * If ours is the earlier one, we're done. 714 */ 715 if (hcurrent->ch_expiration <= hright->ch_expiration) 716 return; 717 718 /* 719 * Our right child expires earlier than we do; swap 720 * with our right child, and descend right. 721 */ 722 CALLOUT_SWAP(hright, hcurrent); 723 current = right; 724 continue; 725 } 726 727 comp_left: 728 /* 729 * Our left child is the earlier of our children (or we have 730 * no right child). We'll now compare our expiration 731 * to its expiration. If ours is the earlier one, we're done. 732 */ 733 if (hcurrent->ch_expiration <= hleft->ch_expiration) 734 return; 735 736 /* 737 * Our left child expires earlier than we do; swap with our 738 * left child, and descend left. 739 */ 740 CALLOUT_SWAP(hleft, hcurrent); 741 current = left; 742 } 743 } 744 745 /* 746 * Delete and handle all past expirations in a callout table's heap. 747 */ 748 static hrtime_t 749 callout_heap_delete(callout_table_t *ct) 750 { 751 hrtime_t now, expiration, next; 752 callout_list_t *cl; 753 callout_heap_t *heap; 754 int hash; 755 756 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 757 758 if (CALLOUT_CLEANUP(ct)) { 759 /* 760 * There are too many heap elements pointing to empty callout 761 * lists. Clean them out. 762 */ 763 (void) callout_heap_process(ct, 0, 0); 764 } 765 766 now = gethrtime(); 767 heap = ct->ct_heap; 768 769 while (ct->ct_heap_num > 0) { 770 expiration = heap->ch_expiration; 771 hash = CALLOUT_CLHASH(expiration); 772 cl = heap->ch_list; 773 ASSERT(expiration == cl->cl_expiration); 774 775 if (cl->cl_callouts.ch_head == NULL) { 776 /* 777 * If the callout list is empty, reap it. 778 * Decrement the reap count. 779 */ 780 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 781 CALLOUT_LIST_FREE(ct, cl); 782 ct->ct_nreap--; 783 } else { 784 /* 785 * If the root of the heap expires in the future, 786 * bail out. 787 */ 788 if (expiration > now) 789 break; 790 791 /* 792 * Move the callout list for this expiration to the 793 * list of expired callout lists. It will be processed 794 * by the callout executor. 795 */ 796 cl->cl_flags &= ~CALLOUT_LIST_FLAG_HEAPED; 797 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 798 CALLOUT_LIST_APPEND(ct->ct_expired, cl); 799 } 800 801 /* 802 * Now delete the root. This is done by swapping the root with 803 * the last item in the heap and downheaping the item. 804 */ 805 ct->ct_heap_num--; 806 if (ct->ct_heap_num > 0) { 807 heap[0] = heap[ct->ct_heap_num]; 808 callout_downheap(ct); 809 } 810 } 811 812 /* 813 * If this callout table is empty or callouts have been suspended, 814 * just return. The cyclic has already been programmed to 815 * infinity by the cyclic subsystem. 816 */ 817 if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0)) 818 return (CY_INFINITY); 819 820 /* 821 * If the top expirations are within callout_tolerance of each other, 822 * delay the cyclic expire so that they can be processed together. 823 * This is to prevent high resolution timers from swamping the system 824 * with cyclic activity. 825 */ 826 if (ct->ct_heap_num > 2) { 827 next = expiration + callout_tolerance; 828 if ((heap[1].ch_expiration < next) || 829 (heap[2].ch_expiration < next)) 830 expiration = next; 831 } 832 833 (void) cyclic_reprogram(ct->ct_cyclic, expiration); 834 835 return (expiration); 836 } 837 838 /* 839 * There are some situations when the entire heap is walked and processed. 840 * This function is called to do the processing. These are the situations: 841 * 842 * 1. When the reap count reaches its threshold, the heap has to be cleared 843 * of all empty callout lists. 844 * 845 * 2. When the system enters and exits KMDB/OBP, all entries in the heap 846 * need to be adjusted by the interval spent in KMDB/OBP. 847 * 848 * 3. When system time is changed, the heap has to be scanned for 849 * absolute hrestime timers. These need to be removed from the heap 850 * and expired immediately. 851 * 852 * In cases 2 and 3, it is a good idea to do 1 as well since we are 853 * scanning the heap anyway. 854 * 855 * If the root gets changed and/or callout lists are expired, return the 856 * new expiration to the caller so he can reprogram the cyclic accordingly. 857 */ 858 static hrtime_t 859 callout_heap_process(callout_table_t *ct, hrtime_t delta, int timechange) 860 { 861 callout_heap_t *heap; 862 callout_list_t *cl; 863 hrtime_t expiration, now; 864 int i, hash, clflags; 865 ulong_t num; 866 867 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 868 869 if (ct->ct_heap_num == 0) 870 return (CY_INFINITY); 871 872 if (ct->ct_nreap > 0) 873 ct->ct_cleanups++; 874 875 heap = ct->ct_heap; 876 877 /* 878 * We walk the heap from the top to the bottom. If we encounter 879 * a heap item that points to an empty callout list, we clean 880 * it out. If we encounter a hrestime entry that must be removed, 881 * again we clean it out. Otherwise, we apply any adjustments needed 882 * to an element. 883 * 884 * During the walk, we also compact the heap from the bottom and 885 * reconstruct the heap using upheap operations. This is very 886 * efficient if the number of elements to be cleaned is greater than 887 * or equal to half the heap. This is the common case. 888 * 889 * Even in the non-common case, the upheap operations should be short 890 * as the entries below generally tend to be bigger than the entries 891 * above. 892 */ 893 num = ct->ct_heap_num; 894 ct->ct_heap_num = 0; 895 clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE); 896 now = gethrtime(); 897 for (i = 0; i < num; i++) { 898 cl = heap[i].ch_list; 899 /* 900 * If the callout list is empty, delete the heap element and 901 * free the callout list. 902 */ 903 if (cl->cl_callouts.ch_head == NULL) { 904 hash = CALLOUT_CLHASH(cl->cl_expiration); 905 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 906 CALLOUT_LIST_FREE(ct, cl); 907 continue; 908 } 909 910 /* 911 * Delete the heap element and expire the callout list, if 912 * one of the following is true: 913 * - the callout list has expired 914 * - the callout list is an absolute hrestime one and 915 * there has been a system time change 916 */ 917 if ((cl->cl_expiration <= now) || 918 (timechange && ((cl->cl_flags & clflags) == clflags))) { 919 hash = CALLOUT_CLHASH(cl->cl_expiration); 920 cl->cl_flags &= ~CALLOUT_LIST_FLAG_HEAPED; 921 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 922 CALLOUT_LIST_APPEND(ct->ct_expired, cl); 923 continue; 924 } 925 926 /* 927 * Apply adjustments, if any. Adjustments are applied after 928 * the system returns from KMDB or OBP. They are only applied 929 * to relative callout lists. 930 */ 931 if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) { 932 hash = CALLOUT_CLHASH(cl->cl_expiration); 933 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 934 expiration = cl->cl_expiration + delta; 935 if (expiration <= 0) 936 expiration = CY_INFINITY; 937 heap[i].ch_expiration = expiration; 938 cl->cl_expiration = expiration; 939 hash = CALLOUT_CLHASH(cl->cl_expiration); 940 if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) { 941 CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl); 942 } else { 943 CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl); 944 } 945 } 946 947 heap[ct->ct_heap_num] = heap[i]; 948 ct->ct_heap_num++; 949 (void) callout_upheap(ct); 950 } 951 952 ct->ct_nreap = 0; 953 954 /* 955 * We need to return the expiration to help program the cyclic. 956 * If there are expired callouts, the cyclic needs to go off 957 * immediately. If the heap has become empty, then we return infinity. 958 * Else, return the expiration of the earliest callout in the heap. 959 */ 960 if (ct->ct_expired.ch_head != NULL) 961 return (gethrtime()); 962 963 if (ct->ct_heap_num == 0) 964 return (CY_INFINITY); 965 966 return (heap->ch_expiration); 967 } 968 969 /* 970 * Common function used to create normal and realtime callouts. 971 * 972 * Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So, 973 * there is one restriction on a realtime callout handler - it should not 974 * directly or indirectly acquire cpu_lock. CPU offline waits for pending 975 * cyclic handlers to complete while holding cpu_lock. So, if a realtime 976 * callout handler were to try to get cpu_lock, there would be a deadlock 977 * during CPU offline. 978 */ 979 callout_id_t 980 timeout_generic(int type, void (*func)(void *), void *arg, 981 hrtime_t expiration, hrtime_t resolution, int flags) 982 { 983 callout_table_t *ct; 984 callout_t *cp; 985 callout_id_t id; 986 callout_list_t *cl; 987 hrtime_t now, interval; 988 int hash, clflags; 989 990 ASSERT(resolution > 0); 991 ASSERT(func != NULL); 992 993 /* 994 * We get the current hrtime right upfront so that latencies in 995 * this function do not affect the accuracy of the callout. 996 */ 997 now = gethrtime(); 998 999 /* 1000 * We disable kernel preemption so that we remain on the same CPU 1001 * throughout. If we needed to reprogram the callout table's cyclic, 1002 * we can avoid X-calls if we are on the same CPU. 1003 * 1004 * Note that callout_alloc() releases and reacquires the callout 1005 * table mutex. While reacquiring the mutex, it is possible for us 1006 * to go to sleep and later migrate to another CPU. This should be 1007 * pretty rare, though. 1008 */ 1009 kpreempt_disable(); 1010 1011 ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)]; 1012 mutex_enter(&ct->ct_mutex); 1013 1014 if (ct->ct_cyclic == CYCLIC_NONE) { 1015 mutex_exit(&ct->ct_mutex); 1016 /* 1017 * The callout table has not yet been initialized fully. 1018 * So, put this one on the boot callout table which is 1019 * always initialized. 1020 */ 1021 ct = &callout_boot_ct[type]; 1022 mutex_enter(&ct->ct_mutex); 1023 } 1024 1025 if (CALLOUT_CLEANUP(ct)) { 1026 /* 1027 * There are too many heap elements pointing to empty callout 1028 * lists. Clean them out. Since cleanup is only done once 1029 * in a while, no need to reprogram the cyclic if the root 1030 * of the heap gets cleaned out. 1031 */ 1032 (void) callout_heap_process(ct, 0, 0); 1033 } 1034 1035 if ((cp = ct->ct_free) == NULL) 1036 cp = callout_alloc(ct); 1037 else 1038 ct->ct_free = cp->c_idnext; 1039 1040 cp->c_func = func; 1041 cp->c_arg = arg; 1042 1043 /* 1044 * Compute the expiration hrtime. 1045 */ 1046 if (flags & CALLOUT_FLAG_ABSOLUTE) { 1047 interval = expiration - now; 1048 } else { 1049 interval = expiration; 1050 expiration += now; 1051 } 1052 1053 if (resolution > 1) { 1054 /* 1055 * Align expiration to the specified resolution. 1056 */ 1057 if (flags & CALLOUT_FLAG_ROUNDUP) 1058 expiration += resolution - 1; 1059 expiration = (expiration / resolution) * resolution; 1060 } 1061 1062 if (expiration <= 0) { 1063 /* 1064 * expiration hrtime overflow has occurred. Just set the 1065 * expiration to infinity. 1066 */ 1067 expiration = CY_INFINITY; 1068 } 1069 1070 /* 1071 * Assign an ID to this callout 1072 */ 1073 if (flags & CALLOUT_FLAG_32BIT) { 1074 if (interval > callout_longterm) { 1075 id = (ct->ct_long_id - callout_counter_low); 1076 id |= CALLOUT_COUNTER_HIGH; 1077 ct->ct_long_id = id; 1078 } else { 1079 id = (ct->ct_short_id - callout_counter_low); 1080 id |= CALLOUT_COUNTER_HIGH; 1081 ct->ct_short_id = id; 1082 } 1083 } else { 1084 id = (ct->ct_gen_id - callout_counter_low); 1085 if ((id & CALLOUT_COUNTER_HIGH) == 0) { 1086 id |= CALLOUT_COUNTER_HIGH; 1087 id += CALLOUT_GENERATION_LOW; 1088 } 1089 ct->ct_gen_id = id; 1090 } 1091 1092 cp->c_xid = id; 1093 1094 clflags = 0; 1095 if (flags & CALLOUT_FLAG_ABSOLUTE) 1096 clflags |= CALLOUT_LIST_FLAG_ABSOLUTE; 1097 if (flags & CALLOUT_FLAG_HRESTIME) 1098 clflags |= CALLOUT_LIST_FLAG_HRESTIME; 1099 if (resolution == 1) 1100 clflags |= CALLOUT_LIST_FLAG_NANO; 1101 hash = CALLOUT_CLHASH(expiration); 1102 1103 again: 1104 /* 1105 * Try to see if a callout list already exists for this expiration. 1106 */ 1107 cl = callout_list_get(ct, expiration, clflags, hash); 1108 if (cl == NULL) { 1109 /* 1110 * Check the free list. If we don't find one, we have to 1111 * take the slow path and allocate from kmem. 1112 */ 1113 if ((cl = ct->ct_lfree) == NULL) { 1114 callout_list_alloc(ct); 1115 /* 1116 * In the above call, we drop the lock, allocate and 1117 * reacquire the lock. So, we could have been away 1118 * for a while. In the meantime, someone could have 1119 * inserted a callout list with the same expiration. 1120 * Plus, the heap could have become full. So, the best 1121 * course is to repeat the steps. This should be an 1122 * infrequent event. 1123 */ 1124 goto again; 1125 } 1126 ct->ct_lfree = cl->cl_next; 1127 cl->cl_expiration = expiration; 1128 cl->cl_flags = clflags; 1129 1130 /* 1131 * Check if we have enough space in the heap to insert one 1132 * expiration. If not, expand the heap. 1133 */ 1134 if (ct->ct_heap_num == ct->ct_heap_max) { 1135 if (callout_heap_expand(ct) == 0) { 1136 /* 1137 * Could not expand the heap. Just queue it. 1138 */ 1139 callout_queue_insert(ct, cl); 1140 goto out; 1141 } 1142 1143 /* 1144 * In the above call, we drop the lock, allocate and 1145 * reacquire the lock. So, we could have been away 1146 * for a while. In the meantime, someone could have 1147 * inserted a callout list with the same expiration. 1148 * But we will not go back and check for it as this 1149 * should be a really infrequent event. There is no 1150 * point. 1151 */ 1152 } 1153 1154 if (clflags & CALLOUT_LIST_FLAG_NANO) { 1155 CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl); 1156 } else { 1157 CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl); 1158 } 1159 1160 /* 1161 * This is a new expiration. So, insert it into the heap. 1162 * This will also reprogram the cyclic, if the expiration 1163 * propagated to the root of the heap. 1164 */ 1165 callout_heap_insert(ct, cl); 1166 } else { 1167 /* 1168 * If the callout list was empty, untimeout_generic() would 1169 * have incremented a reap count. Decrement the reap count 1170 * as we are going to insert a callout into this list. 1171 */ 1172 if (cl->cl_callouts.ch_head == NULL) 1173 ct->ct_nreap--; 1174 } 1175 out: 1176 cp->c_list = cl; 1177 CALLOUT_APPEND(ct, cp); 1178 1179 ct->ct_timeouts++; 1180 ct->ct_timeouts_pending++; 1181 1182 mutex_exit(&ct->ct_mutex); 1183 1184 kpreempt_enable(); 1185 1186 TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT, 1187 "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration, 1188 cp); 1189 1190 return (id); 1191 } 1192 1193 timeout_id_t 1194 timeout(void (*func)(void *), void *arg, clock_t delta) 1195 { 1196 ulong_t id; 1197 1198 /* 1199 * Make sure the callout runs at least 1 tick in the future. 1200 */ 1201 if (delta <= 0) 1202 delta = 1; 1203 else if (delta > callout_max_ticks) 1204 delta = callout_max_ticks; 1205 1206 id = (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg, 1207 TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY); 1208 1209 return ((timeout_id_t)id); 1210 } 1211 1212 /* 1213 * Convenience function that creates a normal callout with default parameters 1214 * and returns a full ID. 1215 */ 1216 callout_id_t 1217 timeout_default(void (*func)(void *), void *arg, clock_t delta) 1218 { 1219 callout_id_t id; 1220 1221 /* 1222 * Make sure the callout runs at least 1 tick in the future. 1223 */ 1224 if (delta <= 0) 1225 delta = 1; 1226 else if (delta > callout_max_ticks) 1227 delta = callout_max_ticks; 1228 1229 id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta), 1230 nsec_per_tick, 0); 1231 1232 return (id); 1233 } 1234 1235 timeout_id_t 1236 realtime_timeout(void (*func)(void *), void *arg, clock_t delta) 1237 { 1238 ulong_t id; 1239 1240 /* 1241 * Make sure the callout runs at least 1 tick in the future. 1242 */ 1243 if (delta <= 0) 1244 delta = 1; 1245 else if (delta > callout_max_ticks) 1246 delta = callout_max_ticks; 1247 1248 id = (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg, 1249 TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY); 1250 1251 return ((timeout_id_t)id); 1252 } 1253 1254 /* 1255 * Convenience function that creates a realtime callout with default parameters 1256 * and returns a full ID. 1257 */ 1258 callout_id_t 1259 realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta) 1260 { 1261 callout_id_t id; 1262 1263 /* 1264 * Make sure the callout runs at least 1 tick in the future. 1265 */ 1266 if (delta <= 0) 1267 delta = 1; 1268 else if (delta > callout_max_ticks) 1269 delta = callout_max_ticks; 1270 1271 id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta), 1272 nsec_per_tick, 0); 1273 1274 return (id); 1275 } 1276 1277 hrtime_t 1278 untimeout_generic(callout_id_t id, int nowait) 1279 { 1280 callout_table_t *ct; 1281 callout_t *cp; 1282 callout_id_t xid; 1283 callout_list_t *cl; 1284 int hash, flags; 1285 callout_id_t bogus; 1286 1287 ct = &callout_table[CALLOUT_ID_TO_TABLE(id)]; 1288 hash = CALLOUT_IDHASH(id); 1289 1290 mutex_enter(&ct->ct_mutex); 1291 1292 /* 1293 * Search the ID hash table for the callout. 1294 */ 1295 for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) { 1296 1297 xid = cp->c_xid; 1298 1299 /* 1300 * Match the ID and generation number. 1301 */ 1302 if ((xid & CALLOUT_ID_MASK) != id) 1303 continue; 1304 1305 if ((xid & CALLOUT_EXECUTING) == 0) { 1306 hrtime_t expiration; 1307 1308 /* 1309 * Delete the callout. If the callout list becomes 1310 * NULL, we don't remove it from the table. This is 1311 * so it can be reused. If the empty callout list 1312 * corresponds to the top of the the callout heap, we 1313 * don't reprogram the table cyclic here. This is in 1314 * order to avoid lots of X-calls to the CPU associated 1315 * with the callout table. 1316 */ 1317 cl = cp->c_list; 1318 expiration = cl->cl_expiration; 1319 CALLOUT_DELETE(ct, cp); 1320 CALLOUT_FREE(ct, cp); 1321 ct->ct_untimeouts_unexpired++; 1322 ct->ct_timeouts_pending--; 1323 1324 /* 1325 * If the callout list has become empty, there are 3 1326 * possibilities. If it is present: 1327 * - in the heap, it needs to be cleaned along 1328 * with its heap entry. Increment a reap count. 1329 * - in the callout queue, free it. 1330 * - in the expired list, free it. 1331 */ 1332 if (cl->cl_callouts.ch_head == NULL) { 1333 flags = cl->cl_flags; 1334 if (flags & CALLOUT_LIST_FLAG_HEAPED) { 1335 ct->ct_nreap++; 1336 } else if (flags & CALLOUT_LIST_FLAG_QUEUED) { 1337 CALLOUT_LIST_DELETE(ct->ct_queue, cl); 1338 CALLOUT_LIST_FREE(ct, cl); 1339 } else { 1340 CALLOUT_LIST_DELETE(ct->ct_expired, cl); 1341 CALLOUT_LIST_FREE(ct, cl); 1342 } 1343 } 1344 mutex_exit(&ct->ct_mutex); 1345 1346 expiration -= gethrtime(); 1347 TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT, 1348 "untimeout:ID %lx hrtime left %llx", id, 1349 expiration); 1350 return (expiration < 0 ? 0 : expiration); 1351 } 1352 1353 ct->ct_untimeouts_executing++; 1354 /* 1355 * The callout we want to delete is currently executing. 1356 * The DDI states that we must wait until the callout 1357 * completes before returning, so we block on c_done until the 1358 * callout ID changes (to the old ID if it's on the freelist, 1359 * or to a new callout ID if it's in use). This implicitly 1360 * assumes that callout structures are persistent (they are). 1361 */ 1362 if (cp->c_executor == curthread) { 1363 /* 1364 * The timeout handler called untimeout() on itself. 1365 * Stupid, but legal. We can't wait for the timeout 1366 * to complete without deadlocking, so we just return. 1367 */ 1368 mutex_exit(&ct->ct_mutex); 1369 TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF, 1370 "untimeout_self:ID %x", id); 1371 return (-1); 1372 } 1373 if (nowait == 0) { 1374 /* 1375 * We need to wait. Indicate that we are waiting by 1376 * incrementing c_waiting. This prevents the executor 1377 * from doing a wakeup on c_done if there are no 1378 * waiters. 1379 */ 1380 while (cp->c_xid == xid) { 1381 cp->c_waiting = 1; 1382 cv_wait(&cp->c_done, &ct->ct_mutex); 1383 } 1384 } 1385 mutex_exit(&ct->ct_mutex); 1386 TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING, 1387 "untimeout_executing:ID %lx", id); 1388 return (-1); 1389 } 1390 ct->ct_untimeouts_expired++; 1391 1392 mutex_exit(&ct->ct_mutex); 1393 TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID, 1394 "untimeout_bogus_id:ID %lx", id); 1395 1396 /* 1397 * We didn't find the specified callout ID. This means either 1398 * (1) the callout already fired, or (2) the caller passed us 1399 * a bogus value. Perform a sanity check to detect case (2). 1400 */ 1401 bogus = (CALLOUT_ID_FLAGS | CALLOUT_COUNTER_HIGH); 1402 if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0)) 1403 panic("untimeout: impossible timeout id %llx", 1404 (unsigned long long)id); 1405 1406 return (-1); 1407 } 1408 1409 clock_t 1410 untimeout(timeout_id_t id_arg) 1411 { 1412 hrtime_t hleft; 1413 clock_t tleft; 1414 callout_id_t id; 1415 1416 id = (ulong_t)id_arg; 1417 hleft = untimeout_generic(id, 0); 1418 if (hleft < 0) 1419 tleft = -1; 1420 else if (hleft == 0) 1421 tleft = 0; 1422 else 1423 tleft = NSEC_TO_TICK(hleft); 1424 1425 return (tleft); 1426 } 1427 1428 /* 1429 * Convenience function to untimeout a timeout with a full ID with default 1430 * parameters. 1431 */ 1432 clock_t 1433 untimeout_default(callout_id_t id, int nowait) 1434 { 1435 hrtime_t hleft; 1436 clock_t tleft; 1437 1438 hleft = untimeout_generic(id, nowait); 1439 if (hleft < 0) 1440 tleft = -1; 1441 else if (hleft == 0) 1442 tleft = 0; 1443 else 1444 tleft = NSEC_TO_TICK(hleft); 1445 1446 return (tleft); 1447 } 1448 1449 /* 1450 * Expire all the callouts queued in the specified callout list. 1451 */ 1452 static void 1453 callout_list_expire(callout_table_t *ct, callout_list_t *cl) 1454 { 1455 callout_t *cp, *cnext; 1456 1457 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1458 ASSERT(cl != NULL); 1459 1460 for (cp = cl->cl_callouts.ch_head; cp != NULL; cp = cnext) { 1461 /* 1462 * Multiple executor threads could be running at the same 1463 * time. If this callout is already being executed, 1464 * go on to the next one. 1465 */ 1466 if (cp->c_xid & CALLOUT_EXECUTING) { 1467 cnext = cp->c_clnext; 1468 continue; 1469 } 1470 1471 /* 1472 * Indicate to untimeout() that a callout is 1473 * being expired by the executor. 1474 */ 1475 cp->c_xid |= CALLOUT_EXECUTING; 1476 cp->c_executor = curthread; 1477 mutex_exit(&ct->ct_mutex); 1478 1479 DTRACE_PROBE1(callout__start, callout_t *, cp); 1480 (*cp->c_func)(cp->c_arg); 1481 DTRACE_PROBE1(callout__end, callout_t *, cp); 1482 1483 mutex_enter(&ct->ct_mutex); 1484 1485 ct->ct_expirations++; 1486 ct->ct_timeouts_pending--; 1487 /* 1488 * Indicate completion for c_done. 1489 */ 1490 cp->c_xid &= ~CALLOUT_EXECUTING; 1491 cp->c_executor = NULL; 1492 cnext = cp->c_clnext; 1493 1494 /* 1495 * Delete callout from ID hash table and the callout 1496 * list, return to freelist, and tell any untimeout() that 1497 * cares that we're done. 1498 */ 1499 CALLOUT_DELETE(ct, cp); 1500 CALLOUT_FREE(ct, cp); 1501 1502 if (cp->c_waiting) { 1503 cp->c_waiting = 0; 1504 cv_broadcast(&cp->c_done); 1505 } 1506 } 1507 } 1508 1509 /* 1510 * Execute all expired callout lists for a callout table. 1511 */ 1512 static void 1513 callout_expire(callout_table_t *ct) 1514 { 1515 callout_list_t *cl, *clnext; 1516 1517 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1518 1519 for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) { 1520 /* 1521 * Expire all the callouts in this callout list. 1522 */ 1523 callout_list_expire(ct, cl); 1524 1525 clnext = cl->cl_next; 1526 if (cl->cl_callouts.ch_head == NULL) { 1527 /* 1528 * Free the callout list. 1529 */ 1530 CALLOUT_LIST_DELETE(ct->ct_expired, cl); 1531 CALLOUT_LIST_FREE(ct, cl); 1532 } 1533 } 1534 } 1535 1536 /* 1537 * The cyclic handlers below process callouts in two steps: 1538 * 1539 * 1. Find all expired callout lists and queue them in a separate 1540 * list of expired callouts. 1541 * 2. Execute the expired callout lists. 1542 * 1543 * This is done for two reasons: 1544 * 1545 * 1. We want to quickly find the next earliest expiration to program 1546 * the cyclic to and reprogram it. We can do this right at the end 1547 * of step 1. 1548 * 2. The realtime cyclic handler expires callouts in place. However, 1549 * for normal callouts, callouts are expired by a taskq thread. 1550 * So, it is simpler and more robust to have the taskq thread just 1551 * do step 2. 1552 */ 1553 1554 /* 1555 * Realtime callout cyclic handlers. 1556 */ 1557 void 1558 callout_realtime(callout_table_t *ct) 1559 { 1560 mutex_enter(&ct->ct_mutex); 1561 (void) callout_heap_delete(ct); 1562 callout_expire(ct); 1563 mutex_exit(&ct->ct_mutex); 1564 } 1565 1566 void 1567 callout_queue_realtime(callout_table_t *ct) 1568 { 1569 mutex_enter(&ct->ct_mutex); 1570 (void) callout_queue_delete(ct); 1571 callout_expire(ct); 1572 mutex_exit(&ct->ct_mutex); 1573 } 1574 1575 void 1576 callout_execute(callout_table_t *ct) 1577 { 1578 mutex_enter(&ct->ct_mutex); 1579 callout_expire(ct); 1580 mutex_exit(&ct->ct_mutex); 1581 } 1582 1583 /* 1584 * Normal callout cyclic handlers. 1585 */ 1586 void 1587 callout_normal(callout_table_t *ct) 1588 { 1589 int i, exec; 1590 hrtime_t exp; 1591 1592 mutex_enter(&ct->ct_mutex); 1593 exp = callout_heap_delete(ct); 1594 CALLOUT_EXEC_COMPUTE(ct, exp, exec); 1595 mutex_exit(&ct->ct_mutex); 1596 1597 for (i = 0; i < exec; i++) { 1598 ASSERT(ct->ct_taskq != NULL); 1599 (void) taskq_dispatch(ct->ct_taskq, 1600 (task_func_t *)callout_execute, ct, TQ_NOSLEEP); 1601 } 1602 } 1603 1604 void 1605 callout_queue_normal(callout_table_t *ct) 1606 { 1607 int i, exec; 1608 hrtime_t exp; 1609 1610 mutex_enter(&ct->ct_mutex); 1611 exp = callout_queue_delete(ct); 1612 CALLOUT_EXEC_COMPUTE(ct, exp, exec); 1613 mutex_exit(&ct->ct_mutex); 1614 1615 for (i = 0; i < exec; i++) { 1616 ASSERT(ct->ct_taskq != NULL); 1617 (void) taskq_dispatch(ct->ct_taskq, 1618 (task_func_t *)callout_execute, ct, TQ_NOSLEEP); 1619 } 1620 } 1621 1622 /* 1623 * Suspend callout processing. 1624 */ 1625 static void 1626 callout_suspend(void) 1627 { 1628 int t, f; 1629 callout_table_t *ct; 1630 1631 /* 1632 * Traverse every callout table in the system and suspend callout 1633 * processing. 1634 * 1635 * We need to suspend all the tables (including the inactive ones) 1636 * so that if a table is made active while the suspend is still on, 1637 * the table remains suspended. 1638 */ 1639 for (f = 0; f < max_ncpus; f++) { 1640 for (t = 0; t < CALLOUT_NTYPES; t++) { 1641 ct = &callout_table[CALLOUT_TABLE(t, f)]; 1642 1643 mutex_enter(&ct->ct_mutex); 1644 ct->ct_suspend++; 1645 if (ct->ct_cyclic == CYCLIC_NONE) { 1646 mutex_exit(&ct->ct_mutex); 1647 continue; 1648 } 1649 if (ct->ct_suspend == 1) { 1650 (void) cyclic_reprogram(ct->ct_cyclic, 1651 CY_INFINITY); 1652 (void) cyclic_reprogram(ct->ct_qcyclic, 1653 CY_INFINITY); 1654 } 1655 mutex_exit(&ct->ct_mutex); 1656 } 1657 } 1658 } 1659 1660 /* 1661 * Resume callout processing. 1662 */ 1663 static void 1664 callout_resume(hrtime_t delta, int timechange) 1665 { 1666 hrtime_t hexp, qexp; 1667 int t, f; 1668 callout_table_t *ct; 1669 1670 /* 1671 * Traverse every callout table in the system and resume callout 1672 * processing. For active tables, perform any hrtime adjustments 1673 * necessary. 1674 */ 1675 for (f = 0; f < max_ncpus; f++) { 1676 for (t = 0; t < CALLOUT_NTYPES; t++) { 1677 ct = &callout_table[CALLOUT_TABLE(t, f)]; 1678 1679 mutex_enter(&ct->ct_mutex); 1680 if (ct->ct_cyclic == CYCLIC_NONE) { 1681 ct->ct_suspend--; 1682 mutex_exit(&ct->ct_mutex); 1683 continue; 1684 } 1685 1686 /* 1687 * If a delta is specified, adjust the expirations in 1688 * the heap by delta. Also, if the caller indicates 1689 * a timechange, process that. This step also cleans 1690 * out any empty callout lists that might happen to 1691 * be there. 1692 */ 1693 hexp = callout_heap_process(ct, delta, timechange); 1694 qexp = callout_queue_process(ct, delta, timechange); 1695 1696 ct->ct_suspend--; 1697 if (ct->ct_suspend == 0) { 1698 (void) cyclic_reprogram(ct->ct_cyclic, hexp); 1699 (void) cyclic_reprogram(ct->ct_qcyclic, qexp); 1700 } 1701 1702 mutex_exit(&ct->ct_mutex); 1703 } 1704 } 1705 } 1706 1707 /* 1708 * Callback handler used by CPR to stop and resume callouts. 1709 * The cyclic subsystem saves and restores hrtime during CPR. 1710 * That is why callout_resume() is called with a 0 delta. 1711 * Although hrtime is the same, hrestime (system time) has 1712 * progressed during CPR. So, we have to indicate a time change 1713 * to expire the absolute hrestime timers. 1714 */ 1715 /*ARGSUSED*/ 1716 static boolean_t 1717 callout_cpr_callb(void *arg, int code) 1718 { 1719 if (code == CB_CODE_CPR_CHKPT) 1720 callout_suspend(); 1721 else 1722 callout_resume(0, 1); 1723 1724 return (B_TRUE); 1725 } 1726 1727 /* 1728 * Callback handler invoked when the debugger is entered or exited. 1729 */ 1730 /*ARGSUSED*/ 1731 static boolean_t 1732 callout_debug_callb(void *arg, int code) 1733 { 1734 hrtime_t delta; 1735 1736 /* 1737 * When the system enters the debugger. make a note of the hrtime. 1738 * When it is resumed, compute how long the system was in the 1739 * debugger. This interval should not be counted for callouts. 1740 */ 1741 if (code == 0) { 1742 callout_suspend(); 1743 callout_debug_hrtime = gethrtime(); 1744 } else { 1745 delta = gethrtime() - callout_debug_hrtime; 1746 callout_resume(delta, 0); 1747 } 1748 1749 return (B_TRUE); 1750 } 1751 1752 /* 1753 * Move the absolute hrestime callouts to the expired list. Then program the 1754 * table's cyclic to expire immediately so that the callouts can be executed 1755 * immediately. 1756 */ 1757 static void 1758 callout_hrestime_one(callout_table_t *ct) 1759 { 1760 hrtime_t hexp, qexp; 1761 1762 mutex_enter(&ct->ct_mutex); 1763 if (ct->ct_cyclic == CYCLIC_NONE) { 1764 mutex_exit(&ct->ct_mutex); 1765 return; 1766 } 1767 1768 /* 1769 * Walk the heap and process all the absolute hrestime entries. 1770 */ 1771 hexp = callout_heap_process(ct, 0, 1); 1772 qexp = callout_queue_process(ct, 0, 1); 1773 1774 if (ct->ct_suspend == 0) { 1775 (void) cyclic_reprogram(ct->ct_cyclic, hexp); 1776 (void) cyclic_reprogram(ct->ct_qcyclic, qexp); 1777 } 1778 1779 mutex_exit(&ct->ct_mutex); 1780 } 1781 1782 /* 1783 * This function is called whenever system time (hrestime) is changed 1784 * explicitly. All the HRESTIME callouts must be expired at once. 1785 */ 1786 /*ARGSUSED*/ 1787 void 1788 callout_hrestime(void) 1789 { 1790 int t, f; 1791 callout_table_t *ct; 1792 1793 /* 1794 * Traverse every callout table in the system and process the hrestime 1795 * callouts therein. 1796 * 1797 * We look at all the tables because we don't know which ones were 1798 * onlined and offlined in the past. The offlined tables may still 1799 * have active cyclics processing timers somewhere. 1800 */ 1801 for (f = 0; f < max_ncpus; f++) { 1802 for (t = 0; t < CALLOUT_NTYPES; t++) { 1803 ct = &callout_table[CALLOUT_TABLE(t, f)]; 1804 callout_hrestime_one(ct); 1805 } 1806 } 1807 } 1808 1809 /* 1810 * Create the hash tables for this callout table. 1811 */ 1812 static void 1813 callout_hash_init(callout_table_t *ct) 1814 { 1815 size_t size; 1816 1817 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1818 ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL)); 1819 1820 size = sizeof (callout_hash_t) * CALLOUT_BUCKETS; 1821 ct->ct_idhash = kmem_zalloc(size, KM_SLEEP); 1822 ct->ct_clhash = kmem_zalloc(size, KM_SLEEP); 1823 } 1824 1825 /* 1826 * Create per-callout table kstats. 1827 */ 1828 static void 1829 callout_kstat_init(callout_table_t *ct) 1830 { 1831 callout_stat_type_t stat; 1832 kstat_t *ct_kstats; 1833 int ndx; 1834 1835 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1836 ASSERT(ct->ct_kstats == NULL); 1837 1838 ndx = ct - callout_table; 1839 ct_kstats = kstat_create("unix", ndx, "callout", 1840 "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL); 1841 1842 if (ct_kstats == NULL) { 1843 cmn_err(CE_WARN, "kstat_create for callout table %p failed", 1844 (void *)ct); 1845 } else { 1846 ct_kstats->ks_data = ct->ct_kstat_data; 1847 for (stat = 0; stat < CALLOUT_NUM_STATS; stat++) 1848 kstat_named_init(&ct->ct_kstat_data[stat], 1849 callout_kstat_names[stat], KSTAT_DATA_INT64); 1850 ct->ct_kstats = ct_kstats; 1851 kstat_install(ct_kstats); 1852 } 1853 } 1854 1855 static void 1856 callout_cyclic_init(callout_table_t *ct) 1857 { 1858 cyc_handler_t hdlr; 1859 cyc_time_t when; 1860 processorid_t seqid; 1861 int t; 1862 cyclic_id_t cyclic, qcyclic; 1863 1864 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1865 1866 t = ct->ct_type; 1867 seqid = CALLOUT_TABLE_SEQID(ct); 1868 1869 /* 1870 * Create the taskq thread if the table type is normal. 1871 * Realtime tables are handled at PIL1 by a softint 1872 * handler. 1873 */ 1874 if (t == CALLOUT_NORMAL) { 1875 ASSERT(ct->ct_taskq == NULL); 1876 /* 1877 * Each callout thread consumes exactly one 1878 * task structure while active. Therefore, 1879 * prepopulating with 2 * callout_threads tasks 1880 * ensures that there's at least one task per 1881 * thread that's either scheduled or on the 1882 * freelist. In turn, this guarantees that 1883 * taskq_dispatch() will always either succeed 1884 * (because there's a free task structure) or 1885 * be unnecessary (because "callout_excute(ct)" 1886 * has already scheduled). 1887 */ 1888 ct->ct_taskq = 1889 taskq_create_instance("callout_taskq", seqid, 1890 callout_threads, maxclsyspri, 1891 2 * callout_threads, 2 * callout_threads, 1892 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 1893 } 1894 1895 /* 1896 * callouts can only be created in a table whose 1897 * cyclic has been initialized. 1898 */ 1899 ASSERT(ct->ct_heap_num == 0); 1900 1901 /* 1902 * Drop the mutex before creating the callout cyclics. cyclic_add() 1903 * could potentially expand the cyclic heap. We don't want to be 1904 * holding the callout table mutex in that case. Note that this 1905 * function is called during CPU online. cpu_lock is held at this 1906 * point. So, only one thread can be executing the cyclic add logic 1907 * below at any time. 1908 */ 1909 mutex_exit(&ct->ct_mutex); 1910 1911 /* 1912 * Create the callout table cyclics. 1913 * 1914 * The realtime cyclic handler executes at low PIL. The normal cyclic 1915 * handler executes at lock PIL. This is because there are cases 1916 * where code can block at PIL > 1 waiting for a normal callout handler 1917 * to unblock it directly or indirectly. If the normal cyclic were to 1918 * be executed at low PIL, it could get blocked out by the waiter 1919 * and cause a deadlock. 1920 */ 1921 ASSERT(ct->ct_cyclic == CYCLIC_NONE); 1922 1923 if (t == CALLOUT_REALTIME) { 1924 hdlr.cyh_level = callout_realtime_level; 1925 hdlr.cyh_func = (cyc_func_t)callout_realtime; 1926 } else { 1927 hdlr.cyh_level = callout_normal_level; 1928 hdlr.cyh_func = (cyc_func_t)callout_normal; 1929 } 1930 hdlr.cyh_arg = ct; 1931 when.cyt_when = CY_INFINITY; 1932 when.cyt_interval = CY_INFINITY; 1933 1934 cyclic = cyclic_add(&hdlr, &when); 1935 1936 if (t == CALLOUT_REALTIME) 1937 hdlr.cyh_func = (cyc_func_t)callout_queue_realtime; 1938 else 1939 hdlr.cyh_func = (cyc_func_t)callout_queue_normal; 1940 1941 qcyclic = cyclic_add(&hdlr, &when); 1942 1943 mutex_enter(&ct->ct_mutex); 1944 ct->ct_cyclic = cyclic; 1945 ct->ct_qcyclic = qcyclic; 1946 } 1947 1948 void 1949 callout_cpu_online(cpu_t *cp) 1950 { 1951 lgrp_handle_t hand; 1952 callout_cache_t *cache; 1953 char s[KMEM_CACHE_NAMELEN]; 1954 callout_table_t *ct; 1955 processorid_t seqid; 1956 int t; 1957 1958 ASSERT(MUTEX_HELD(&cpu_lock)); 1959 1960 /* 1961 * Locate the cache corresponding to the onlined CPU's lgroup. 1962 * Note that access to callout_caches is protected by cpu_lock. 1963 */ 1964 hand = lgrp_plat_cpu_to_hand(cp->cpu_id); 1965 for (cache = callout_caches; cache != NULL; cache = cache->cc_next) { 1966 if (cache->cc_hand == hand) 1967 break; 1968 } 1969 1970 /* 1971 * If not found, create one. The caches are never destroyed. 1972 */ 1973 if (cache == NULL) { 1974 cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP); 1975 cache->cc_hand = hand; 1976 (void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx", 1977 (long)hand); 1978 cache->cc_cache = kmem_cache_create(s, sizeof (callout_t), 1979 CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); 1980 (void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx", 1981 (long)hand); 1982 cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t), 1983 CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); 1984 cache->cc_next = callout_caches; 1985 callout_caches = cache; 1986 } 1987 1988 seqid = cp->cpu_seqid; 1989 1990 for (t = 0; t < CALLOUT_NTYPES; t++) { 1991 ct = &callout_table[CALLOUT_TABLE(t, seqid)]; 1992 1993 mutex_enter(&ct->ct_mutex); 1994 /* 1995 * Store convinience pointers to the kmem caches 1996 * in the callout table. These assignments should always be 1997 * done as callout tables can map to different physical 1998 * CPUs each time. 1999 */ 2000 ct->ct_cache = cache->cc_cache; 2001 ct->ct_lcache = cache->cc_lcache; 2002 2003 /* 2004 * We use the heap pointer to check if stuff has been 2005 * initialized for this callout table. 2006 */ 2007 if (ct->ct_heap == NULL) { 2008 callout_heap_init(ct); 2009 callout_hash_init(ct); 2010 callout_kstat_init(ct); 2011 callout_cyclic_init(ct); 2012 } 2013 2014 mutex_exit(&ct->ct_mutex); 2015 2016 /* 2017 * Move the cyclics to this CPU by doing a bind. 2018 */ 2019 cyclic_bind(ct->ct_cyclic, cp, NULL); 2020 cyclic_bind(ct->ct_qcyclic, cp, NULL); 2021 } 2022 } 2023 2024 void 2025 callout_cpu_offline(cpu_t *cp) 2026 { 2027 callout_table_t *ct; 2028 processorid_t seqid; 2029 int t; 2030 2031 ASSERT(MUTEX_HELD(&cpu_lock)); 2032 2033 seqid = cp->cpu_seqid; 2034 2035 for (t = 0; t < CALLOUT_NTYPES; t++) { 2036 ct = &callout_table[CALLOUT_TABLE(t, seqid)]; 2037 2038 /* 2039 * Unbind the cyclics. This will allow the cyclic subsystem 2040 * to juggle the cyclics during CPU offline. 2041 */ 2042 cyclic_bind(ct->ct_cyclic, NULL, NULL); 2043 cyclic_bind(ct->ct_qcyclic, NULL, NULL); 2044 } 2045 } 2046 2047 /* 2048 * This is called to perform per-CPU initialization for slave CPUs at 2049 * boot time. 2050 */ 2051 void 2052 callout_mp_init(void) 2053 { 2054 cpu_t *cp; 2055 size_t min, max; 2056 2057 if (callout_chunk == CALLOUT_CHUNK) { 2058 /* 2059 * No one has specified a chunk in /etc/system. We need to 2060 * compute it here based on the number of online CPUs and 2061 * available physical memory. 2062 */ 2063 min = CALLOUT_MIN_HEAP_SIZE; 2064 max = ptob(physmem / CALLOUT_MEM_FRACTION); 2065 if (min > max) 2066 min = max; 2067 callout_chunk = min / sizeof (callout_heap_t); 2068 callout_chunk /= ncpus_online; 2069 callout_chunk = P2ROUNDUP(callout_chunk, CALLOUT_CHUNK); 2070 } 2071 2072 mutex_enter(&cpu_lock); 2073 2074 cp = cpu_active; 2075 do { 2076 callout_cpu_online(cp); 2077 } while ((cp = cp->cpu_next_onln) != cpu_active); 2078 2079 mutex_exit(&cpu_lock); 2080 } 2081 2082 /* 2083 * Initialize all callout tables. Called at boot time just before clkstart(). 2084 */ 2085 void 2086 callout_init(void) 2087 { 2088 int f, t; 2089 size_t size; 2090 int table_id; 2091 callout_table_t *ct; 2092 long bits, fanout; 2093 uintptr_t buf; 2094 2095 /* 2096 * Initialize callout globals. 2097 */ 2098 bits = 0; 2099 for (fanout = 1; (fanout < max_ncpus); fanout <<= 1) 2100 bits++; 2101 callout_table_bits = CALLOUT_TYPE_BITS + bits; 2102 callout_table_mask = (1 << callout_table_bits) - 1; 2103 callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT; 2104 callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS); 2105 callout_max_ticks = CALLOUT_MAX_TICKS; 2106 if (callout_min_reap == 0) 2107 callout_min_reap = CALLOUT_MIN_REAP; 2108 2109 if (callout_tolerance <= 0) 2110 callout_tolerance = CALLOUT_TOLERANCE; 2111 if (callout_threads <= 0) 2112 callout_threads = CALLOUT_THREADS; 2113 if (callout_chunk <= 0) 2114 callout_chunk = CALLOUT_CHUNK; 2115 else 2116 callout_chunk = P2ROUNDUP(callout_chunk, CALLOUT_CHUNK); 2117 2118 /* 2119 * Allocate all the callout tables based on max_ncpus. We have chosen 2120 * to do boot-time allocation instead of dynamic allocation because: 2121 * 2122 * - the size of the callout tables is not too large. 2123 * - there are race conditions involved in making this dynamic. 2124 * - the hash tables that go with the callout tables consume 2125 * most of the memory and they are only allocated in 2126 * callout_cpu_online(). 2127 * 2128 * Each CPU has two tables that are consecutive in the array. The first 2129 * one is for realtime callouts and the second one is for normal ones. 2130 * 2131 * We do this alignment dance to make sure that callout table 2132 * structures will always be on a cache line boundary. 2133 */ 2134 size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus; 2135 size += CALLOUT_ALIGN; 2136 buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP); 2137 callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN); 2138 2139 size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS; 2140 /* 2141 * Now, initialize the tables for all the CPUs. 2142 */ 2143 for (f = 0; f < max_ncpus; f++) { 2144 for (t = 0; t < CALLOUT_NTYPES; t++) { 2145 table_id = CALLOUT_TABLE(t, f); 2146 ct = &callout_table[table_id]; 2147 ct->ct_type = t; 2148 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 2149 /* 2150 * Precompute the base IDs for long and short-term 2151 * legacy IDs. This makes ID generation during 2152 * timeout() fast. 2153 */ 2154 ct->ct_short_id = CALLOUT_SHORT_ID(table_id); 2155 ct->ct_long_id = CALLOUT_LONG_ID(table_id); 2156 /* 2157 * Precompute the base ID for generation-based IDs. 2158 * Note that when the first ID gets allocated, the 2159 * ID will wrap. This will cause the generation 2160 * number to be incremented to 1. 2161 */ 2162 ct->ct_gen_id = CALLOUT_SHORT_ID(table_id); 2163 /* 2164 * Initialize the cyclics as NONE. This will get set 2165 * during CPU online. This is so that partially 2166 * populated systems will only have the required 2167 * number of cyclics, not more. 2168 */ 2169 ct->ct_cyclic = CYCLIC_NONE; 2170 ct->ct_qcyclic = CYCLIC_NONE; 2171 ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP); 2172 } 2173 } 2174 2175 /* 2176 * Add the callback for CPR. This is called during checkpoint 2177 * resume to suspend and resume callouts. 2178 */ 2179 (void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, 2180 "callout_cpr"); 2181 (void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER, 2182 "callout_debug"); 2183 2184 /* 2185 * Call the per-CPU initialization function for the boot CPU. This 2186 * is done here because the function is not called automatically for 2187 * the boot CPU from the CPU online/offline hooks. Note that the 2188 * CPU lock is taken here because of convention. 2189 */ 2190 mutex_enter(&cpu_lock); 2191 callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)]; 2192 callout_cpu_online(CPU); 2193 mutex_exit(&cpu_lock); 2194 2195 /* heads-up to boot-time clients that timeouts now available */ 2196 callout_init_done = 1; 2197 } 2198