/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

#include <sys/callo.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/kmem.h>
#include <sys/kmem_impl.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>

int callout_init_done;				/* useful during boot */

/*
 * Callout tables. See timeout(9F) for details.
 */
static int callout_threads;			/* callout normal threads */
static hrtime_t callout_debug_hrtime;		/* debugger entry time */
static int callout_chunk;			/* callout heap chunk size */
static int callout_min_reap;			/* callout minimum reap count */
static int callout_tolerance;			/* callout hires tolerance */
static callout_table_t *callout_boot_ct;	/* Boot CPU's callout tables */
static clock_t callout_max_ticks;		/* max interval */
static hrtime_t callout_longterm;		/* longterm nanoseconds */
static ulong_t callout_counter_low;		/* callout ID increment */
static ulong_t callout_table_bits;		/* number of table bits in ID */
static ulong_t callout_table_mask;		/* mask for the table bits */
static callout_cache_t *callout_caches;		/* linked list of caches */
#pragma align 64(callout_table)
static callout_table_t *callout_table;		/* global callout table array */

/*
 * We run 'realtime' callouts at PIL 1 (CY_LOW_LEVEL). For 'normal'
 * callouts, from PIL 10 (CY_LOCK_LEVEL) we dispatch the callout,
 * via taskq, to a thread that executes at PIL 0 - so we end up running
 * 'normal' callouts at PIL 0.
 */
static volatile int callout_realtime_level = CY_LOW_LEVEL;
static volatile int callout_normal_level = CY_LOCK_LEVEL;

static char *callout_kstat_names[] = {
	"callout_timeouts",
	"callout_timeouts_pending",
	"callout_untimeouts_unexpired",
	"callout_untimeouts_executing",
	"callout_untimeouts_expired",
	"callout_expirations",
	"callout_allocations",
	"callout_cleanups",
};

static hrtime_t callout_heap_process(callout_table_t *, hrtime_t, int);

#define	CALLOUT_HASH_INSERT(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	cp->cprev = NULL;				\
	cp->cnext = hashp->ch_head;			\
	if (hashp->ch_head == NULL)			\
		hashp->ch_tail = cp;			\
	else						\
		cp->cnext->cprev = cp;			\
	hashp->ch_head = cp;				\
}

#define	CALLOUT_HASH_APPEND(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	cp->cnext = NULL;				\
	cp->cprev = hashp->ch_tail;			\
	if (hashp->ch_tail == NULL)			\
		hashp->ch_head = cp;			\
	else						\
		cp->cprev->cnext = cp;			\
	hashp->ch_tail = cp;				\
}

#define	CALLOUT_HASH_DELETE(hash, cp, cnext, cprev)	\
{							\
	callout_hash_t *hashp = &(hash);		\
							\
	if (cp->cnext == NULL)				\
		hashp->ch_tail = cp->cprev;		\
	else						\
		cp->cnext->cprev = cp->cprev;		\
	if (cp->cprev == NULL)				\
		hashp->ch_head = cp->cnext;		\
	else						\
		cp->cprev->cnext = cp->cnext;		\
}
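
/*
 * As an illustration of the list discipline above: the three macros keep a
 * doubly linked list threaded through the given next/prev fields, with both
 * head and tail pointers in the callout_hash_t. A hypothetical caller with
 * callout_t pointers cp1 and cp2 linked through c_idnext/c_idprev could do:
 *
 *	CALLOUT_HASH_INSERT(hash, cp1, c_idnext, c_idprev);	// at the head
 *	CALLOUT_HASH_APPEND(hash, cp2, c_idnext, c_idprev);	// at the tail
 *	CALLOUT_HASH_DELETE(hash, cp1, c_idnext, c_idprev);	// unlink
 *
 * Note that 'cp' is evaluated several times, so the argument must be a
 * simple pointer variable, as it is at every call site in this file.
 */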

/*
 * These definitions help us queue callouts and callout lists. Here is
 * the queueing rationale:
 *
 *	- callouts are queued in a FIFO manner in the ID hash table.
 *	  TCP timers are typically cancelled in the same order that they
 *	  were issued. The FIFO queueing shortens the search for a callout
 *	  during untimeout().
 *
 *	- callouts are queued in a FIFO manner in their callout lists.
 *	  This ensures that the callouts are executed in the same order that
 *	  they were queued. This is fair. Plus, it helps to make each
 *	  callout expiration timely. It also favors cancellations.
 *
 *	- callout lists are queued in the following manner in the callout
 *	  hash table buckets:
 *
 *		- appended, if the callout list is a 1-nanosecond resolution
 *		  callout list. When a callout is created, we first look for
 *		  a callout list that has the same expiration so we can avoid
 *		  allocating a callout list and inserting the expiration into
 *		  the heap. However, we do not want to look at 1-nanosecond
 *		  resolution callout lists as we will seldom find a match in
 *		  them. Keeping these callout lists in the rear of the hash
 *		  buckets allows us to skip these during the lookup.
 *
 *		- inserted at the beginning, if the callout list is not a
 *		  1-nanosecond resolution callout list. This also has the
 *		  side-effect of keeping the long term timers away from the
 *		  front of the buckets.
 *
 *	- callout lists are queued in a FIFO manner in the expired callouts
 *	  list. This ensures that callout lists are executed in the order
 *	  of expiration.
 */
#define	CALLOUT_APPEND(ct, cp)						\
	CALLOUT_HASH_APPEND(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)],	\
	    cp, c_idnext, c_idprev);					\
	CALLOUT_HASH_APPEND(cp->c_list->cl_callouts, cp, c_clnext, c_clprev)

#define	CALLOUT_DELETE(ct, cp)						\
	CALLOUT_HASH_DELETE(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)],	\
	    cp, c_idnext, c_idprev);					\
	CALLOUT_HASH_DELETE(cp->c_list->cl_callouts, cp, c_clnext, c_clprev)

#define	CALLOUT_LIST_INSERT(hash, cl)				\
	CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev)

#define	CALLOUT_LIST_APPEND(hash, cl)				\
	CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev)

#define	CALLOUT_LIST_DELETE(hash, cl)				\
	CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev)

#define	CALLOUT_LIST_BEFORE(cl, nextcl)			\
{							\
	(cl)->cl_prev = (nextcl)->cl_prev;		\
	(cl)->cl_next = (nextcl);			\
	(nextcl)->cl_prev = (cl);			\
	if (cl->cl_prev != NULL)			\
		cl->cl_prev->cl_next = cl;		\
}

/*
 * For normal callouts, there is a deadlock scenario if two callouts that
 * have an inter-dependency end up on the same callout list. To break the
 * deadlock, you need two taskq threads running in parallel. We compute
 * the number of taskq threads here using a bunch of conditions to make
 * it optimal for the common case. This is an ugly hack, but one that is
 * necessary (sigh).
 */
#define	CALLOUT_THRESHOLD	100000000
#define	CALLOUT_EXEC_COMPUTE(ct, nextexp, exec)				\
{									\
	callout_list_t *cl;						\
									\
	cl = ct->ct_expired.ch_head;					\
	if (cl == NULL) {						\
		/*							\
		 * If the expired list is NULL, there is nothing to	\
		 * process.						\
		 */							\
		exec = 0;						\
	} else if ((cl->cl_next == NULL) &&				\
	    (cl->cl_callouts.ch_head == cl->cl_callouts.ch_tail)) {	\
		/*							\
		 * If there is only one callout list and it contains	\
		 * only one callout, there is no need for two threads.	\
		 */							\
		exec = 1;						\
	} else if ((nextexp) > (gethrtime() + CALLOUT_THRESHOLD)) {	\
		/*							\
		 * If the next expiration of the cyclic is way out into	\
		 * the future, we need two threads.			\
		 */							\
		exec = 2;						\
	} else {							\
		/*							\
		 * We have multiple callouts to process. But the cyclic	\
		 * will fire in the near future. So, we only need one	\
		 * thread for now.					\
		 */							\
		exec = 1;						\
	}								\
}

/*
 * Macro to swap two heap items.
 */
#define	CALLOUT_SWAP(h1, h2)		\
{					\
	callout_heap_t tmp;		\
					\
	tmp = *h1;			\
	*h1 = *h2;			\
	*h2 = tmp;			\
}

/*
 * Macro to free a callout list.
 */
#define	CALLOUT_LIST_FREE(ct, cl)			\
{							\
	cl->cl_next = ct->ct_lfree;			\
	ct->ct_lfree = cl;				\
	cl->cl_flags |= CALLOUT_LIST_FLAG_FREE;		\
}

/*
 * Macro to free a callout.
 */
#define	CALLOUT_FREE(ct, cp)				\
{							\
	cp->c_idnext = ct->ct_free;			\
	ct->ct_free = cp;				\
	cp->c_xid |= CALLOUT_ID_FREE;			\
}
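
/*
 * A usage sketch for CALLOUT_EXEC_COMPUTE(), mirroring what callout_normal()
 * and callout_queue_normal() below do; 'exec' is the number of taskq threads
 * to dispatch for the expired work:
 *
 *	mutex_enter(&ct->ct_mutex);
 *	exp = callout_heap_delete(ct);
 *	CALLOUT_EXEC_COMPUTE(ct, exp, exec);
 *	mutex_exit(&ct->ct_mutex);
 *
 *	for (i = 0; i < exec; i++)
 *		(void) taskq_dispatch(ct->ct_taskq,
 *		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
 */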

/*
 * Allocate a callout structure. We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing that, we try a KM_PANIC allocation. Note that we never
 * deallocate a callout. See untimeout() for the reasoning.
 */
static callout_t *
callout_alloc(callout_table_t *ct)
{
	size_t size;
	callout_t *cp;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	mutex_exit(&ct->ct_mutex);

	cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP);
	if (cp == NULL) {
		size = sizeof (callout_t);
		cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
	}
	cp->c_xid = 0;
	cp->c_executor = NULL;
	cv_init(&cp->c_done, NULL, CV_DEFAULT, NULL);
	cp->c_waiting = 0;

	mutex_enter(&ct->ct_mutex);
	ct->ct_allocations++;
	return (cp);
}

/*
 * Allocate a callout list structure. We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing that, we try a KM_PANIC allocation. Note that we never
 * deallocate a callout list.
 */
static void
callout_list_alloc(callout_table_t *ct)
{
	size_t size;
	callout_list_t *cl;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	mutex_exit(&ct->ct_mutex);

	cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP);
	if (cl == NULL) {
		size = sizeof (callout_list_t);
		cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
	}
	bzero(cl, sizeof (callout_list_t));

	mutex_enter(&ct->ct_mutex);
	CALLOUT_LIST_FREE(ct, cl);
}

/*
 * Find a callout list that corresponds to an expiration and matching flags.
 */
static callout_list_t *
callout_list_get(callout_table_t *ct, hrtime_t expiration, int flags, int hash)
{
	callout_list_t *cl;
	int clflags;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	if (flags & CALLOUT_LIST_FLAG_NANO) {
		/*
		 * This is a 1-nanosecond resolution callout. We will rarely
		 * find a match for this. So, bail out.
		 */
		return (NULL);
	}

	clflags = (CALLOUT_LIST_FLAG_ABSOLUTE | CALLOUT_LIST_FLAG_HRESTIME);
	for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) {
		/*
		 * If we have reached a 1-nanosecond resolution callout list,
		 * we don't have much hope of finding a match in this hash
		 * bucket. So, just bail out.
		 */
		if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO)
			return (NULL);

		if ((cl->cl_expiration == expiration) &&
		    ((cl->cl_flags & clflags) == (flags & clflags)))
			return (cl);
	}

	return (NULL);
}

/*
 * Add a new callout list into a callout table's queue in sorted order by
 * expiration.
 */
static int
callout_queue_add(callout_table_t *ct, callout_list_t *cl)
{
	callout_list_t *nextcl;
	hrtime_t expiration;

	expiration = cl->cl_expiration;
	nextcl = ct->ct_queue.ch_head;
	if ((nextcl == NULL) || (expiration < nextcl->cl_expiration)) {
		CALLOUT_LIST_INSERT(ct->ct_queue, cl);
		return (1);
	}

	while (nextcl != NULL) {
		if (expiration < nextcl->cl_expiration) {
			CALLOUT_LIST_BEFORE(cl, nextcl);
			return (0);
		}
		nextcl = nextcl->cl_next;
	}
	CALLOUT_LIST_APPEND(ct->ct_queue, cl);

	return (0);
}

/*
 * Insert a callout list into a callout table's queue and reprogram the queue
 * cyclic if needed.
 */
static void
callout_queue_insert(callout_table_t *ct, callout_list_t *cl)
{
	cl->cl_flags |= CALLOUT_LIST_FLAG_QUEUED;

	/*
	 * Add the callout to the callout queue. If it ends up at the head,
	 * the cyclic needs to be reprogrammed as we have an earlier
	 * expiration.
	 *
	 * Also, during the CPR suspend phase, do not reprogram the cyclic.
	 * We don't want any callout activity. When the CPR resume phase is
	 * entered, the cyclic will be programmed for the earliest expiration
	 * in the queue.
	 */
	if (callout_queue_add(ct, cl) && (ct->ct_suspend == 0))
		(void) cyclic_reprogram(ct->ct_qcyclic, cl->cl_expiration);
}

/*
 * Delete and handle all past expirations in a callout table's queue.
 */
static hrtime_t
callout_queue_delete(callout_table_t *ct)
{
	callout_list_t *cl;
	hrtime_t now;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	now = gethrtime();
	while ((cl = ct->ct_queue.ch_head) != NULL) {
		if (cl->cl_expiration > now)
			break;
		cl->cl_flags &= ~CALLOUT_LIST_FLAG_QUEUED;
		CALLOUT_LIST_DELETE(ct->ct_queue, cl);
		CALLOUT_LIST_APPEND(ct->ct_expired, cl);
	}

	/*
	 * If this callout queue is empty or callouts have been suspended,
	 * just return.
	 */
	if ((cl == NULL) || (ct->ct_suspend > 0))
		return (CY_INFINITY);

	(void) cyclic_reprogram(ct->ct_qcyclic, cl->cl_expiration);

	return (cl->cl_expiration);
}

static hrtime_t
callout_queue_process(callout_table_t *ct, hrtime_t delta, int timechange)
{
	callout_list_t *firstcl, *cl;
	hrtime_t expiration, now;
	int clflags;
	callout_hash_t temp;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	firstcl = ct->ct_queue.ch_head;
	if (firstcl == NULL)
		return (CY_INFINITY);

	/*
	 * We walk the callout queue. If we encounter a hrestime entry that
	 * must be removed, we clean it out. Otherwise, we apply any
	 * adjustments needed to it. Because of the latter, we need to
	 * recreate the list as we go along.
	 */
	temp = ct->ct_queue;
	ct->ct_queue.ch_head = NULL;
	ct->ct_queue.ch_tail = NULL;

	clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE);
	now = gethrtime();
	while ((cl = temp.ch_head) != NULL) {
		CALLOUT_LIST_DELETE(temp, cl);

		/*
		 * Delete the callout and expire it, if one of the following
		 * is true:
		 *	- the callout has expired
		 *	- the callout is an absolute hrestime one and
		 *	  there has been a system time change
		 */
		if ((cl->cl_expiration <= now) ||
		    (timechange && ((cl->cl_flags & clflags) == clflags))) {
			cl->cl_flags &= ~CALLOUT_LIST_FLAG_QUEUED;
			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
			continue;
		}

		/*
		 * Apply adjustments, if any. Adjustments are applied after
		 * the system returns from KMDB or OBP. They are only applied
		 * to relative callout lists.
		 */
		if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) {
			expiration = cl->cl_expiration + delta;
			if (expiration <= 0)
				expiration = CY_INFINITY;
			cl->cl_expiration = expiration;
		}

		(void) callout_queue_add(ct, cl);
	}

	/*
	 * We need to return the expiration to help program the cyclic.
	 * If there are expired callouts, the cyclic needs to go off
	 * immediately. If the queue has become empty, then we return infinity.
	 * Else, we return the expiration of the earliest callout in the queue.
	 */
	if (ct->ct_expired.ch_head != NULL)
		return (gethrtime());

	cl = ct->ct_queue.ch_head;
	if (cl == NULL)
		return (CY_INFINITY);

	return (cl->cl_expiration);
}

/*
 * Initialize a callout table's heap, if necessary. Preallocate some free
 * entries so we don't have to check for NULL elsewhere.
 */
static void
callout_heap_init(callout_table_t *ct)
{
	size_t size;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap == NULL);

	ct->ct_heap_num = 0;
	ct->ct_heap_max = callout_chunk;
	size = sizeof (callout_heap_t) * callout_chunk;
	ct->ct_heap = kmem_alloc(size, KM_SLEEP);
}

/*
 * Reallocate the heap. Return 0 if the heap is still full at the end of it.
 * Return 1 otherwise. Note that the heap only expands, it never contracts.
 */
static int
callout_heap_expand(callout_table_t *ct)
{
	size_t max, size, osize;
	callout_heap_t *heap;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num <= ct->ct_heap_max);

	while (ct->ct_heap_num == ct->ct_heap_max) {
		max = ct->ct_heap_max;
		mutex_exit(&ct->ct_mutex);

		osize = sizeof (callout_heap_t) * max;
		size = sizeof (callout_heap_t) * (max + callout_chunk);
		heap = kmem_alloc(size, KM_NOSLEEP);

		mutex_enter(&ct->ct_mutex);
		if (heap == NULL) {
			/*
			 * We could not allocate memory. If we can free up
			 * some entries, that would be great.
			 */
			if (ct->ct_nreap > 0)
				(void) callout_heap_process(ct, 0, 0);
			/*
			 * If we still have no space in the heap, inform the
			 * caller.
			 */
			if (ct->ct_heap_num == ct->ct_heap_max)
				return (0);
			return (1);
		}
		if (max < ct->ct_heap_max) {
			/*
			 * Someone beat us to the allocation. Free what we
			 * just allocated and proceed.
			 */
			kmem_free(heap, size);
			continue;
		}

		bcopy(ct->ct_heap, heap, osize);
		kmem_free(ct->ct_heap, osize);
		ct->ct_heap = heap;
		ct->ct_heap_max = size / sizeof (callout_heap_t);
	}

	return (1);
}

/*
 * Move an expiration from the bottom of the heap to its correct place
 * in the heap. If we reached the root doing this, return 1. Else,
 * return 0.
 */
static int
callout_upheap(callout_table_t *ct)
{
	int current, parent;
	callout_heap_t *heap, *hcurrent, *hparent;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num >= 1);

	if (ct->ct_heap_num == 1) {
		return (1);
	}

	heap = ct->ct_heap;
	current = ct->ct_heap_num - 1;

	for (;;) {
		parent = CALLOUT_HEAP_PARENT(current);
		hparent = &heap[parent];
		hcurrent = &heap[current];

		/*
		 * We have an expiration later than our parent; we're done.
		 */
		if (hcurrent->ch_expiration >= hparent->ch_expiration) {
			return (0);
		}

		/*
		 * We need to swap with our parent, and continue up the heap.
		 */
		CALLOUT_SWAP(hparent, hcurrent);

		/*
		 * If we just reached the root, we're done.
		 */
		if (parent == 0) {
			return (1);
		}

		current = parent;
	}
	/*NOTREACHED*/
}
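
/*
 * The expirations are kept as an implicit binary min-heap in the ct_heap
 * array. Assuming the conventional layout behind CALLOUT_HEAP_PARENT,
 * CALLOUT_HEAP_LEFT and CALLOUT_HEAP_RIGHT in <sys/callo.h> (parent of
 * index i at (i - 1) / 2, children at 2i + 1 and 2i + 2), a small worked
 * example: the entry at index 4 has its parent at index 1 and its children
 * at indices 9 and 10, and heap[0] always holds the earliest expiration.
 */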

/*
 * Insert a new heap item into a callout table's heap.
 */
static void
callout_heap_insert(callout_table_t *ct, callout_list_t *cl)
{
	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num < ct->ct_heap_max);

	cl->cl_flags |= CALLOUT_LIST_FLAG_HEAPED;
	/*
	 * First, copy the expiration and callout list pointer to the bottom
	 * of the heap.
	 */
	ct->ct_heap[ct->ct_heap_num].ch_expiration = cl->cl_expiration;
	ct->ct_heap[ct->ct_heap_num].ch_list = cl;
	ct->ct_heap_num++;

	/*
	 * Now, perform an upheap operation. If we reached the root, then
	 * the cyclic needs to be reprogrammed as we have an earlier
	 * expiration.
	 *
	 * Also, during the CPR suspend phase, do not reprogram the cyclic.
	 * We don't want any callout activity. When the CPR resume phase is
	 * entered, the cyclic will be programmed for the earliest expiration
	 * in the heap.
	 */
	if (callout_upheap(ct) && (ct->ct_suspend == 0))
		(void) cyclic_reprogram(ct->ct_cyclic, cl->cl_expiration);
}

/*
 * Move an expiration from the top of the heap to its correct place
 * in the heap.
 */
static void
callout_downheap(callout_table_t *ct)
{
	int current, left, right, nelems;
	callout_heap_t *heap, *hleft, *hright, *hcurrent;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num >= 1);

	heap = ct->ct_heap;
	current = 0;
	nelems = ct->ct_heap_num;

	for (;;) {
		/*
		 * If we don't have a left child (i.e., we're a leaf), we're
		 * done.
		 */
		if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems)
			return;

		hleft = &heap[left];
		hcurrent = &heap[current];

		right = CALLOUT_HEAP_RIGHT(current);

		/*
		 * Even if we don't have a right child, we still need to
		 * compare our expiration against that of our left child.
		 */
		if (right >= nelems)
			goto comp_left;

		hright = &heap[right];

		/*
		 * We have both a left and a right child. We need to compare
		 * the expiration of the children to determine which
		 * expires earlier.
		 */
		if (hright->ch_expiration < hleft->ch_expiration) {
			/*
			 * Our right child is the earlier of our children.
			 * We'll now compare our expiration to its expiration.
			 * If ours is the earlier one, we're done.
			 */
			if (hcurrent->ch_expiration <= hright->ch_expiration)
				return;

			/*
			 * Our right child expires earlier than we do; swap
			 * with our right child, and descend right.
			 */
			CALLOUT_SWAP(hright, hcurrent);
			current = right;
			continue;
		}

comp_left:
		/*
		 * Our left child is the earlier of our children (or we have
		 * no right child). We'll now compare our expiration
		 * to its expiration. If ours is the earlier one, we're done.
		 */
		if (hcurrent->ch_expiration <= hleft->ch_expiration)
			return;

		/*
		 * Our left child expires earlier than we do; swap with our
		 * left child, and descend left.
		 */
		CALLOUT_SWAP(hleft, hcurrent);
		current = left;
	}
}

/*
 * Delete and handle all past expirations in a callout table's heap.
 */
static hrtime_t
callout_heap_delete(callout_table_t *ct)
{
	hrtime_t now, expiration, next;
	callout_list_t *cl;
	callout_heap_t *heap;
	int hash;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	if (CALLOUT_CLEANUP(ct)) {
		/*
		 * There are too many heap elements pointing to empty callout
		 * lists. Clean them out.
		 */
		(void) callout_heap_process(ct, 0, 0);
	}

	now = gethrtime();
	heap = ct->ct_heap;

	while (ct->ct_heap_num > 0) {
		expiration = heap->ch_expiration;
		hash = CALLOUT_CLHASH(expiration);
		cl = heap->ch_list;
		ASSERT(expiration == cl->cl_expiration);

		if (cl->cl_callouts.ch_head == NULL) {
			/*
			 * If the callout list is empty, reap it.
			 * Decrement the reap count.
			 */
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			CALLOUT_LIST_FREE(ct, cl);
			ct->ct_nreap--;
		} else {
			/*
			 * If the root of the heap expires in the future,
			 * bail out.
			 */
			if (expiration > now)
				break;

			/*
			 * Move the callout list for this expiration to the
			 * list of expired callout lists. It will be processed
			 * by the callout executor.
			 */
			cl->cl_flags &= ~CALLOUT_LIST_FLAG_HEAPED;
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
		}

		/*
		 * Now delete the root. This is done by swapping the root with
		 * the last item in the heap and downheaping the item.
		 */
		ct->ct_heap_num--;
		if (ct->ct_heap_num > 0) {
			heap[0] = heap[ct->ct_heap_num];
			callout_downheap(ct);
		}
	}

	/*
	 * If this callout table is empty or callouts have been suspended,
	 * just return. The cyclic has already been programmed to
	 * infinity by the cyclic subsystem.
	 */
	if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0))
		return (CY_INFINITY);

	/*
	 * If the top expirations are within callout_tolerance of each other,
	 * delay the cyclic expire so that they can be processed together.
	 * This is to prevent high resolution timers from swamping the system
	 * with cyclic activity.
	 */
	if (ct->ct_heap_num > 2) {
		next = expiration + callout_tolerance;
		if ((heap[1].ch_expiration < next) ||
		    (heap[2].ch_expiration < next))
			expiration = next;
	}

	(void) cyclic_reprogram(ct->ct_cyclic, expiration);

	return (expiration);
}
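
/*
 * A worked example of the callout_tolerance batching above (numbers are
 * illustrative only): if the new root expires at time T, heap[1] expires at
 * T + 3us and callout_tolerance is 10us, the cyclic is programmed for
 * T + 10us instead of T, so both expirations are handled by a single cyclic
 * firing rather than two back-to-back ones.
 */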

/*
 * There are some situations when the entire heap is walked and processed.
 * This function is called to do the processing. These are the situations:
 *
 * 1. When the reap count reaches its threshold, the heap has to be cleared
 *    of all empty callout lists.
 *
 * 2. When the system enters and exits KMDB/OBP, all entries in the heap
 *    need to be adjusted by the interval spent in KMDB/OBP.
 *
 * 3. When system time is changed, the heap has to be scanned for
 *    absolute hrestime timers. These need to be removed from the heap
 *    and expired immediately.
 *
 * In cases 2 and 3, it is a good idea to do 1 as well since we are
 * scanning the heap anyway.
 *
 * If the root gets changed and/or callout lists are expired, return the
 * new expiration to the caller so it can reprogram the cyclic accordingly.
 */
static hrtime_t
callout_heap_process(callout_table_t *ct, hrtime_t delta, int timechange)
{
	callout_heap_t *heap;
	callout_list_t *cl;
	hrtime_t expiration, now;
	int i, hash, clflags;
	ulong_t num;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	if (ct->ct_heap_num == 0)
		return (CY_INFINITY);

	if (ct->ct_nreap > 0)
		ct->ct_cleanups++;

	heap = ct->ct_heap;

	/*
	 * We walk the heap from the top to the bottom. If we encounter
	 * a heap item that points to an empty callout list, we clean
	 * it out. If we encounter a hrestime entry that must be removed,
	 * again we clean it out. Otherwise, we apply any adjustments needed
	 * to an element.
	 *
	 * During the walk, we also compact the heap from the bottom and
	 * reconstruct the heap using upheap operations. This is very
	 * efficient if the number of elements to be cleaned is greater than
	 * or equal to half the heap. This is the common case.
	 *
	 * Even in the non-common case, the upheap operations should be short
	 * as the entries below generally tend to be bigger than the entries
	 * above.
	 */
	num = ct->ct_heap_num;
	ct->ct_heap_num = 0;
	clflags = (CALLOUT_LIST_FLAG_HRESTIME | CALLOUT_LIST_FLAG_ABSOLUTE);
	now = gethrtime();
	for (i = 0; i < num; i++) {
		cl = heap[i].ch_list;
		/*
		 * If the callout list is empty, delete the heap element and
		 * free the callout list.
		 */
		if (cl->cl_callouts.ch_head == NULL) {
			hash = CALLOUT_CLHASH(cl->cl_expiration);
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			CALLOUT_LIST_FREE(ct, cl);
			continue;
		}

		/*
		 * Delete the heap element and expire the callout list, if
		 * one of the following is true:
		 *	- the callout list has expired
		 *	- the callout list is an absolute hrestime one and
		 *	  there has been a system time change
		 */
		if ((cl->cl_expiration <= now) ||
		    (timechange && ((cl->cl_flags & clflags) == clflags))) {
			hash = CALLOUT_CLHASH(cl->cl_expiration);
			cl->cl_flags &= ~CALLOUT_LIST_FLAG_HEAPED;
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
			continue;
		}

		/*
		 * Apply adjustments, if any. Adjustments are applied after
		 * the system returns from KMDB or OBP. They are only applied
		 * to relative callout lists.
		 */
		if (delta && !(cl->cl_flags & CALLOUT_LIST_FLAG_ABSOLUTE)) {
			hash = CALLOUT_CLHASH(cl->cl_expiration);
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			expiration = cl->cl_expiration + delta;
			if (expiration <= 0)
				expiration = CY_INFINITY;
			heap[i].ch_expiration = expiration;
			cl->cl_expiration = expiration;
			hash = CALLOUT_CLHASH(cl->cl_expiration);
			if (cl->cl_flags & CALLOUT_LIST_FLAG_NANO) {
				CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl);
			} else {
				CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);
			}
		}

		heap[ct->ct_heap_num] = heap[i];
		ct->ct_heap_num++;
		(void) callout_upheap(ct);
	}

	ct->ct_nreap = 0;

	/*
	 * We need to return the expiration to help program the cyclic.
	 * If there are expired callouts, the cyclic needs to go off
	 * immediately. If the heap has become empty, then we return infinity.
	 * Else, return the expiration of the earliest callout in the heap.
	 */
	if (ct->ct_expired.ch_head != NULL)
		return (gethrtime());

	if (ct->ct_heap_num == 0)
		return (CY_INFINITY);

	return (heap->ch_expiration);
}
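
/*
 * A sketch of how the return value of callout_heap_process() is typically
 * consumed, mirroring callout_resume() and callout_hrestime_one() below:
 *
 *	hexp = callout_heap_process(ct, delta, timechange);
 *	qexp = callout_queue_process(ct, delta, timechange);
 *	if (ct->ct_suspend == 0) {
 *		(void) cyclic_reprogram(ct->ct_cyclic, hexp);
 *		(void) cyclic_reprogram(ct->ct_qcyclic, qexp);
 *	}
 */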

/*
 * Common function used to create normal and realtime callouts.
 *
 * Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So,
 * there is one restriction on a realtime callout handler - it should not
 * directly or indirectly acquire cpu_lock. CPU offline waits for pending
 * cyclic handlers to complete while holding cpu_lock. So, if a realtime
 * callout handler were to try to get cpu_lock, there would be a deadlock
 * during CPU offline.
 */
callout_id_t
timeout_generic(int type, void (*func)(void *), void *arg,
    hrtime_t expiration, hrtime_t resolution, int flags)
{
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t id;
	callout_list_t *cl;
	hrtime_t now, interval;
	int hash, clflags;

	ASSERT(resolution > 0);
	ASSERT(func != NULL);

	/*
	 * We get the current hrtime right upfront so that latencies in
	 * this function do not affect the accuracy of the callout.
	 */
	now = gethrtime();

	/*
	 * We disable kernel preemption so that we remain on the same CPU
	 * throughout. If we need to reprogram the callout table's cyclic,
	 * we can avoid X-calls if we are on the same CPU.
	 *
	 * Note that callout_alloc() releases and reacquires the callout
	 * table mutex. While reacquiring the mutex, it is possible for us
	 * to go to sleep and later migrate to another CPU. This should be
	 * pretty rare, though.
	 */
	kpreempt_disable();

	ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)];
	mutex_enter(&ct->ct_mutex);

	if (ct->ct_cyclic == CYCLIC_NONE) {
		mutex_exit(&ct->ct_mutex);
		/*
		 * The callout table has not yet been initialized fully.
		 * So, put this one on the boot callout table which is
		 * always initialized.
		 */
		ct = &callout_boot_ct[type];
		mutex_enter(&ct->ct_mutex);
	}

	if (CALLOUT_CLEANUP(ct)) {
		/*
		 * There are too many heap elements pointing to empty callout
		 * lists. Clean them out. Since cleanup is only done once
		 * in a while, no need to reprogram the cyclic if the root
		 * of the heap gets cleaned out.
		 */
		(void) callout_heap_process(ct, 0, 0);
	}

	if ((cp = ct->ct_free) == NULL)
		cp = callout_alloc(ct);
	else
		ct->ct_free = cp->c_idnext;

	cp->c_func = func;
	cp->c_arg = arg;

	/*
	 * Compute the expiration hrtime.
	 */
	if (flags & CALLOUT_FLAG_ABSOLUTE) {
		interval = expiration - now;
	} else {
		interval = expiration;
		expiration += now;
	}

	if (resolution > 1) {
		/*
		 * Align expiration to the specified resolution.
		 */
		if (flags & CALLOUT_FLAG_ROUNDUP)
			expiration += resolution - 1;
		expiration = (expiration / resolution) * resolution;
	}

	if (expiration <= 0) {
		/*
		 * expiration hrtime overflow has occurred. Just set the
		 * expiration to infinity.
		 */
		expiration = CY_INFINITY;
	}

	/*
	 * Assign an ID to this callout.
	 */
	if (flags & CALLOUT_FLAG_32BIT) {
		if (interval > callout_longterm) {
			id = (ct->ct_long_id - callout_counter_low);
			id |= CALLOUT_COUNTER_HIGH;
			ct->ct_long_id = id;
		} else {
			id = (ct->ct_short_id - callout_counter_low);
			id |= CALLOUT_COUNTER_HIGH;
			ct->ct_short_id = id;
		}
	} else {
		id = (ct->ct_gen_id - callout_counter_low);
		if ((id & CALLOUT_COUNTER_HIGH) == 0) {
			id |= CALLOUT_COUNTER_HIGH;
			id += CALLOUT_GENERATION_LOW;
		}
		ct->ct_gen_id = id;
	}

	cp->c_xid = id;

	clflags = 0;
	if (flags & CALLOUT_FLAG_ABSOLUTE)
		clflags |= CALLOUT_LIST_FLAG_ABSOLUTE;
	if (flags & CALLOUT_FLAG_HRESTIME)
		clflags |= CALLOUT_LIST_FLAG_HRESTIME;
	if (resolution == 1)
		clflags |= CALLOUT_LIST_FLAG_NANO;
	hash = CALLOUT_CLHASH(expiration);

again:
	/*
	 * Try to see if a callout list already exists for this expiration.
	 */
	cl = callout_list_get(ct, expiration, clflags, hash);
	if (cl == NULL) {
		/*
		 * Check the free list. If we don't find one, we have to
		 * take the slow path and allocate from kmem.
		 */
		if ((cl = ct->ct_lfree) == NULL) {
			callout_list_alloc(ct);
			/*
			 * In the above call, we drop the lock, allocate and
			 * reacquire the lock. So, we could have been away
			 * for a while. In the meantime, someone could have
			 * inserted a callout list with the same expiration.
			 * Plus, the heap could have become full. So, the best
			 * course is to repeat the steps. This should be an
			 * infrequent event.
			 */
			goto again;
		}
		ct->ct_lfree = cl->cl_next;
		cl->cl_expiration = expiration;
		cl->cl_flags = clflags;

		/*
		 * Check if we have enough space in the heap to insert one
		 * expiration. If not, expand the heap.
		 */
		if (ct->ct_heap_num == ct->ct_heap_max) {
			if (callout_heap_expand(ct) == 0) {
				/*
				 * Could not expand the heap. Just queue it.
				 */
				callout_queue_insert(ct, cl);
				goto out;
			}

			/*
			 * In the above call, we drop the lock, allocate and
			 * reacquire the lock. So, we could have been away
			 * for a while. In the meantime, someone could have
			 * inserted a callout list with the same expiration.
			 * But we will not go back and check for it as this
			 * should be a really infrequent event. There is no
			 * point.
			 */
		}

		if (clflags & CALLOUT_LIST_FLAG_NANO) {
			CALLOUT_LIST_APPEND(ct->ct_clhash[hash], cl);
		} else {
			CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);
		}

		/*
		 * This is a new expiration. So, insert it into the heap.
		 * This will also reprogram the cyclic, if the expiration
		 * propagated to the root of the heap.
		 */
		callout_heap_insert(ct, cl);
	} else {
		/*
		 * If the callout list was empty, untimeout_generic() would
		 * have incremented a reap count. Decrement the reap count
		 * as we are going to insert a callout into this list.
		 */
		if (cl->cl_callouts.ch_head == NULL)
			ct->ct_nreap--;
	}
out:
	cp->c_list = cl;
	CALLOUT_APPEND(ct, cp);

	ct->ct_timeouts++;
	ct->ct_timeouts_pending++;

	mutex_exit(&ct->ct_mutex);

	kpreempt_enable();

	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
	    "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration,
	    cp);

	return (id);
}

timeout_id_t
timeout(void (*func)(void *), void *arg, clock_t delta)
{
	ulong_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg,
	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);

	return ((timeout_id_t)id);
}
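
/*
 * A minimal usage sketch for the legacy timeout(9F)/untimeout(9F) interfaces
 * implemented here; 'my_poll' and 'my_state_t' are hypothetical and not part
 * of this file:
 *
 *	static void
 *	my_poll(void *arg)
 *	{
 *		my_state_t *sp = arg;
 *
 *		// ... do the periodic work, then rearm ...
 *		sp->sp_tid = timeout(my_poll, sp, drv_usectohz(500000));
 *	}
 *
 *	// to cancel:
 *	if (sp->sp_tid != 0)
 *		(void) untimeout(sp->sp_tid);
 */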

/*
 * Convenience function that creates a normal callout with default parameters
 * and returns a full ID.
 */
callout_id_t
timeout_default(void (*func)(void *), void *arg, clock_t delta)
{
	callout_id_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta),
	    nsec_per_tick, 0);

	return (id);
}

timeout_id_t
realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
{
	ulong_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg,
	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);

	return ((timeout_id_t)id);
}

/*
 * Convenience function that creates a realtime callout with default parameters
 * and returns a full ID.
 */
callout_id_t
realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta)
{
	callout_id_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta),
	    nsec_per_tick, 0);

	return (id);
}

hrtime_t
untimeout_generic(callout_id_t id, int nowait)
{
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t xid;
	callout_list_t *cl;
	int hash, flags;
	callout_id_t bogus;

	ct = &callout_table[CALLOUT_ID_TO_TABLE(id)];
	hash = CALLOUT_IDHASH(id);

	mutex_enter(&ct->ct_mutex);

	/*
	 * Search the ID hash table for the callout.
	 */
	for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) {

		xid = cp->c_xid;

		/*
		 * Match the ID and generation number.
		 */
		if ((xid & CALLOUT_ID_MASK) != id)
			continue;

		if ((xid & CALLOUT_EXECUTING) == 0) {
			hrtime_t expiration;

			/*
			 * Delete the callout. If the callout list becomes
			 * NULL, we don't remove it from the table. This is
			 * so it can be reused. If the empty callout list
			 * corresponds to the top of the callout heap, we
			 * don't reprogram the table cyclic here. This is in
			 * order to avoid lots of X-calls to the CPU associated
			 * with the callout table.
			 */
			cl = cp->c_list;
			expiration = cl->cl_expiration;
			CALLOUT_DELETE(ct, cp);
			CALLOUT_FREE(ct, cp);
			ct->ct_untimeouts_unexpired++;
			ct->ct_timeouts_pending--;

			/*
			 * If the callout list has become empty, there are 3
			 * possibilities. If it is present:
			 *	- in the heap, it needs to be cleaned along
			 *	  with its heap entry. Increment a reap count.
			 *	- in the callout queue, free it.
			 *	- in the expired list, free it.
			 */
			if (cl->cl_callouts.ch_head == NULL) {
				flags = cl->cl_flags;
				if (flags & CALLOUT_LIST_FLAG_HEAPED) {
					ct->ct_nreap++;
				} else if (flags & CALLOUT_LIST_FLAG_QUEUED) {
					CALLOUT_LIST_DELETE(ct->ct_queue, cl);
					CALLOUT_LIST_FREE(ct, cl);
				} else {
					CALLOUT_LIST_DELETE(ct->ct_expired, cl);
					CALLOUT_LIST_FREE(ct, cl);
				}
			}
			mutex_exit(&ct->ct_mutex);

			expiration -= gethrtime();
			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
			    "untimeout:ID %lx hrtime left %llx", id,
			    expiration);
			return (expiration < 0 ? 0 : expiration);
		}

		ct->ct_untimeouts_executing++;
		/*
		 * The callout we want to delete is currently executing.
		 * The DDI states that we must wait until the callout
		 * completes before returning, so we block on c_done until the
		 * callout ID changes (to the old ID if it's on the freelist,
		 * or to a new callout ID if it's in use). This implicitly
		 * assumes that callout structures are persistent (they are).
		 */
		if (cp->c_executor == curthread) {
			/*
			 * The timeout handler called untimeout() on itself.
			 * Stupid, but legal. We can't wait for the timeout
			 * to complete without deadlocking, so we just return.
			 */
			mutex_exit(&ct->ct_mutex);
			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
			    "untimeout_self:ID %x", id);
			return (-1);
		}
		if (nowait == 0) {
			/*
			 * We need to wait. Indicate that we are waiting by
			 * incrementing c_waiting. This prevents the executor
			 * from doing a wakeup on c_done if there are no
			 * waiters.
			 */
			while (cp->c_xid == xid) {
				cp->c_waiting = 1;
				cv_wait(&cp->c_done, &ct->ct_mutex);
			}
		}
		mutex_exit(&ct->ct_mutex);
		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
		    "untimeout_executing:ID %lx", id);
		return (-1);
	}
	ct->ct_untimeouts_expired++;

	mutex_exit(&ct->ct_mutex);
	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
	    "untimeout_bogus_id:ID %lx", id);

	/*
	 * We didn't find the specified callout ID. This means either
	 * (1) the callout already fired, or (2) the caller passed us
	 * a bogus value. Perform a sanity check to detect case (2).
	 */
	bogus = (CALLOUT_ID_FLAGS | CALLOUT_COUNTER_HIGH);
	if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0))
		panic("untimeout: impossible timeout id %llx",
		    (unsigned long long)id);

	return (-1);
}

clock_t
untimeout(timeout_id_t id_arg)
{
	hrtime_t hleft;
	clock_t tleft;
	callout_id_t id;

	id = (ulong_t)id_arg;
	hleft = untimeout_generic(id, 0);
	if (hleft < 0)
		tleft = -1;
	else if (hleft == 0)
		tleft = 0;
	else
		tleft = NSEC_TO_TICK(hleft);

	return (tleft);
}

/*
 * Convenience function to untimeout a timeout with a full ID with default
 * parameters.
 */
clock_t
untimeout_default(callout_id_t id, int nowait)
{
	hrtime_t hleft;
	clock_t tleft;

	hleft = untimeout_generic(id, nowait);
	if (hleft < 0)
		tleft = -1;
	else if (hleft == 0)
		tleft = 0;
	else
		tleft = NSEC_TO_TICK(hleft);

	return (tleft);
}
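
/*
 * A sketch of how callers typically interpret the return values above (the
 * variable names are illustrative):
 *
 *	left = untimeout_default(id, 0);	// wait if the handler is running
 *	if (left < 0) {
 *		// already fired, currently executing, or self-cancelled
 *	} else {
 *		// cancelled with 'left' ticks remaining
 *	}
 *
 * Passing nowait == 1 skips the cv_wait() on c_done, so the call returns -1
 * right away if the handler happens to be executing.
 */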

/*
 * Expire all the callouts queued in the specified callout list.
 */
static void
callout_list_expire(callout_table_t *ct, callout_list_t *cl)
{
	callout_t *cp, *cnext;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(cl != NULL);

	for (cp = cl->cl_callouts.ch_head; cp != NULL; cp = cnext) {
		/*
		 * Multiple executor threads could be running at the same
		 * time. If this callout is already being executed,
		 * go on to the next one.
		 */
		if (cp->c_xid & CALLOUT_EXECUTING) {
			cnext = cp->c_clnext;
			continue;
		}

		/*
		 * Indicate to untimeout() that a callout is
		 * being expired by the executor.
		 */
		cp->c_xid |= CALLOUT_EXECUTING;
		cp->c_executor = curthread;
		mutex_exit(&ct->ct_mutex);

		DTRACE_PROBE1(callout__start, callout_t *, cp);
		(*cp->c_func)(cp->c_arg);
		DTRACE_PROBE1(callout__end, callout_t *, cp);

		mutex_enter(&ct->ct_mutex);

		ct->ct_expirations++;
		ct->ct_timeouts_pending--;
		/*
		 * Indicate completion for c_done.
		 */
		cp->c_xid &= ~CALLOUT_EXECUTING;
		cp->c_executor = NULL;
		cnext = cp->c_clnext;

		/*
		 * Delete callout from ID hash table and the callout
		 * list, return to freelist, and tell any untimeout() that
		 * cares that we're done.
		 */
		CALLOUT_DELETE(ct, cp);
		CALLOUT_FREE(ct, cp);

		if (cp->c_waiting) {
			cp->c_waiting = 0;
			cv_broadcast(&cp->c_done);
		}
	}
}

/*
 * Execute all expired callout lists for a callout table.
 */
static void
callout_expire(callout_table_t *ct)
{
	callout_list_t *cl, *clnext;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) {
		/*
		 * Expire all the callouts in this callout list.
		 */
		callout_list_expire(ct, cl);

		clnext = cl->cl_next;
		if (cl->cl_callouts.ch_head == NULL) {
			/*
			 * Free the callout list.
			 */
			CALLOUT_LIST_DELETE(ct->ct_expired, cl);
			CALLOUT_LIST_FREE(ct, cl);
		}
	}
}

/*
 * The cyclic handlers below process callouts in two steps:
 *
 *	1. Find all expired callout lists and queue them in a separate
 *	   list of expired callouts.
 *	2. Execute the expired callout lists.
 *
 * This is done for two reasons:
 *
 *	1. We want to quickly find the next earliest expiration to program
 *	   the cyclic to and reprogram it. We can do this right at the end
 *	   of step 1.
 *	2. The realtime cyclic handler expires callouts in place. However,
 *	   for normal callouts, callouts are expired by a taskq thread.
 *	   So, it is simpler and more robust to have the taskq thread just
 *	   do step 2.
 */

/*
 * Realtime callout cyclic handlers.
 */
void
callout_realtime(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	(void) callout_heap_delete(ct);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}

void
callout_queue_realtime(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	(void) callout_queue_delete(ct);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}

void
callout_execute(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}

/*
 * Normal callout cyclic handlers.
 */
void
callout_normal(callout_table_t *ct)
{
	int i, exec;
	hrtime_t exp;

	mutex_enter(&ct->ct_mutex);
	exp = callout_heap_delete(ct);
	CALLOUT_EXEC_COMPUTE(ct, exp, exec);
	mutex_exit(&ct->ct_mutex);

	for (i = 0; i < exec; i++) {
		ASSERT(ct->ct_taskq != NULL);
		(void) taskq_dispatch(ct->ct_taskq,
		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
	}
}

void
callout_queue_normal(callout_table_t *ct)
{
	int i, exec;
	hrtime_t exp;

	mutex_enter(&ct->ct_mutex);
	exp = callout_queue_delete(ct);
	CALLOUT_EXEC_COMPUTE(ct, exp, exec);
	mutex_exit(&ct->ct_mutex);

	for (i = 0; i < exec; i++) {
		ASSERT(ct->ct_taskq != NULL);
		(void) taskq_dispatch(ct->ct_taskq,
		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
	}
}

/*
 * Suspend callout processing.
 */
static void
callout_suspend(void)
{
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and suspend callout
	 * processing.
	 *
	 * We need to suspend all the tables (including the inactive ones)
	 * so that if a table is made active while the suspend is still on,
	 * the table remains suspended.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];

			mutex_enter(&ct->ct_mutex);
			ct->ct_suspend++;
			if (ct->ct_cyclic == CYCLIC_NONE) {
				mutex_exit(&ct->ct_mutex);
				continue;
			}
			if (ct->ct_suspend == 1) {
				(void) cyclic_reprogram(ct->ct_cyclic,
				    CY_INFINITY);
				(void) cyclic_reprogram(ct->ct_qcyclic,
				    CY_INFINITY);
			}
			mutex_exit(&ct->ct_mutex);
		}
	}
}

/*
 * Resume callout processing.
 */
static void
callout_resume(hrtime_t delta, int timechange)
{
	hrtime_t hexp, qexp;
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and resume callout
	 * processing. For active tables, perform any hrtime adjustments
	 * necessary.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];

			mutex_enter(&ct->ct_mutex);
			if (ct->ct_cyclic == CYCLIC_NONE) {
				ct->ct_suspend--;
				mutex_exit(&ct->ct_mutex);
				continue;
			}

			/*
			 * If a delta is specified, adjust the expirations in
			 * the heap by delta. Also, if the caller indicates
			 * a timechange, process that. This step also cleans
			 * out any empty callout lists that might happen to
			 * be there.
			 */
			hexp = callout_heap_process(ct, delta, timechange);
			qexp = callout_queue_process(ct, delta, timechange);

			ct->ct_suspend--;
			if (ct->ct_suspend == 0) {
				(void) cyclic_reprogram(ct->ct_cyclic, hexp);
				(void) cyclic_reprogram(ct->ct_qcyclic, qexp);
			}

			mutex_exit(&ct->ct_mutex);
		}
	}
}

/*
 * Callback handler used by CPR to stop and resume callouts.
 * The cyclic subsystem saves and restores hrtime during CPR.
 * That is why callout_resume() is called with a 0 delta.
 * Although hrtime is the same, hrestime (system time) has
 * progressed during CPR. So, we have to indicate a time change
 * to expire the absolute hrestime timers.
 */
/*ARGSUSED*/
static boolean_t
callout_cpr_callb(void *arg, int code)
{
	if (code == CB_CODE_CPR_CHKPT)
		callout_suspend();
	else
		callout_resume(0, 1);

	return (B_TRUE);
}

/*
 * Callback handler invoked when the debugger is entered or exited.
 */
/*ARGSUSED*/
static boolean_t
callout_debug_callb(void *arg, int code)
{
	hrtime_t delta;

	/*
	 * When the system enters the debugger, make a note of the hrtime.
	 * When it is resumed, compute how long the system was in the
	 * debugger. This interval should not be counted for callouts.
	 */
	if (code == 0) {
		callout_suspend();
		callout_debug_hrtime = gethrtime();
	} else {
		delta = gethrtime() - callout_debug_hrtime;
		callout_resume(delta, 0);
	}

	return (B_TRUE);
}

/*
 * Move the absolute hrestime callouts to the expired list. Then program the
 * table's cyclic to expire immediately so that the callouts can be executed
 * immediately.
 */
static void
callout_hrestime_one(callout_table_t *ct)
{
	hrtime_t hexp, qexp;

	mutex_enter(&ct->ct_mutex);
	if (ct->ct_cyclic == CYCLIC_NONE) {
		mutex_exit(&ct->ct_mutex);
		return;
	}

	/*
	 * Walk the heap and process all the absolute hrestime entries.
	 */
	hexp = callout_heap_process(ct, 0, 1);
	qexp = callout_queue_process(ct, 0, 1);

	if (ct->ct_suspend == 0) {
		(void) cyclic_reprogram(ct->ct_cyclic, hexp);
		(void) cyclic_reprogram(ct->ct_qcyclic, qexp);
	}

	mutex_exit(&ct->ct_mutex);
}

/*
 * This function is called whenever system time (hrestime) is changed
 * explicitly. All the HRESTIME callouts must be expired at once.
 */
/*ARGSUSED*/
void
callout_hrestime(void)
{
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and process the hrestime
	 * callouts therein.
	 *
	 * We look at all the tables because we don't know which ones were
	 * onlined and offlined in the past. The offlined tables may still
	 * have active cyclics processing timers somewhere.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];
			callout_hrestime_one(ct);
		}
	}
}

/*
 * Create the hash tables for this callout table.
 */
static void
callout_hash_init(callout_table_t *ct)
{
	size_t size;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL));

	size = sizeof (callout_hash_t) * CALLOUT_BUCKETS;
	ct->ct_idhash = kmem_zalloc(size, KM_SLEEP);
	ct->ct_clhash = kmem_zalloc(size, KM_SLEEP);
}

/*
 * Create per-callout table kstats.
 */
static void
callout_kstat_init(callout_table_t *ct)
{
	callout_stat_type_t stat;
	kstat_t *ct_kstats;
	int ndx;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_kstats == NULL);

	ndx = ct - callout_table;
	ct_kstats = kstat_create("unix", ndx, "callout",
	    "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL);

	if (ct_kstats == NULL) {
		cmn_err(CE_WARN, "kstat_create for callout table %p failed",
		    (void *)ct);
	} else {
		ct_kstats->ks_data = ct->ct_kstat_data;
		for (stat = 0; stat < CALLOUT_NUM_STATS; stat++)
			kstat_named_init(&ct->ct_kstat_data[stat],
			    callout_kstat_names[stat], KSTAT_DATA_INT64);
		ct->ct_kstats = ct_kstats;
		kstat_install(ct_kstats);
	}
}
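
/*
 * These kstats are visible from userland; for example, a command like
 * "kstat -p 'unix:*:callout'" should list callout_timeouts,
 * callout_expirations and the other counters for every callout table
 * instance (shown for illustration; see kstat(1M)).
 */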

static void
callout_cyclic_init(callout_table_t *ct)
{
	cyc_handler_t hdlr;
	cyc_time_t when;
	processorid_t seqid;
	int t;
	cyclic_id_t cyclic, qcyclic;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	t = ct->ct_type;
	seqid = CALLOUT_TABLE_SEQID(ct);

	/*
	 * Create the taskq thread if the table type is normal.
	 * Realtime tables are handled at PIL1 by a softint
	 * handler.
	 */
	if (t == CALLOUT_NORMAL) {
		ASSERT(ct->ct_taskq == NULL);
		/*
		 * Each callout thread consumes exactly one
		 * task structure while active. Therefore,
		 * prepopulating with 2 * callout_threads tasks
		 * ensures that there's at least one task per
		 * thread that's either scheduled or on the
		 * freelist. In turn, this guarantees that
		 * taskq_dispatch() will always either succeed
		 * (because there's a free task structure) or
		 * be unnecessary (because "callout_execute(ct)"
		 * has already scheduled).
		 */
		ct->ct_taskq =
		    taskq_create_instance("callout_taskq", seqid,
		    callout_threads, maxclsyspri,
		    2 * callout_threads, 2 * callout_threads,
		    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	}

	/*
	 * Callouts can only be created in a table whose
	 * cyclic has been initialized.
	 */
	ASSERT(ct->ct_heap_num == 0);

	/*
	 * Drop the mutex before creating the callout cyclics. cyclic_add()
	 * could potentially expand the cyclic heap. We don't want to be
	 * holding the callout table mutex in that case. Note that this
	 * function is called during CPU online. cpu_lock is held at this
	 * point. So, only one thread can be executing the cyclic add logic
	 * below at any time.
	 */
	mutex_exit(&ct->ct_mutex);

	/*
	 * Create the callout table cyclics.
	 *
	 * The realtime cyclic handler executes at low PIL. The normal cyclic
	 * handler executes at lock PIL. This is because there are cases
	 * where code can block at PIL > 1 waiting for a normal callout handler
	 * to unblock it directly or indirectly. If the normal cyclic were to
	 * be executed at low PIL, it could get blocked out by the waiter
	 * and cause a deadlock.
	 */
	ASSERT(ct->ct_cyclic == CYCLIC_NONE);

	if (t == CALLOUT_REALTIME) {
		hdlr.cyh_level = callout_realtime_level;
		hdlr.cyh_func = (cyc_func_t)callout_realtime;
	} else {
		hdlr.cyh_level = callout_normal_level;
		hdlr.cyh_func = (cyc_func_t)callout_normal;
	}
	hdlr.cyh_arg = ct;
	when.cyt_when = CY_INFINITY;
	when.cyt_interval = CY_INFINITY;

	cyclic = cyclic_add(&hdlr, &when);

	if (t == CALLOUT_REALTIME)
		hdlr.cyh_func = (cyc_func_t)callout_queue_realtime;
	else
		hdlr.cyh_func = (cyc_func_t)callout_queue_normal;

	qcyclic = cyclic_add(&hdlr, &when);

	mutex_enter(&ct->ct_mutex);
	ct->ct_cyclic = cyclic;
	ct->ct_qcyclic = qcyclic;
}

void
callout_cpu_online(cpu_t *cp)
{
	lgrp_handle_t hand;
	callout_cache_t *cache;
	char s[KMEM_CACHE_NAMELEN];
	callout_table_t *ct;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Locate the cache corresponding to the onlined CPU's lgroup.
	 * Note that access to callout_caches is protected by cpu_lock.
	 */
	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
	for (cache = callout_caches; cache != NULL; cache = cache->cc_next) {
		if (cache->cc_hand == hand)
			break;
	}

	/*
	 * If not found, create one. The caches are never destroyed.
	 */
	if (cache == NULL) {
		cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP);
		cache->cc_hand = hand;
		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx",
		    (long)hand);
		cache->cc_cache = kmem_cache_create(s, sizeof (callout_t),
		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx",
		    (long)hand);
		cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t),
		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
		cache->cc_next = callout_caches;
		callout_caches = cache;
	}

	seqid = cp->cpu_seqid;

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		ct = &callout_table[CALLOUT_TABLE(t, seqid)];

		mutex_enter(&ct->ct_mutex);
		/*
		 * Store convenience pointers to the kmem caches
		 * in the callout table. These assignments should always be
		 * done as callout tables can map to different physical
		 * CPUs each time.
		 */
		ct->ct_cache = cache->cc_cache;
		ct->ct_lcache = cache->cc_lcache;

		/*
		 * We use the heap pointer to check if stuff has been
		 * initialized for this callout table.
		 */
		if (ct->ct_heap == NULL) {
			callout_heap_init(ct);
			callout_hash_init(ct);
			callout_kstat_init(ct);
			callout_cyclic_init(ct);
		}

		mutex_exit(&ct->ct_mutex);

		/*
		 * Move the cyclics to this CPU by doing a bind.
		 */
		cyclic_bind(ct->ct_cyclic, cp, NULL);
		cyclic_bind(ct->ct_qcyclic, cp, NULL);
	}
}

void
callout_cpu_offline(cpu_t *cp)
{
	callout_table_t *ct;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&cpu_lock));

	seqid = cp->cpu_seqid;

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		ct = &callout_table[CALLOUT_TABLE(t, seqid)];

		/*
		 * Unbind the cyclics. This will allow the cyclic subsystem
		 * to juggle the cyclics during CPU offline.
		 */
		cyclic_bind(ct->ct_cyclic, NULL, NULL);
		cyclic_bind(ct->ct_qcyclic, NULL, NULL);
	}
}

/*
 * This is called to perform per-CPU initialization for slave CPUs at
 * boot time.
 */
void
callout_mp_init(void)
{
	cpu_t *cp;
	size_t min, max;

	if (callout_chunk == CALLOUT_CHUNK) {
		/*
		 * No one has specified a chunk in /etc/system. We need to
		 * compute it here based on the number of online CPUs and
		 * available physical memory.
		 */
		min = CALLOUT_MIN_HEAP_SIZE;
		max = ptob(physmem / CALLOUT_MEM_FRACTION);
		if (min > max)
			min = max;
		callout_chunk = min / sizeof (callout_heap_t);
		callout_chunk /= ncpus_online;
		callout_chunk = P2ROUNDUP(callout_chunk, CALLOUT_CHUNK);
	}

	mutex_enter(&cpu_lock);

	cp = cpu_active;
	do {
		callout_cpu_online(cp);
	} while ((cp = cp->cpu_next_onln) != cpu_active);

	mutex_exit(&cpu_lock);
}
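
/*
 * As the comment in callout_mp_init() above implies, the heap chunk size can
 * be overridden from /etc/system before boot, for example (the value is
 * purely illustrative):
 *
 *	set callout_chunk = 0x400
 */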

/*
 * Initialize all callout tables. Called at boot time just before clkstart().
 */
void
callout_init(void)
{
	int f, t;
	size_t size;
	int table_id;
	callout_table_t *ct;
	long bits, fanout;
	uintptr_t buf;

	/*
	 * Initialize callout globals.
	 */
	bits = 0;
	for (fanout = 1; (fanout < max_ncpus); fanout <<= 1)
		bits++;
	callout_table_bits = CALLOUT_TYPE_BITS + bits;
	callout_table_mask = (1 << callout_table_bits) - 1;
	callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT;
	callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS);
	callout_max_ticks = CALLOUT_MAX_TICKS;
	if (callout_min_reap == 0)
		callout_min_reap = CALLOUT_MIN_REAP;

	if (callout_tolerance <= 0)
		callout_tolerance = CALLOUT_TOLERANCE;
	if (callout_threads <= 0)
		callout_threads = CALLOUT_THREADS;
	if (callout_chunk <= 0)
		callout_chunk = CALLOUT_CHUNK;
	else
		callout_chunk = P2ROUNDUP(callout_chunk, CALLOUT_CHUNK);

	/*
	 * Allocate all the callout tables based on max_ncpus. We have chosen
	 * to do boot-time allocation instead of dynamic allocation because:
	 *
	 *	- the size of the callout tables is not too large.
	 *	- there are race conditions involved in making this dynamic.
	 *	- the hash tables that go with the callout tables consume
	 *	  most of the memory and they are only allocated in
	 *	  callout_cpu_online().
	 *
	 * Each CPU has two tables that are consecutive in the array. The first
	 * one is for realtime callouts and the second one is for normal ones.
	 *
	 * We do this alignment dance to make sure that callout table
	 * structures will always be on a cache line boundary.
	 */
	size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus;
	size += CALLOUT_ALIGN;
	buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP);
	callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN);

	size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS;
	/*
	 * Now, initialize the tables for all the CPUs.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			table_id = CALLOUT_TABLE(t, f);
			ct = &callout_table[table_id];
			ct->ct_type = t;
			mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
			/*
			 * Precompute the base IDs for long and short-term
			 * legacy IDs. This makes ID generation during
			 * timeout() fast.
			 */
			ct->ct_short_id = CALLOUT_SHORT_ID(table_id);
			ct->ct_long_id = CALLOUT_LONG_ID(table_id);
			/*
			 * Precompute the base ID for generation-based IDs.
			 * Note that when the first ID gets allocated, the
			 * ID will wrap. This will cause the generation
			 * number to be incremented to 1.
			 */
			ct->ct_gen_id = CALLOUT_SHORT_ID(table_id);
			/*
			 * Initialize the cyclics as NONE. This will get set
			 * during CPU online. This is so that partially
			 * populated systems will only have the required
			 * number of cyclics, not more.
			 */
			ct->ct_cyclic = CYCLIC_NONE;
			ct->ct_qcyclic = CYCLIC_NONE;
			ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP);
		}
	}

	/*
	 * Add the callback for CPR. This is called during checkpoint
	 * resume to suspend and resume callouts.
	 */
	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT,
	    "callout_cpr");
	(void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER,
	    "callout_debug");

	/*
	 * Call the per-CPU initialization function for the boot CPU. This
	 * is done here because the function is not called automatically for
	 * the boot CPU from the CPU online/offline hooks. Note that the
	 * CPU lock is taken here by convention.
	 */
	mutex_enter(&cpu_lock);
	callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)];
	callout_cpu_online(CPU);
	mutex_exit(&cpu_lock);

	/* heads-up to boot-time clients that timeouts now available */
	callout_init_done = 1;
}