/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/callo.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/kmem.h>
#include <sys/kmem_impl.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>

/*
 * Callout tables. See timeout(9F) for details.
 */
static hrtime_t callout_debug_hrtime;	/* debugger entry time */
static int callout_min_resolution;	/* Minimum resolution */
static callout_table_t *callout_boot_ct;	/* Boot CPU's callout tables */
static clock_t callout_max_ticks;	/* max interval */
static hrtime_t callout_longterm;	/* longterm nanoseconds */
static ulong_t callout_counter_low;	/* callout ID increment */
static ulong_t callout_table_bits;	/* number of table bits in ID */
static ulong_t callout_table_mask;	/* mask for the table bits */
static callout_cache_t *callout_caches;	/* linked list of caches */
#pragma align 64(callout_table)
static callout_table_t *callout_table;	/* global callout table array */

static char *callout_kstat_names[] = {
	"callout_timeouts",
	"callout_timeouts_pending",
	"callout_untimeouts_unexpired",
	"callout_untimeouts_executing",
	"callout_untimeouts_expired",
	"callout_expirations",
	"callout_allocations",
};

#define	CALLOUT_HASH_INSERT(hash, cp, cnext, cprev)	\
{	\
	callout_hash_t *hashp = &(hash);	\
	\
	cp->cprev = NULL;	\
	cp->cnext = hashp->ch_head;	\
	if (hashp->ch_head == NULL)	\
		hashp->ch_tail = cp;	\
	else	\
		cp->cnext->cprev = cp;	\
	hashp->ch_head = cp;	\
}

#define	CALLOUT_HASH_APPEND(hash, cp, cnext, cprev)	\
{	\
	callout_hash_t *hashp = &(hash);	\
	\
	cp->cnext = NULL;	\
	cp->cprev = hashp->ch_tail;	\
	if (hashp->ch_tail == NULL)	\
		hashp->ch_head = cp;	\
	else	\
		cp->cprev->cnext = cp;	\
	hashp->ch_tail = cp;	\
}

#define	CALLOUT_HASH_DELETE(hash, cp, cnext, cprev)	\
{	\
	callout_hash_t *hashp = &(hash);	\
	\
	if (cp->cnext == NULL)	\
		hashp->ch_tail = cp->cprev;	\
	else	\
		cp->cnext->cprev = cp->cprev;	\
	if (cp->cprev == NULL)	\
		hashp->ch_head = cp->cnext;	\
	else	\
		cp->cprev->cnext = cp->cnext;	\
}

/*
 * These definitions help us queue callouts and callout lists. Here is
 * the queueing rationale:
 *
 *	- callouts are queued in a FIFO manner in the ID hash table.
 *	  TCP timers are typically cancelled in the same order that they
 *	  were issued.
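 *	  Since untimeout() searches an ID hash chain from its head, the
 *	  oldest (and therefore most likely to be cancelled) entries are
 *	  found first.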
 *	  The FIFO queueing shortens the search for a callout
 *	  during untimeout().
 *
 *	- callouts are queued in a FIFO manner in their callout lists.
 *	  This ensures that the callouts are executed in the same order that
 *	  they were queued. This is fair. Plus, it helps to make each
 *	  callout expiration timely. It also favors cancellations.
 *
 *	- callout lists are queued in a LIFO manner in the callout list hash
 *	  table. This ensures that long term timers stay at the rear of the
 *	  hash lists.
 *
 *	- callout lists are queued in a FIFO manner in the expired callouts
 *	  list. This ensures that callout lists are executed in the order
 *	  of expiration.
 */
#define	CALLOUT_APPEND(ct, cp)	\
	CALLOUT_HASH_APPEND(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)],	\
	    cp, c_idnext, c_idprev);	\
	CALLOUT_HASH_APPEND(cp->c_list->cl_callouts, cp, c_clnext, c_clprev)

#define	CALLOUT_DELETE(ct, cp)	\
	CALLOUT_HASH_DELETE(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)],	\
	    cp, c_idnext, c_idprev);	\
	CALLOUT_HASH_DELETE(cp->c_list->cl_callouts, cp, c_clnext, c_clprev)

#define	CALLOUT_LIST_INSERT(hash, cl)	\
	CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev)

#define	CALLOUT_LIST_APPEND(hash, cl)	\
	CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev)

#define	CALLOUT_LIST_DELETE(hash, cl)	\
	CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev)

/*
 * Allocate a callout structure. We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing all else, we try a KM_PANIC allocation. Note that we never
 * deallocate a callout. See untimeout() for the reasoning.
 */
static callout_t *
callout_alloc(callout_table_t *ct)
{
	size_t size;
	callout_t *cp;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	mutex_exit(&ct->ct_mutex);

	cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP);
	if (cp == NULL) {
		size = sizeof (callout_t);
		cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
	}
	cp->c_xid = 0;

	mutex_enter(&ct->ct_mutex);
	ct->ct_allocations++;
	return (cp);
}

/*
 * Allocate a callout list structure. We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing all else, we try a KM_PANIC allocation. Note that we never
 * deallocate a callout list.
 */
static void
callout_list_alloc(callout_table_t *ct)
{
	size_t size;
	callout_list_t *cl;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	mutex_exit(&ct->ct_mutex);

	cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP);
	if (cl == NULL) {
		size = sizeof (callout_list_t);
		cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);
	}
	bzero(cl, sizeof (callout_list_t));

	mutex_enter(&ct->ct_mutex);
	cl->cl_next = ct->ct_lfree;
	ct->ct_lfree = cl;
}

/*
 * Find the callout list that corresponds to an expiration. There can
 * be only one.
 */
static callout_list_t *
callout_list_get(callout_table_t *ct, hrtime_t expiration, int hash)
{
	callout_list_t *cl;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) {
		if (cl->cl_expiration == expiration)
			return (cl);
	}

	return (NULL);
}

/*
 * Find the callout list that corresponds to an expiration. There can
 * be only one. If the callout list is empty, free it. Else, return it.
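 * A list can become empty when untimeout() cancels its last callout;
 * rather than being freed there, such a list is reclaimed lazily here,
 * or reused if a new timeout with the same expiration arrives first.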
 */
static callout_list_t *
callout_list_check(callout_table_t *ct, hrtime_t expiration, int hash)
{
	callout_list_t *cl;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	cl = callout_list_get(ct, expiration, hash);
	if (cl != NULL) {
		if (cl->cl_callouts.ch_head != NULL) {
			/*
			 * There is exactly one callout list for every
			 * unique expiration. So, we are done.
			 */
			return (cl);
		}

		CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
		cl->cl_next = ct->ct_lfree;
		ct->ct_lfree = cl;
	}

	return (NULL);
}

/*
 * Initialize a callout table's heap, if necessary. Preallocate some free
 * entries so we don't have to check for NULL elsewhere.
 */
static void
callout_heap_init(callout_table_t *ct)
{
	size_t size;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap == NULL);

	ct->ct_heap_num = 0;
	ct->ct_heap_max = CALLOUT_CHUNK;
	size = sizeof (hrtime_t) * CALLOUT_CHUNK;
	ct->ct_heap = kmem_alloc(size, KM_SLEEP);
}

/*
 * Reallocate the heap. We try quite hard because we can't sleep, and if
 * we can't do the allocation, we're toast. Failing all else, we try a
 * KM_PANIC allocation. Note that the heap only expands, it never contracts.
 */
static void
callout_heap_expand(callout_table_t *ct)
{
	size_t max, size, osize;
	hrtime_t *heap;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num <= ct->ct_heap_max);

	while (ct->ct_heap_num == ct->ct_heap_max) {
		max = ct->ct_heap_max;
		mutex_exit(&ct->ct_mutex);

		osize = sizeof (hrtime_t) * max;
		size = sizeof (hrtime_t) * (max + CALLOUT_CHUNK);
		heap = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC);

		mutex_enter(&ct->ct_mutex);
		if (max < ct->ct_heap_max) {
			/*
			 * Someone beat us to the allocation. Free what we
			 * just allocated and proceed.
			 */
			kmem_free(heap, size);
			continue;
		}

		bcopy(ct->ct_heap, heap, osize);
		kmem_free(ct->ct_heap, osize);
		ct->ct_heap = heap;
		ct->ct_heap_max = size / sizeof (hrtime_t);
	}
}

/*
 * Move an expiration from the bottom of the heap to its correct place
 * in the heap. If we reached the root doing this, return 1. Else,
 * return 0.
 */
static int
callout_upheap(callout_table_t *ct)
{
	int current, parent;
	hrtime_t *heap, current_expiration, parent_expiration;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num >= 1);

	if (ct->ct_heap_num == 1) {
		return (1);
	}

	heap = ct->ct_heap;
	current = ct->ct_heap_num - 1;

	for (;;) {
		parent = CALLOUT_HEAP_PARENT(current);
		current_expiration = heap[current];
		parent_expiration = heap[parent];

		/*
		 * We have an expiration later than our parent; we're done.
		 */
		if (current_expiration >= parent_expiration) {
			return (0);
		}

		/*
		 * We need to swap with our parent, and continue up the heap.
		 */
		heap[parent] = current_expiration;
		heap[current] = parent_expiration;

		/*
		 * If we just reached the root, we're done.
		 */
		if (parent == 0) {
			return (1);
		}

		current = parent;
	}
	/*NOTREACHED*/
}

/*
 * Insert a new, unique expiration into a callout table's heap.
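 * The heap is the usual array-embedded min-heap: an entry at index i finds
 * its parent and children via the CALLOUT_HEAP_PARENT(), CALLOUT_HEAP_LEFT()
 * and CALLOUT_HEAP_RIGHT() macros (conventionally (i - 1) / 2, 2i + 1 and
 * 2i + 2). The caller must already have made room for the new entry; see
 * callout_heap_expand().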
 */
static void
callout_heap_insert(callout_table_t *ct, hrtime_t expiration)
{
	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num < ct->ct_heap_max);

	/*
	 * First, copy the expiration to the bottom of the heap.
	 */
	ct->ct_heap[ct->ct_heap_num] = expiration;
	ct->ct_heap_num++;

	/*
	 * Now, perform an upheap operation. If we reached the root, then
	 * the cyclic needs to be reprogrammed as we have an earlier
	 * expiration.
	 *
	 * Also, during the CPR suspend phase, do not reprogram the cyclic.
	 * We don't want any callout activity. When the CPR resume phase is
	 * entered, the cyclic will be programmed for the earliest expiration
	 * in the heap.
	 */
	if (callout_upheap(ct) && (ct->ct_suspend == 0))
		(void) cyclic_reprogram(ct->ct_cyclic, expiration);
}

/*
 * Move an expiration from the top of the heap to its correct place
 * in the heap.
 */
static void
callout_downheap(callout_table_t *ct)
{
	int left, right, current, nelems;
	hrtime_t *heap, left_expiration, right_expiration, current_expiration;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_heap_num >= 1);

	heap = ct->ct_heap;
	current = 0;
	nelems = ct->ct_heap_num;

	for (;;) {
		/*
		 * If we don't have a left child (i.e., we're a leaf), we're
		 * done.
		 */
		if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems)
			return;

		left_expiration = heap[left];
		current_expiration = heap[current];

		right = CALLOUT_HEAP_RIGHT(current);

		/*
		 * Even if we don't have a right child, we still need to
		 * compare our expiration against that of our left child.
		 */
		if (right >= nelems)
			goto comp_left;

		right_expiration = heap[right];

		/*
		 * We have both a left and a right child. We need to compare
		 * the expiration of the children to determine which
		 * expires earlier.
		 */
		if (right_expiration < left_expiration) {
			/*
			 * Our right child is the earlier of our children.
			 * We'll now compare our expiration to its expiration.
			 * If ours is the earlier one, we're done.
			 */
			if (current_expiration <= right_expiration)
				return;

			/*
			 * Our right child expires earlier than we do; swap
			 * with our right child, and descend right.
			 */
			heap[right] = current_expiration;
			heap[current] = right_expiration;
			current = right;
			continue;
		}

comp_left:
		/*
		 * Our left child is the earlier of our children (or we have
		 * no right child). We'll now compare our expiration
		 * to its expiration. If ours is the earlier one, we're done.
		 */
		if (current_expiration <= left_expiration)
			return;

		/*
		 * Our left child expires earlier than we do; swap with our
		 * left child, and descend left.
		 */
		heap[left] = current_expiration;
		heap[current] = left_expiration;
		current = left;
	}
}

/*
 * Delete and handle all past expirations in a callout table's heap.
 */
static void
callout_heap_delete(callout_table_t *ct)
{
	hrtime_t now, expiration;
	callout_list_t *cl;
	int hash;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	now = gethrtime();

	while (ct->ct_heap_num > 0) {
		expiration = ct->ct_heap[0];
		/*
		 * Find the callout list that corresponds to the expiration.
		 * If the callout list is empty, callout_list_check()
		 * will free the callout list and return NULL.
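		 * In that case there is nothing to expire for this heap
		 * entry, and it is simply discarded below.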
		 */
		hash = CALLOUT_CLHASH(expiration);
		cl = callout_list_check(ct, expiration, hash);
		if (cl != NULL) {
			/*
			 * If the root of the heap expires in the future, we
			 * are done. We are doing this check here instead of
			 * at the beginning because we want to first free all
			 * the empty callout lists at the top of the heap.
			 */
			if (expiration > now)
				break;

			/*
			 * Move the callout list for this expiration to the
			 * list of expired callout lists. It will be processed
			 * by the callout executor.
			 */
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			CALLOUT_LIST_APPEND(ct->ct_expired, cl);
		}

		/*
		 * Now delete the root. This is done by swapping the root with
		 * the last item in the heap and downheaping the item.
		 */
		ct->ct_heap_num--;
		if (ct->ct_heap_num > 0) {
			ct->ct_heap[0] = ct->ct_heap[ct->ct_heap_num];
			callout_downheap(ct);
		}
	}

	/*
	 * If this callout table is empty or callouts have been suspended
	 * by CPR, just return. The cyclic has already been programmed to
	 * infinity by the cyclic subsystem.
	 */
	if ((ct->ct_heap_num == 0) || (ct->ct_suspend > 0))
		return;

	(void) cyclic_reprogram(ct->ct_cyclic, expiration);
}

/*
 * Common function used to create normal and realtime callouts.
 *
 * Realtime callouts are handled at CY_LOW_PIL by a cyclic handler. So,
 * there is one restriction on a realtime callout handler - it should not
 * directly or indirectly acquire cpu_lock. CPU offline waits for pending
 * cyclic handlers to complete while holding cpu_lock. So, if a realtime
 * callout handler were to try to get cpu_lock, there would be a deadlock
 * during CPU offline.
 */
callout_id_t
timeout_generic(int type, void (*func)(void *), void *arg,
	hrtime_t expiration, hrtime_t resolution, int flags)
{
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t id;
	callout_list_t *cl;
	hrtime_t now, interval;
	int hash;

	ASSERT(resolution > 0);
	ASSERT(func != NULL);

	/*
	 * Please see comment about minimum resolution in callout_init().
	 */
	if (resolution < callout_min_resolution)
		resolution = callout_min_resolution;

	/*
	 * We disable kernel preemption so that we remain on the same CPU
	 * throughout. If we need to reprogram the callout table's cyclic,
	 * being on the same CPU lets us avoid X-calls.
	 *
	 * Note that callout_alloc() releases and reacquires the callout
	 * table mutex. While reacquiring the mutex, it is possible for us
	 * to go to sleep and later migrate to another CPU. This should be
	 * pretty rare, though.
	 */
	kpreempt_disable();

	ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)];
	mutex_enter(&ct->ct_mutex);

	if (ct->ct_cyclic == CYCLIC_NONE) {
		mutex_exit(&ct->ct_mutex);
		/*
		 * The callout table has not yet been initialized fully.
		 * So, put this one on the boot callout table which is
		 * always initialized.
		 */
		ct = &callout_boot_ct[type];
		mutex_enter(&ct->ct_mutex);
	}

	if ((cp = ct->ct_free) == NULL)
		cp = callout_alloc(ct);
	else
		ct->ct_free = cp->c_idnext;

	cp->c_func = func;
	cp->c_arg = arg;

	/*
	 * Compute the expiration hrtime.
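	 * The expiration is quantized to a multiple of the resolution; with
	 * CALLOUT_FLAG_ROUNDUP it is rounded up, otherwise it is rounded
	 * down. For example (illustrative numbers only), with a 10ms
	 * resolution an absolute expiration of 1.234s becomes 1.24s when
	 * rounding up and 1.23s when rounding down.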
	 */
	now = gethrtime();
	if (flags & CALLOUT_FLAG_ABSOLUTE) {
		ASSERT(expiration > 0);
		interval = expiration - now;
	} else {
		interval = expiration;
		expiration += now;
		ASSERT(expiration > 0);
	}
	if (flags & CALLOUT_FLAG_ROUNDUP)
		expiration += resolution - 1;
	expiration = (expiration / resolution) * resolution;
	if (expiration <= 0) {
		/*
		 * expiration hrtime overflow has occurred. Just set the
		 * expiration to infinity.
		 */
		expiration = CY_INFINITY;
	}

	/*
	 * Assign an ID to this callout.
	 */
	if (flags & CALLOUT_FLAG_32BIT) {
		if (interval > callout_longterm) {
			id = (ct->ct_long_id - callout_counter_low);
			id |= CALLOUT_COUNTER_HIGH;
			ct->ct_long_id = id;
		} else {
			id = (ct->ct_short_id - callout_counter_low);
			id |= CALLOUT_COUNTER_HIGH;
			ct->ct_short_id = id;
		}
	} else {
		id = (ct->ct_gen_id - callout_counter_low);
		if ((id & CALLOUT_COUNTER_HIGH) == 0) {
			id |= CALLOUT_COUNTER_HIGH;
			id += CALLOUT_GENERATION_LOW;
		}
		ct->ct_gen_id = id;
	}

	cp->c_xid = id;
	if (flags & CALLOUT_FLAG_HRESTIME)
		cp->c_xid |= CALLOUT_HRESTIME;

	hash = CALLOUT_CLHASH(expiration);

again:
	/*
	 * Try to see if a callout list already exists for this expiration.
	 * Most of the time, this will be the case.
	 */
	cl = callout_list_get(ct, expiration, hash);
	if (cl == NULL) {
		/*
		 * Check if we have enough space in the heap to insert one
		 * expiration. If not, expand the heap.
		 */
		if (ct->ct_heap_num == ct->ct_heap_max) {
			callout_heap_expand(ct);
			/*
			 * In the above call, we drop the lock, allocate and
			 * reacquire the lock. So, we could have been away
			 * for a while. In the meantime, someone could have
			 * inserted a callout list with the same expiration.
			 * So, the best course is to repeat the steps. This
			 * should be an infrequent event.
			 */
			goto again;
		}

		/*
		 * Check the free list. If we don't find one, we have to
		 * take the slow path and allocate from kmem.
		 */
		if ((cl = ct->ct_lfree) == NULL) {
			callout_list_alloc(ct);
			/*
			 * In the above call, we drop the lock, allocate and
			 * reacquire the lock. So, we could have been away
			 * for a while. In the meantime, someone could have
			 * inserted a callout list with the same expiration.
			 * Plus, the heap could have become full. So, the best
			 * course is to repeat the steps. This should be an
			 * infrequent event.
			 */
			goto again;
		}
		ct->ct_lfree = cl->cl_next;
		cl->cl_expiration = expiration;

		CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl);

		/*
		 * This is a new expiration. So, insert it into the heap.
		 * This will also reprogram the cyclic, if the expiration
		 * propagated to the root of the heap.
		 */
		callout_heap_insert(ct, expiration);
	}
	cp->c_list = cl;
	CALLOUT_APPEND(ct, cp);

	ct->ct_timeouts++;
	ct->ct_timeouts_pending++;

	mutex_exit(&ct->ct_mutex);

	kpreempt_enable();

	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
	    "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration,
	    cp);

	return (id);
}

timeout_id_t
timeout(void (*func)(void *), void *arg, clock_t delta)
{
	ulong_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
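	 * A zero or negative delta is not an error; it simply means "as soon
	 * as possible", i.e., one tick from now. Deltas larger than
	 * callout_max_ticks are clamped before the conversion to nanoseconds.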
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg,
	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);

	return ((timeout_id_t)id);
}

/*
 * Convenience function that creates a normal callout with default parameters
 * and returns a full ID.
 */
callout_id_t
timeout_default(void (*func)(void *), void *arg, clock_t delta)
{
	callout_id_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta),
	    nsec_per_tick, 0);

	return (id);
}

timeout_id_t
realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
{
	ulong_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg,
	    TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY);

	return ((timeout_id_t)id);
}

/*
 * Convenience function that creates a realtime callout with default parameters
 * and returns a full ID.
 */
callout_id_t
realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta)
{
	callout_id_t id;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	else if (delta > callout_max_ticks)
		delta = callout_max_ticks;

	id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta),
	    nsec_per_tick, 0);

	return (id);
}

hrtime_t
untimeout_generic(callout_id_t id, int nowait)
{
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t xid;
	callout_list_t *cl;
	int hash;
	callout_id_t bogus;

	ct = &callout_table[CALLOUT_ID_TO_TABLE(id)];
	hash = CALLOUT_IDHASH(id);

	mutex_enter(&ct->ct_mutex);

	/*
	 * Search the ID hash table for the callout.
	 */
	for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) {

		xid = cp->c_xid;

		/*
		 * Match the ID and generation number.
		 */
		if ((xid & CALLOUT_ID_MASK) != id)
			continue;

		cl = cp->c_list;
		if ((xid & CALLOUT_EXECUTING) == 0) {
			hrtime_t expiration;

			/*
			 * Delete the callout. If the callout list becomes
			 * empty, we don't remove it from the table. This is
			 * so it can be reused. If the empty callout list
			 * corresponds to the top of the callout heap, we
			 * don't reprogram the table cyclic here. This is in
			 * order to avoid lots of X-calls to the CPU associated
			 * with the callout table.
			 */
			expiration = cl->cl_expiration;
			CALLOUT_DELETE(ct, cp);
			cp->c_idnext = ct->ct_free;
			ct->ct_free = cp;
			ct->ct_untimeouts_unexpired++;
			ct->ct_timeouts_pending--;
			mutex_exit(&ct->ct_mutex);

			expiration -= gethrtime();
			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
			    "untimeout:ID %lx hrtime left %llx", id,
			    expiration);
			return (expiration < 0 ? 0 : expiration);
		}

		ct->ct_untimeouts_executing++;
		/*
		 * The callout we want to delete is currently executing.
		 * The DDI states that we must wait until the callout
		 * completes before returning, so we block on cl_done until the
		 * callout ID changes (to the old ID if it's on the freelist,
		 * or to a new callout ID if it's in use). This implicitly
		 * assumes that callout structures are persistent (they are).
		 */
		if (cl->cl_executor == curthread) {
			/*
			 * The timeout handler called untimeout() on itself.
			 * Stupid, but legal. We can't wait for the timeout
			 * to complete without deadlocking, so we just return.
			 */
			mutex_exit(&ct->ct_mutex);
			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
			    "untimeout_self:ID %x", id);
			return (-1);
		}
		if (nowait == 0) {
			/*
			 * We need to wait. Indicate that we are waiting by
			 * incrementing cl_waiting. This prevents the executor
			 * from doing a wakeup on cl_done if there are no
			 * waiters.
			 */
			while (cp->c_xid == xid) {
				cl->cl_waiting = 1;
				cv_wait(&cl->cl_done, &ct->ct_mutex);
			}
		}
		mutex_exit(&ct->ct_mutex);
		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
		    "untimeout_executing:ID %lx", id);
		return (-1);
	}
	ct->ct_untimeouts_expired++;

	mutex_exit(&ct->ct_mutex);
	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
	    "untimeout_bogus_id:ID %lx", id);

	/*
	 * We didn't find the specified callout ID. This means either
	 * (1) the callout already fired, or (2) the caller passed us
	 * a bogus value. Perform a sanity check to detect case (2).
	 */
	bogus = (CALLOUT_EXECUTING | CALLOUT_HRESTIME | CALLOUT_COUNTER_HIGH);
	if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0))
		panic("untimeout: impossible timeout id %llx",
		    (unsigned long long)id);

	return (-1);
}

clock_t
untimeout(timeout_id_t id_arg)
{
	hrtime_t hleft;
	clock_t tleft;
	callout_id_t id;

	id = (ulong_t)id_arg;
	hleft = untimeout_generic(id, 0);
	if (hleft < 0)
		tleft = -1;
	else if (hleft == 0)
		tleft = 0;
	else
		tleft = NSEC_TO_TICK(hleft);

	return (tleft);
}

/*
 * Convenience function to untimeout a callout with a full ID, using
 * default parameters.
 */
clock_t
untimeout_default(callout_id_t id, int nowait)
{
	hrtime_t hleft;
	clock_t tleft;

	hleft = untimeout_generic(id, nowait);
	if (hleft < 0)
		tleft = -1;
	else if (hleft == 0)
		tleft = 0;
	else
		tleft = NSEC_TO_TICK(hleft);

	return (tleft);
}

/*
 * Expire all the callouts queued in the specified callout list.
 */
static void
callout_list_expire(callout_table_t *ct, callout_list_t *cl)
{
	callout_t *cp;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(cl != NULL);

	cl->cl_executor = curthread;

	while ((cp = cl->cl_callouts.ch_head) != NULL) {
		/*
		 * Indicate to untimeout() that a callout is
		 * being expired by the executor.
		 */
		cp->c_xid |= CALLOUT_EXECUTING;
		mutex_exit(&ct->ct_mutex);

		DTRACE_PROBE1(callout__start, callout_t *, cp);
		(*cp->c_func)(cp->c_arg);
		DTRACE_PROBE1(callout__end, callout_t *, cp);

		mutex_enter(&ct->ct_mutex);

		ct->ct_expirations++;
		ct->ct_timeouts_pending--;
		/*
		 * Indicate completion for cl_done.
		 */
		cp->c_xid &= ~CALLOUT_EXECUTING;

		/*
		 * Delete callout from ID hash table and the callout
		 * list, return to freelist, and tell any untimeout() that
		 * cares that we're done.
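		 * Note that cl_waiting is only set by a blocked untimeout(),
		 * so the cv_broadcast() below is skipped in the common case.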
		 */
		CALLOUT_DELETE(ct, cp);
		cp->c_idnext = ct->ct_free;
		ct->ct_free = cp;

		if (cl->cl_waiting) {
			cl->cl_waiting = 0;
			cv_broadcast(&cl->cl_done);
		}
	}

	cl->cl_executor = NULL;
}

/*
 * Execute all expired callout lists for a callout table.
 */
static void
callout_expire(callout_table_t *ct)
{
	callout_list_t *cl, *clnext;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) {
		/*
		 * Multiple executor threads could be running at the same
		 * time. Each callout list is processed by only one thread.
		 * If this callout list is already being processed by another
		 * executor, go on to the next one.
		 */
		if (cl->cl_executor != NULL) {
			clnext = cl->cl_next;
			continue;
		}

		/*
		 * Expire all the callouts in this callout list.
		 */
		callout_list_expire(ct, cl);

		/*
		 * Free the callout list.
		 */
		clnext = cl->cl_next;
		CALLOUT_LIST_DELETE(ct->ct_expired, cl);
		cl->cl_next = ct->ct_lfree;
		ct->ct_lfree = cl;
	}
}

/*
 * The cyclic handlers below process callouts in two steps:
 *
 *	1. Find all expired callout lists and queue them in a separate
 *	   list of expired callouts.
 *	2. Execute the expired callout lists.
 *
 * This is done for two reasons:
 *
 *	1. We want to quickly find the next earliest expiration and reprogram
 *	   the cyclic to it. We can do this right at the end of step 1.
 *	2. The realtime cyclic handler expires callouts in place. However,
 *	   for normal callouts, callouts are expired by a taskq thread.
 *	   So, it is simpler and more robust to have the taskq thread just
 *	   do step 2.
 */

/*
 * Realtime callout cyclic handler.
 */
void
callout_realtime(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	callout_heap_delete(ct);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}

void
callout_execute(callout_table_t *ct)
{
	mutex_enter(&ct->ct_mutex);
	callout_expire(ct);
	mutex_exit(&ct->ct_mutex);
}

/*
 * Normal callout cyclic handler.
 */
void
callout_normal(callout_table_t *ct)
{
	int exec;

	mutex_enter(&ct->ct_mutex);
	callout_heap_delete(ct);
	exec = (ct->ct_expired.ch_head != NULL);
	mutex_exit(&ct->ct_mutex);

	if (exec) {
		ASSERT(ct->ct_taskq != NULL);
		(void) taskq_dispatch(ct->ct_taskq,
		    (task_func_t *)callout_execute, ct, TQ_NOSLEEP);
	}
}

/*
 * Suspend callout processing.
 */
static void
callout_suspend(void)
{
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and suspend callout
	 * processing.
	 *
	 * We need to suspend all the tables (including the inactive ones)
	 * so that if a table is made active while the suspend is still on,
	 * the table remains suspended.
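	 * ct_suspend is a count, so nested suspends (for example, CPR plus
	 * the debugger callback) compose correctly; the cyclic is only
	 * reprogrammed again once the count drops back to zero in
	 * callout_resume().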
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];

			mutex_enter(&ct->ct_mutex);
			ct->ct_suspend++;
			if (ct->ct_cyclic == CYCLIC_NONE) {
				mutex_exit(&ct->ct_mutex);
				continue;
			}
			if (ct->ct_suspend == 1)
				(void) cyclic_reprogram(ct->ct_cyclic,
				    CY_INFINITY);
			mutex_exit(&ct->ct_mutex);
		}
	}
}

static void
callout_adjust(callout_table_t *ct, hrtime_t delta)
{
	int hash, newhash;
	hrtime_t expiration;
	callout_list_t *cl;
	callout_hash_t list;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	/*
	 * In order to adjust the expirations, we null out the heap. Then,
	 * we reinsert adjusted expirations in the heap. Keeps it simple.
	 * Note that the table is still marked suspended here (the caller
	 * does not decrement ct_suspend until after the adjustment), so
	 * the heap inserts below do not reprogram the cyclic.
	 */
	ct->ct_heap_num = 0;

	/*
	 * First, remove all the callout lists from the table and string them
	 * in a list.
	 */
	list.ch_head = list.ch_tail = NULL;
	for (hash = 0; hash < CALLOUT_BUCKETS; hash++) {
		while ((cl = ct->ct_clhash[hash].ch_head) != NULL) {
			CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl);
			CALLOUT_LIST_APPEND(list, cl);
		}
	}

	/*
	 * Now, traverse the callout lists and adjust their expirations.
	 */
	while ((cl = list.ch_head) != NULL) {
		CALLOUT_LIST_DELETE(list, cl);
		/*
		 * Set the new expiration and reinsert in the right
		 * hash bucket.
		 */
		expiration = cl->cl_expiration;
		expiration += delta;
		cl->cl_expiration = expiration;
		newhash = CALLOUT_CLHASH(expiration);
		CALLOUT_LIST_INSERT(ct->ct_clhash[newhash], cl);
		callout_heap_insert(ct, expiration);
	}
}

/*
 * Resume callout processing.
 */
static void
callout_resume(hrtime_t delta)
{
	hrtime_t exp;
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and resume callout
	 * processing. For active tables, perform any hrtime adjustments
	 * necessary.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];

			mutex_enter(&ct->ct_mutex);
			if (ct->ct_cyclic == CYCLIC_NONE) {
				ct->ct_suspend--;
				mutex_exit(&ct->ct_mutex);
				continue;
			}

			if (delta)
				callout_adjust(ct, delta);

			ct->ct_suspend--;
			if (ct->ct_suspend == 0) {
				/*
				 * If the expired list is non-empty, then have
				 * the cyclic expire immediately. Else, program
				 * the cyclic based on the heap.
				 */
				if (ct->ct_expired.ch_head != NULL)
					exp = gethrtime();
				else if (ct->ct_heap_num > 0)
					exp = ct->ct_heap[0];
				else
					exp = 0;
				if (exp != 0)
					(void) cyclic_reprogram(ct->ct_cyclic,
					    exp);
			}
			mutex_exit(&ct->ct_mutex);
		}
	}
}

/*
 * Callback handler used by CPR to stop and resume callouts.
 */
/*ARGSUSED*/
static boolean_t
callout_cpr_callb(void *arg, int code)
{
	if (code == CB_CODE_CPR_CHKPT)
		callout_suspend();
	else
		callout_resume(0);

	return (B_TRUE);
}

/*
 * Callback handler invoked when the debugger is entered or exited.
 */
/*ARGSUSED*/
static boolean_t
callout_debug_callb(void *arg, int code)
{
	hrtime_t delta;

	/*
	 * When the system enters the debugger, make a note of the hrtime.
	 * When it is resumed, compute how long the system was in the
	 * debugger. This interval should not be counted for callouts.
	 */
	if (code == 0) {
		callout_suspend();
		callout_debug_hrtime = gethrtime();
	} else {
		delta = gethrtime() - callout_debug_hrtime;
		callout_resume(delta);
	}

	return (B_TRUE);
}

/*
 * Move the hrestime callouts to the expired list. Then program the table's
 * cyclic to expire immediately so that the callouts can be executed
 * immediately.
 */
static void
callout_hrestime_one(callout_table_t *ct)
{
	callout_list_t *cl, *ecl;
	callout_t *cp;
	int hash;

	mutex_enter(&ct->ct_mutex);
	if (ct->ct_heap_num == 0) {
		mutex_exit(&ct->ct_mutex);
		return;
	}

	if (ct->ct_lfree == NULL)
		callout_list_alloc(ct);
	ecl = ct->ct_lfree;
	ct->ct_lfree = ecl->cl_next;

	for (hash = 0; hash < CALLOUT_BUCKETS; hash++) {
		for (cl = ct->ct_clhash[hash].ch_head; cl; cl = cl->cl_next) {
			for (cp = cl->cl_callouts.ch_head; cp;
			    cp = cp->c_clnext) {
				if ((cp->c_xid & CALLOUT_HRESTIME) == 0)
					continue;
				CALLOUT_HASH_DELETE(cl->cl_callouts, cp,
				    c_clnext, c_clprev);
				cp->c_list = ecl;
				CALLOUT_HASH_APPEND(ecl->cl_callouts, cp,
				    c_clnext, c_clprev);
			}
		}
	}

	if (ecl->cl_callouts.ch_head != NULL) {
		CALLOUT_LIST_APPEND(ct->ct_expired, ecl);
		if (ct->ct_suspend == 0)
			(void) cyclic_reprogram(ct->ct_cyclic, gethrtime());
	} else {
		ecl->cl_next = ct->ct_lfree;
		ct->ct_lfree = ecl;
	}
	mutex_exit(&ct->ct_mutex);
}

/*
 * This function is called whenever system time (hrestime) is changed
 * explicitly. All the HRESTIME callouts must be expired at once.
 */
/*ARGSUSED*/
void
callout_hrestime(void)
{
	int t, f;
	callout_table_t *ct;

	/*
	 * Traverse every callout table in the system and process the hrestime
	 * callouts therein.
	 *
	 * We look at all the tables because we don't know which ones were
	 * onlined and offlined in the past. The offlined tables may still
	 * have active cyclics processing timers somewhere.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			ct = &callout_table[CALLOUT_TABLE(t, f)];
			callout_hrestime_one(ct);
		}
	}
}

/*
 * Create the hash tables for this callout table.
 */
static void
callout_hash_init(callout_table_t *ct)
{
	size_t size;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL));

	size = sizeof (callout_hash_t) * CALLOUT_BUCKETS;
	ct->ct_idhash = kmem_zalloc(size, KM_SLEEP);
	ct->ct_clhash = kmem_zalloc(size, KM_SLEEP);
}

/*
 * Create per-callout table kstats.
 */
static void
callout_kstat_init(callout_table_t *ct)
{
	callout_stat_type_t stat;
	kstat_t *ct_kstats;
	int ndx;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_kstats == NULL);

	ndx = ct - callout_table;
	ct_kstats = kstat_create("unix", ndx, "callout",
	    "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL);

	if (ct_kstats == NULL) {
		cmn_err(CE_WARN, "kstat_create for callout table %p failed",
		    (void *)ct);
	} else {
		ct_kstats->ks_data = ct->ct_kstat_data;
		for (stat = 0; stat < CALLOUT_NUM_STATS; stat++)
			kstat_named_init(&ct->ct_kstat_data[stat],
			    callout_kstat_names[stat], KSTAT_DATA_INT64);
		ct->ct_kstats = ct_kstats;
		kstat_install(ct_kstats);
	}
}

static void
callout_cyclic_init(callout_table_t *ct)
{
	cyc_handler_t hdlr;
	cyc_time_t when;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	t = CALLOUT_TABLE_TYPE(ct);
	seqid = CALLOUT_TABLE_SEQID(ct);

	/*
	 * Create the taskq thread if the table type is normal.
	 * Realtime tables are handled at PIL1 by a softint
	 * handler.
	 */
	if (t == CALLOUT_NORMAL) {
		ASSERT(ct->ct_taskq == NULL);
		/*
		 * Each callout thread consumes exactly one
		 * task structure while active. Therefore,
		 * prepopulating with 2 * CALLOUT_THREADS tasks
		 * ensures that there's at least one task per
		 * thread that's either scheduled or on the
		 * freelist. In turn, this guarantees that
		 * taskq_dispatch() will always either succeed
		 * (because there's a free task structure) or
		 * be unnecessary (because "callout_execute(ct)"
		 * has already been scheduled).
		 */
		ct->ct_taskq =
		    taskq_create_instance("callout_taskq", seqid,
		    CALLOUT_THREADS, maxclsyspri,
		    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
		    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	}

	/*
	 * Callouts can only be created in a table whose
	 * cyclic has been initialized.
	 */
	ASSERT(ct->ct_heap_num == 0);

	/*
	 * Create the callout table cyclics.
	 */
	ASSERT(ct->ct_cyclic == CYCLIC_NONE);

	hdlr.cyh_func = (cyc_func_t)CALLOUT_CYCLIC_HANDLER(t);
	hdlr.cyh_level = CY_LOW_LEVEL;
	hdlr.cyh_arg = ct;
	when.cyt_when = CY_INFINITY;
	when.cyt_interval = CY_INFINITY;

	ct->ct_cyclic = cyclic_add(&hdlr, &when);
}

void
callout_cpu_online(cpu_t *cp)
{
	lgrp_handle_t hand;
	callout_cache_t *cache;
	char s[KMEM_CACHE_NAMELEN];
	callout_table_t *ct;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Locate the cache corresponding to the onlined CPU's lgroup.
	 * Note that access to callout_caches is protected by cpu_lock.
	 */
	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
	for (cache = callout_caches; cache != NULL; cache = cache->cc_next) {
		if (cache->cc_hand == hand)
			break;
	}

	/*
	 * If not found, create one. The caches are never destroyed.
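	 * A separate pair of kmem caches is kept per lgroup so that callout
	 * and callout list memory tends to stay local to that lgroup's CPUs.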
	 */
	if (cache == NULL) {
		cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP);
		cache->cc_hand = hand;
		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx",
		    (long)hand);
		cache->cc_cache = kmem_cache_create(s, sizeof (callout_t),
		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx",
		    (long)hand);
		cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t),
		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
		cache->cc_next = callout_caches;
		callout_caches = cache;
	}

	seqid = cp->cpu_seqid;

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		ct = &callout_table[CALLOUT_TABLE(t, seqid)];

		mutex_enter(&ct->ct_mutex);
		/*
		 * Store convenience pointers to the kmem caches
		 * in the callout table. These assignments should always be
		 * done, because callout tables can map to different physical
		 * CPUs each time.
		 */
		ct->ct_cache = cache->cc_cache;
		ct->ct_lcache = cache->cc_lcache;

		/*
		 * We use the heap pointer to check if stuff has been
		 * initialized for this callout table.
		 */
		if (ct->ct_heap == NULL) {
			callout_heap_init(ct);
			callout_hash_init(ct);
			callout_kstat_init(ct);
			callout_cyclic_init(ct);
		}

		mutex_exit(&ct->ct_mutex);

		/*
		 * Move the cyclic to this CPU by doing a bind.
		 */
		cyclic_bind(ct->ct_cyclic, cp, NULL);
	}
}

void
callout_cpu_offline(cpu_t *cp)
{
	callout_table_t *ct;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&cpu_lock));

	seqid = cp->cpu_seqid;

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		ct = &callout_table[CALLOUT_TABLE(t, seqid)];

		/*
		 * Unbind the cyclic. This will allow the cyclic subsystem
		 * to juggle the cyclic during CPU offline.
		 */
		cyclic_bind(ct->ct_cyclic, NULL, NULL);
	}
}

/*
 * This is called to perform per-CPU initialization for slave CPUs at
 * boot time.
 */
void
callout_mp_init(void)
{
	cpu_t *cp;

	mutex_enter(&cpu_lock);

	cp = cpu_active;
	do {
		callout_cpu_online(cp);
	} while ((cp = cp->cpu_next_onln) != cpu_active);

	mutex_exit(&cpu_lock);
}

/*
 * Initialize all callout tables. Called at boot time just before clkstart().
 */
void
callout_init(void)
{
	int f, t;
	size_t size;
	int table_id;
	callout_table_t *ct;
	long bits, fanout;
	uintptr_t buf;

	/*
	 * Initialize callout globals.
	 */
	bits = 0;
	for (fanout = 1; (fanout < max_ncpus); fanout <<= 1)
		bits++;
	callout_table_bits = CALLOUT_TYPE_BITS + bits;
	callout_table_mask = (1 << callout_table_bits) - 1;
	callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT;
	callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS);
	callout_max_ticks = CALLOUT_MAX_TICKS;

	/*
	 * Because of the variability in timing behavior across systems with
	 * different architectures, we cannot allow arbitrarily low
	 * resolutions. The minimum resolution has to be determined in a
	 * platform-specific way. Until then, we define a blanket minimum
	 * resolution for callouts of CALLOUT_MIN_RESOLUTION.
	 *
	 * If, in the future, someone requires lower resolution timers, they
	 * can do one of two things:
	 *
	 *	- Define a lower value for callout_min_resolution. This would
	 *	  affect all clients of the callout subsystem. If this is done
	 *	  via /etc/system, then no code changes are required and it
	 *	  would affect only that customer.
	 *
	 *	- Define a flag to be passed to timeout creation that allows
	 *	  the lower resolution. This involves code changes. But it
	 *	  would affect only the calling module. It is the developer's
	 *	  responsibility to test on all systems and make sure that
	 *	  everything works.
	 */
	if (callout_min_resolution <= 0)
		callout_min_resolution = CALLOUT_MIN_RESOLUTION;

	/*
	 * Allocate all the callout tables based on max_ncpus. We have chosen
	 * to do boot-time allocation instead of dynamic allocation because:
	 *
	 *	- the size of the callout tables is not too large.
	 *	- there are race conditions involved in making this dynamic.
	 *	- the hash tables that go with the callout tables consume
	 *	  most of the memory and they are only allocated in
	 *	  callout_cpu_online().
	 *
	 * Each CPU has two tables that are consecutive in the array. The first
	 * one is for realtime callouts and the second one is for normal ones.
	 *
	 * We do this alignment dance to make sure that callout table
	 * structures will always be on a cache line boundary.
	 */
	size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus;
	size += CALLOUT_ALIGN;
	buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP);
	callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN);

	size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS;
	/*
	 * Now, initialize the tables for all the CPUs.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			table_id = CALLOUT_TABLE(t, f);
			ct = &callout_table[table_id];
			ct->ct_type = t;
			mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
			/*
			 * Precompute the base IDs for long and short-term
			 * legacy IDs. This makes ID generation during
			 * timeout() fast.
			 */
			ct->ct_short_id = CALLOUT_SHORT_ID(table_id);
			ct->ct_long_id = CALLOUT_LONG_ID(table_id);
			/*
			 * Precompute the base ID for generation-based IDs.
			 * Note that when the first ID gets allocated, the
			 * ID will wrap. This will cause the generation
			 * number to be incremented to 1.
			 */
			ct->ct_gen_id = CALLOUT_SHORT_ID(table_id);
			/*
			 * Initialize the cyclic as NONE. This will get set
			 * during CPU online. This is so that partially
			 * populated systems will only have the required
			 * number of cyclics, not more.
			 */
			ct->ct_cyclic = CYCLIC_NONE;
			ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP);
		}
	}

	/*
	 * Add the callback for CPR. This is called during checkpoint/resume
	 * to suspend and resume callouts.
	 */
	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT,
	    "callout_cpr");
	(void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER,
	    "callout_debug");

	/*
	 * Call the per-CPU initialization function for the boot CPU. This
	 * is done here because the function is not called automatically for
	 * the boot CPU from the CPU online/offline hooks. Note that the
	 * CPU lock is taken here because of convention.
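	 * (callout_cpu_online() asserts that cpu_lock is held, so taking it
	 * here also satisfies that assertion.)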
	 */
	mutex_enter(&cpu_lock);
	callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)];
	callout_cpu_online(CPU);
	mutex_exit(&cpu_lock);
}