1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/callo.h> 27 #include <sys/param.h> 28 #include <sys/types.h> 29 #include <sys/cpuvar.h> 30 #include <sys/thread.h> 31 #include <sys/kmem.h> 32 #include <sys/kmem_impl.h> 33 #include <sys/cmn_err.h> 34 #include <sys/callb.h> 35 #include <sys/debug.h> 36 #include <sys/vtrace.h> 37 #include <sys/sysmacros.h> 38 #include <sys/sdt.h> 39 40 /* 41 * Callout tables. See timeout(9F) for details. 42 */ 43 static hrtime_t callout_debug_hrtime; /* debugger entry time */ 44 static int callout_min_resolution; /* Minimum resolution */ 45 static callout_table_t *callout_boot_ct; /* Boot CPU's callout tables */ 46 static hrtime_t callout_longterm; /* longterm nanoseconds */ 47 static ulong_t callout_counter_low; /* callout ID increment */ 48 static ulong_t callout_table_bits; /* number of table bits in ID */ 49 static ulong_t callout_table_mask; /* mask for the table bits */ 50 static callout_cache_t *callout_caches; /* linked list of caches */ 51 #pragma align 64(callout_table) 52 static callout_table_t *callout_table; /* global callout table array */ 53 54 static char *callout_kstat_names[] = { 55 "callout_timeouts", 56 "callout_timeouts_pending", 57 "callout_untimeouts_unexpired", 58 "callout_untimeouts_executing", 59 "callout_untimeouts_expired", 60 "callout_expirations", 61 "callout_allocations", 62 }; 63 64 #define CALLOUT_HASH_INSERT(hash, cp, cnext, cprev) \ 65 { \ 66 callout_hash_t *hashp = &(hash); \ 67 \ 68 cp->cprev = NULL; \ 69 cp->cnext = hashp->ch_head; \ 70 if (hashp->ch_head == NULL) \ 71 hashp->ch_tail = cp; \ 72 else \ 73 cp->cnext->cprev = cp; \ 74 hashp->ch_head = cp; \ 75 } 76 77 #define CALLOUT_HASH_APPEND(hash, cp, cnext, cprev) \ 78 { \ 79 callout_hash_t *hashp = &(hash); \ 80 \ 81 cp->cnext = NULL; \ 82 cp->cprev = hashp->ch_tail; \ 83 if (hashp->ch_tail == NULL) \ 84 hashp->ch_head = cp; \ 85 else \ 86 cp->cprev->cnext = cp; \ 87 hashp->ch_tail = cp; \ 88 } 89 90 #define CALLOUT_HASH_DELETE(hash, cp, cnext, cprev) \ 91 { \ 92 callout_hash_t *hashp = &(hash); \ 93 \ 94 if (cp->cnext == NULL) \ 95 hashp->ch_tail = cp->cprev; \ 96 else \ 97 cp->cnext->cprev = cp->cprev; \ 98 if (cp->cprev == NULL) \ 99 hashp->ch_head = cp->cnext; \ 100 else \ 101 cp->cprev->cnext = cp->cnext; \ 102 } 103 104 /* 105 * These definitions help us queue callouts and callout lists. Here is 106 * the queueing rationale: 107 * 108 * - callouts are queued in a FIFO manner in the ID hash table. 109 * TCP timers are typically cancelled in the same order that they 110 * were issued. 
The FIFO queueing shortens the search for a callout 111 * during untimeout(). 112 * 113 * - callouts are queued in a FIFO manner in their callout lists. 114 * This ensures that the callouts are executed in the same order that 115 * they were queued. This is fair. Plus, it helps to make each 116 * callout expiration timely. It also favors cancellations. 117 * 118 * - callout lists are queued in a LIFO manner in the callout list hash 119 * table. This ensures that long term timers stay at the rear of the 120 * hash lists. 121 * 122 * - callout lists are queued in a FIFO manner in the expired callouts 123 * list. This ensures that callout lists are executed in the order 124 * of expiration. 125 */ 126 #define CALLOUT_APPEND(ct, cp) \ 127 CALLOUT_HASH_APPEND(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)], \ 128 cp, c_idnext, c_idprev); \ 129 CALLOUT_HASH_APPEND(cp->c_list->cl_callouts, cp, c_clnext, c_clprev) 130 131 #define CALLOUT_DELETE(ct, cp) \ 132 CALLOUT_HASH_DELETE(ct->ct_idhash[CALLOUT_IDHASH(cp->c_xid)], \ 133 cp, c_idnext, c_idprev); \ 134 CALLOUT_HASH_DELETE(cp->c_list->cl_callouts, cp, c_clnext, c_clprev) 135 136 #define CALLOUT_LIST_INSERT(hash, cl) \ 137 CALLOUT_HASH_INSERT(hash, cl, cl_next, cl_prev) 138 139 #define CALLOUT_LIST_APPEND(hash, cl) \ 140 CALLOUT_HASH_APPEND(hash, cl, cl_next, cl_prev) 141 142 #define CALLOUT_LIST_DELETE(hash, cl) \ 143 CALLOUT_HASH_DELETE(hash, cl, cl_next, cl_prev) 144 145 /* 146 * Allocate a callout structure. We try quite hard because we 147 * can't sleep, and if we can't do the allocation, we're toast. 148 * Failing all, we try a KM_PANIC allocation. Note that we never 149 * deallocate a callout. See untimeout() for the reasoning. 150 */ 151 static callout_t * 152 callout_alloc(callout_table_t *ct) 153 { 154 size_t size; 155 callout_t *cp; 156 157 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 158 mutex_exit(&ct->ct_mutex); 159 160 cp = kmem_cache_alloc(ct->ct_cache, KM_NOSLEEP); 161 if (cp == NULL) { 162 size = sizeof (callout_t); 163 cp = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 164 } 165 cp->c_xid = 0; 166 167 mutex_enter(&ct->ct_mutex); 168 ct->ct_allocations++; 169 return (cp); 170 } 171 172 /* 173 * Allocate a callout list structure. We try quite hard because we 174 * can't sleep, and if we can't do the allocation, we're toast. 175 * Failing all, we try a KM_PANIC allocation. Note that we never 176 * deallocate a callout list. 177 */ 178 static void 179 callout_list_alloc(callout_table_t *ct) 180 { 181 size_t size; 182 callout_list_t *cl; 183 184 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 185 mutex_exit(&ct->ct_mutex); 186 187 cl = kmem_cache_alloc(ct->ct_lcache, KM_NOSLEEP); 188 if (cl == NULL) { 189 size = sizeof (callout_list_t); 190 cl = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 191 } 192 bzero(cl, sizeof (callout_list_t)); 193 194 mutex_enter(&ct->ct_mutex); 195 cl->cl_next = ct->ct_lfree; 196 ct->ct_lfree = cl; 197 } 198 199 /* 200 * Find the callout list that corresponds to an expiration. There can 201 * be only one. 202 */ 203 static callout_list_t * 204 callout_list_get(callout_table_t *ct, hrtime_t expiration, int hash) 205 { 206 callout_list_t *cl; 207 208 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 209 210 for (cl = ct->ct_clhash[hash].ch_head; (cl != NULL); cl = cl->cl_next) { 211 if (cl->cl_expiration == expiration) 212 return (cl); 213 } 214 215 return (NULL); 216 } 217 218 /* 219 * Find the callout list that corresponds to an expiration. There can 220 * be only one. If the callout list is null, free it. Else, return it. 
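 * ("Null" here means that the list has no callouts queued on it; the
 * structure itself is not freed but returned to the table's free list,
 * ct_lfree, for reuse.)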
221 */ 222 static callout_list_t * 223 callout_list_check(callout_table_t *ct, hrtime_t expiration, int hash) 224 { 225 callout_list_t *cl; 226 227 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 228 229 cl = callout_list_get(ct, expiration, hash); 230 if (cl != NULL) { 231 if (cl->cl_callouts.ch_head != NULL) { 232 /* 233 * There is exactly one callout list for every 234 * unique expiration. So, we are done. 235 */ 236 return (cl); 237 } 238 239 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 240 cl->cl_next = ct->ct_lfree; 241 ct->ct_lfree = cl; 242 } 243 244 return (NULL); 245 } 246 247 /* 248 * Initialize a callout table's heap, if necessary. Preallocate some free 249 * entries so we don't have to check for NULL elsewhere. 250 */ 251 static void 252 callout_heap_init(callout_table_t *ct) 253 { 254 size_t size; 255 256 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 257 ASSERT(ct->ct_heap == NULL); 258 259 ct->ct_heap_num = 0; 260 ct->ct_heap_max = CALLOUT_CHUNK; 261 size = sizeof (hrtime_t) * CALLOUT_CHUNK; 262 ct->ct_heap = kmem_alloc(size, KM_SLEEP); 263 } 264 265 /* 266 * Reallocate the heap. We try quite hard because we can't sleep, and if 267 * we can't do the allocation, we're toast. Failing all, we try a KM_PANIC 268 * allocation. Note that the heap only expands, it never contracts. 269 */ 270 static void 271 callout_heap_expand(callout_table_t *ct) 272 { 273 size_t max, size, osize; 274 hrtime_t *heap; 275 276 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 277 ASSERT(ct->ct_heap_num <= ct->ct_heap_max); 278 279 while (ct->ct_heap_num == ct->ct_heap_max) { 280 max = ct->ct_heap_max; 281 mutex_exit(&ct->ct_mutex); 282 283 osize = sizeof (hrtime_t) * max; 284 size = sizeof (hrtime_t) * (max + CALLOUT_CHUNK); 285 heap = kmem_alloc_tryhard(size, &size, KM_NOSLEEP | KM_PANIC); 286 287 mutex_enter(&ct->ct_mutex); 288 if (max < ct->ct_heap_max) { 289 /* 290 * Someone beat us to the allocation. Free what we 291 * just allocated and proceed. 292 */ 293 kmem_free(heap, size); 294 continue; 295 } 296 297 bcopy(ct->ct_heap, heap, osize); 298 kmem_free(ct->ct_heap, osize); 299 ct->ct_heap = heap; 300 ct->ct_heap_max = size / sizeof (hrtime_t); 301 } 302 } 303 304 /* 305 * Move an expiration from the bottom of the heap to its correct place 306 * in the heap. If we reached the root doing this, return 1. Else, 307 * return 0. 308 */ 309 static int 310 callout_upheap(callout_table_t *ct) 311 { 312 int current, parent; 313 hrtime_t *heap, current_expiration, parent_expiration; 314 315 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 316 ASSERT(ct->ct_heap_num >= 1); 317 318 if (ct->ct_heap_num == 1) { 319 return (1); 320 } 321 322 heap = ct->ct_heap; 323 current = ct->ct_heap_num - 1; 324 325 for (;;) { 326 parent = CALLOUT_HEAP_PARENT(current); 327 current_expiration = heap[current]; 328 parent_expiration = heap[parent]; 329 330 /* 331 * We have an expiration later than our parent; we're done. 332 */ 333 if (current_expiration >= parent_expiration) { 334 return (0); 335 } 336 337 /* 338 * We need to swap with our parent, and continue up the heap. 339 */ 340 heap[parent] = current_expiration; 341 heap[current] = parent_expiration; 342 343 /* 344 * If we just reached the root, we're done. 345 */ 346 if (parent == 0) { 347 return (1); 348 } 349 350 current = parent; 351 } 352 /*NOTREACHED*/ 353 } 354 355 /* 356 * Insert a new, unique expiration into a callout table's heap. 
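 * The heap is an array of expiration times organized as a binary min-heap,
 * so the earliest expiration is always at ct_heap[0].  The parent/child
 * relationships come from the CALLOUT_HEAP_PARENT/LEFT/RIGHT macros
 * (conventionally (i - 1) / 2, 2i + 1 and 2i + 2 for the entry at index i).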
357 */ 358 static void 359 callout_heap_insert(callout_table_t *ct, hrtime_t expiration) 360 { 361 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 362 ASSERT(ct->ct_heap_num < ct->ct_heap_max); 363 364 /* 365 * First, copy the expiration to the bottom of the heap. 366 */ 367 ct->ct_heap[ct->ct_heap_num] = expiration; 368 ct->ct_heap_num++; 369 370 /* 371 * Now, perform an upheap operation. If we reached the root, then 372 * the cyclic needs to be reprogrammed as we have an earlier 373 * expiration. 374 * 375 * Also, during the CPR suspend phase, do not reprogram the cyclic. 376 * We don't want any callout activity. When the CPR resume phase is 377 * entered, the cyclic will be programmed for the earliest expiration 378 * in the heap. 379 */ 380 if (callout_upheap(ct) && !(ct->ct_flags & CALLOUT_TABLE_SUSPENDED)) 381 (void) cyclic_reprogram(ct->ct_cyclic, expiration); 382 } 383 384 /* 385 * Move an expiration from the top of the heap to its correct place 386 * in the heap. 387 */ 388 static void 389 callout_downheap(callout_table_t *ct) 390 { 391 int left, right, current, nelems; 392 hrtime_t *heap, left_expiration, right_expiration, current_expiration; 393 394 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 395 ASSERT(ct->ct_heap_num >= 1); 396 397 heap = ct->ct_heap; 398 current = 0; 399 nelems = ct->ct_heap_num; 400 401 for (;;) { 402 /* 403 * If we don't have a left child (i.e., we're a leaf), we're 404 * done. 405 */ 406 if ((left = CALLOUT_HEAP_LEFT(current)) >= nelems) 407 return; 408 409 left_expiration = heap[left]; 410 current_expiration = heap[current]; 411 412 right = CALLOUT_HEAP_RIGHT(current); 413 414 /* 415 * Even if we don't have a right child, we still need to compare 416 * our expiration against that of our left child. 417 */ 418 if (right >= nelems) 419 goto comp_left; 420 421 right_expiration = heap[right]; 422 423 /* 424 * We have both a left and a right child. We need to compare 425 * the expiration of the children to determine which 426 * expires earlier. 427 */ 428 if (right_expiration < left_expiration) { 429 /* 430 * Our right child is the earlier of our children. 431 * We'll now compare our expiration to its expiration. 432 * If ours is the earlier one, we're done. 433 */ 434 if (current_expiration <= right_expiration) 435 return; 436 437 /* 438 * Our right child expires earlier than we do; swap 439 * with our right child, and descend right. 440 */ 441 heap[right] = current_expiration; 442 heap[current] = right_expiration; 443 current = right; 444 continue; 445 } 446 447 comp_left: 448 /* 449 * Our left child is the earlier of our children (or we have 450 * no right child). We'll now compare our expiration 451 * to its expiration. If ours is the earlier one, we're done. 452 */ 453 if (current_expiration <= left_expiration) 454 return; 455 456 /* 457 * Our left child expires earlier than we do; swap with our 458 * left child, and descend left. 459 */ 460 heap[left] = current_expiration; 461 heap[current] = left_expiration; 462 current = left; 463 } 464 } 465 466 /* 467 * Delete and handle all past expirations in a callout table's heap. 468 */ 469 static void 470 callout_heap_delete(callout_table_t *ct) 471 { 472 hrtime_t now, expiration; 473 callout_list_t *cl; 474 int hash; 475 476 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 477 478 now = gethrtime(); 479 480 while (ct->ct_heap_num > 0) { 481 expiration = ct->ct_heap[0]; 482 /* 483 * Find the callout list that corresponds to the expiration. 484 * If the callout list is empty, callout_list_check() 485 * will free the callout list and return NULL. 
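		 * (A callout list can be empty here if all of its callouts
		 * were cancelled via untimeout(), which leaves the empty
		 * list and its heap entry in place for reuse.)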
486 */ 487 hash = CALLOUT_CLHASH(expiration); 488 cl = callout_list_check(ct, expiration, hash); 489 if (cl != NULL) { 490 /* 491 * If the root of the heap expires in the future, we are 492 * done. We are doing this check here instead of at the 493 * beginning because we want to first free all the 494 * empty callout lists at the top of the heap. 495 */ 496 if (expiration > now) 497 break; 498 499 /* 500 * Move the callout list for this expiration to the 501 * list of expired callout lists. It will be processed 502 * by the callout executor. 503 */ 504 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 505 CALLOUT_LIST_APPEND(ct->ct_expired, cl); 506 } 507 508 /* 509 * Now delete the root. This is done by swapping the root with 510 * the last item in the heap and downheaping the item. 511 */ 512 ct->ct_heap_num--; 513 if (ct->ct_heap_num > 0) { 514 ct->ct_heap[0] = ct->ct_heap[ct->ct_heap_num]; 515 callout_downheap(ct); 516 } 517 } 518 519 /* 520 * If this callout table is empty or callouts have been suspended 521 * by CPR, just return. The cyclic has already been programmed to 522 * infinity by the cyclic subsystem. 523 */ 524 if ((ct->ct_heap_num == 0) || (ct->ct_flags & CALLOUT_TABLE_SUSPENDED)) 525 return; 526 527 (void) cyclic_reprogram(ct->ct_cyclic, expiration); 528 } 529 530 callout_id_t 531 timeout_generic(int type, void (*func)(void *), void *arg, 532 hrtime_t expiration, hrtime_t resolution, int flags) 533 { 534 callout_table_t *ct; 535 callout_t *cp; 536 callout_id_t id; 537 callout_list_t *cl; 538 hrtime_t now, interval; 539 int hash; 540 541 ASSERT(resolution > 0); 542 ASSERT(func != NULL); 543 544 /* 545 * Please see comment about minimum resolution in callout_init(). 546 */ 547 if (resolution < callout_min_resolution) 548 resolution = callout_min_resolution; 549 550 /* 551 * We disable kernel preemption so that we remain on the same CPU 552 * throughout. If we needed to reprogram the callout table's cyclic, 553 * we can avoid X-calls if we are on the same CPU. 554 * 555 * Note that callout_alloc() releases and reacquires the callout 556 * table mutex. While reacquiring the mutex, it is possible for us 557 * to go to sleep and later migrate to another CPU. This should be 558 * pretty rare, though. 559 */ 560 kpreempt_disable(); 561 562 ct = &callout_table[CALLOUT_TABLE(type, CPU->cpu_seqid)]; 563 mutex_enter(&ct->ct_mutex); 564 565 if (ct->ct_cyclic == CYCLIC_NONE) { 566 mutex_exit(&ct->ct_mutex); 567 /* 568 * The callout table has not yet been initialized fully. 569 * So, put this one on the boot callout table which is 570 * always initialized. 571 */ 572 ct = &callout_boot_ct[type]; 573 mutex_enter(&ct->ct_mutex); 574 } 575 576 if ((cp = ct->ct_free) == NULL) 577 cp = callout_alloc(ct); 578 else 579 ct->ct_free = cp->c_idnext; 580 581 cp->c_func = func; 582 cp->c_arg = arg; 583 584 /* 585 * Compute the expiration hrtime. 
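	 * The expiration is quantized to the callout resolution.  As a
	 * purely illustrative example, with a resolution of 1000000ns an
	 * absolute expiration of 12345678ns becomes 13000000ns when
	 * CALLOUT_FLAG_ROUNDUP is set and 12000000ns otherwise.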
586 */ 587 now = gethrtime(); 588 if (flags & CALLOUT_FLAG_ABSOLUTE) { 589 ASSERT(expiration > 0); 590 interval = expiration - now; 591 } else { 592 interval = expiration; 593 expiration += now; 594 ASSERT(expiration > 0); 595 } 596 if (flags & CALLOUT_FLAG_ROUNDUP) 597 expiration += resolution - 1; 598 expiration = (expiration / resolution) * resolution; 599 600 /* 601 * Assign an ID to this callout 602 */ 603 if (flags & CALLOUT_FLAG_32BIT) { 604 if (interval > callout_longterm) { 605 id = (ct->ct_long_id - callout_counter_low); 606 id |= CALLOUT_COUNTER_HIGH; 607 ct->ct_long_id = id; 608 } else { 609 id = (ct->ct_short_id - callout_counter_low); 610 id |= CALLOUT_COUNTER_HIGH; 611 ct->ct_short_id = id; 612 } 613 } else { 614 id = (ct->ct_gen_id - callout_counter_low); 615 if ((id & CALLOUT_COUNTER_HIGH) == 0) { 616 id |= CALLOUT_COUNTER_HIGH; 617 id += CALLOUT_GENERATION_LOW; 618 } 619 ct->ct_gen_id = id; 620 } 621 622 cp->c_xid = id; 623 if (flags & CALLOUT_FLAG_HRESTIME) 624 cp->c_xid |= CALLOUT_HRESTIME; 625 626 hash = CALLOUT_CLHASH(expiration); 627 628 again: 629 /* 630 * Try to see if a callout list already exists for this expiration. 631 * Most of the time, this will be the case. 632 */ 633 cl = callout_list_get(ct, expiration, hash); 634 if (cl == NULL) { 635 /* 636 * Check if we have enough space in the heap to insert one 637 * expiration. If not, expand the heap. 638 */ 639 if (ct->ct_heap_num == ct->ct_heap_max) { 640 callout_heap_expand(ct); 641 /* 642 * In the above call, we drop the lock, allocate and 643 * reacquire the lock. So, we could have been away 644 * for a while. In the meantime, someone could have 645 * inserted a callout list with the same expiration. 646 * So, the best course is to repeat the steps. This 647 * should be an infrequent event. 648 */ 649 goto again; 650 } 651 652 /* 653 * Check the free list. If we don't find one, we have to 654 * take the slow path and allocate from kmem. 655 */ 656 if ((cl = ct->ct_lfree) == NULL) { 657 callout_list_alloc(ct); 658 /* 659 * In the above call, we drop the lock, allocate and 660 * reacquire the lock. So, we could have been away 661 * for a while. In the meantime, someone could have 662 * inserted a callout list with the same expiration. 663 * Plus, the heap could have become full. So, the best 664 * course is to repeat the steps. This should be an 665 * infrequent event. 666 */ 667 goto again; 668 } 669 ct->ct_lfree = cl->cl_next; 670 cl->cl_expiration = expiration; 671 672 CALLOUT_LIST_INSERT(ct->ct_clhash[hash], cl); 673 674 /* 675 * This is a new expiration. So, insert it into the heap. 676 * This will also reprogram the cyclic, if the expiration 677 * propagated to the root of the heap. 678 */ 679 callout_heap_insert(ct, expiration); 680 } 681 cp->c_list = cl; 682 CALLOUT_APPEND(ct, cp); 683 684 ct->ct_timeouts++; 685 ct->ct_timeouts_pending++; 686 687 mutex_exit(&ct->ct_mutex); 688 689 kpreempt_enable(); 690 691 TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT, 692 "timeout:%K(%p) in %llx expiration, cp %p", func, arg, expiration, 693 cp); 694 695 return (id); 696 } 697 698 timeout_id_t 699 timeout(void (*func)(void *), void *arg, clock_t delta) 700 { 701 ulong_t id; 702 703 /* 704 * Make sure the callout runs at least 1 tick in the future. 
705 */ 706 if (delta <= 0) 707 delta = 1; 708 709 id = (ulong_t)timeout_generic(CALLOUT_NORMAL, func, arg, 710 TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY); 711 712 return ((timeout_id_t)id); 713 } 714 715 /* 716 * Convenience function that creates a normal callout with default parameters 717 * and returns a full ID. 718 */ 719 callout_id_t 720 timeout_default(void (*func)(void *), void *arg, clock_t delta) 721 { 722 callout_id_t id; 723 724 /* 725 * Make sure the callout runs at least 1 tick in the future. 726 */ 727 if (delta <= 0) 728 delta = 1; 729 730 id = timeout_generic(CALLOUT_NORMAL, func, arg, TICK_TO_NSEC(delta), 731 nsec_per_tick, 0); 732 733 return (id); 734 } 735 736 timeout_id_t 737 realtime_timeout(void (*func)(void *), void *arg, clock_t delta) 738 { 739 ulong_t id; 740 741 /* 742 * Make sure the callout runs at least 1 tick in the future. 743 */ 744 if (delta <= 0) 745 delta = 1; 746 747 id = (ulong_t)timeout_generic(CALLOUT_REALTIME, func, arg, 748 TICK_TO_NSEC(delta), nsec_per_tick, CALLOUT_LEGACY); 749 750 return ((timeout_id_t)id); 751 } 752 753 /* 754 * Convenience function that creates a realtime callout with default parameters 755 * and returns a full ID. 756 */ 757 callout_id_t 758 realtime_timeout_default(void (*func)(void *), void *arg, clock_t delta) 759 { 760 callout_id_t id; 761 762 /* 763 * Make sure the callout runs at least 1 tick in the future. 764 */ 765 if (delta <= 0) 766 delta = 1; 767 768 id = timeout_generic(CALLOUT_REALTIME, func, arg, TICK_TO_NSEC(delta), 769 nsec_per_tick, 0); 770 771 return (id); 772 } 773 774 hrtime_t 775 untimeout_generic(callout_id_t id, int nowait) 776 { 777 callout_table_t *ct; 778 callout_t *cp; 779 callout_id_t xid; 780 callout_list_t *cl; 781 int hash; 782 callout_id_t bogus; 783 784 ct = &callout_table[CALLOUT_ID_TO_TABLE(id)]; 785 hash = CALLOUT_IDHASH(id); 786 787 mutex_enter(&ct->ct_mutex); 788 789 /* 790 * Search the ID hash table for the callout. 791 */ 792 for (cp = ct->ct_idhash[hash].ch_head; cp; cp = cp->c_idnext) { 793 794 xid = cp->c_xid; 795 796 /* 797 * Match the ID and generation number. 798 */ 799 if ((xid & CALLOUT_ID_MASK) != id) 800 continue; 801 802 cl = cp->c_list; 803 if ((xid & CALLOUT_EXECUTING) == 0) { 804 hrtime_t expiration; 805 806 /* 807 * Delete the callout. If the callout list becomes 808 * NULL, we don't remove it from the table. This is 809 * so it can be reused. If the empty callout list 810 * corresponds to the top of the the callout heap, we 811 * don't reprogram the table cyclic here. This is in 812 * order to avoid lots of X-calls to the CPU associated 813 * with the callout table. 814 */ 815 expiration = cl->cl_expiration; 816 CALLOUT_DELETE(ct, cp); 817 cp->c_idnext = ct->ct_free; 818 ct->ct_free = cp; 819 ct->ct_untimeouts_unexpired++; 820 ct->ct_timeouts_pending--; 821 mutex_exit(&ct->ct_mutex); 822 823 expiration -= gethrtime(); 824 TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT, 825 "untimeout:ID %lx hrtime left %llx", id, 826 expiration); 827 return (expiration < 0 ? 0 : expiration); 828 } 829 830 ct->ct_untimeouts_executing++; 831 /* 832 * The callout we want to delete is currently executing. 833 * The DDI states that we must wait until the callout 834 * completes before returning, so we block on cl_done until the 835 * callout ID changes (to the old ID if it's on the freelist, 836 * or to a new callout ID if it's in use). This implicitly 837 * assumes that callout structures are persistent (they are). 
838 */ 839 if (cl->cl_executor == curthread) { 840 /* 841 * The timeout handler called untimeout() on itself. 842 * Stupid, but legal. We can't wait for the timeout 843 * to complete without deadlocking, so we just return. 844 */ 845 mutex_exit(&ct->ct_mutex); 846 TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF, 847 "untimeout_self:ID %x", id); 848 return (-1); 849 } 850 if (nowait == 0) { 851 /* 852 * We need to wait. Indicate that we are waiting by 853 * incrementing cl_waiting. This prevents the executor 854 * from doing a wakeup on cl_done if there are no 855 * waiters. 856 */ 857 while (cp->c_xid == xid) { 858 cl->cl_waiting = 1; 859 cv_wait(&cl->cl_done, &ct->ct_mutex); 860 } 861 } 862 mutex_exit(&ct->ct_mutex); 863 TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING, 864 "untimeout_executing:ID %lx", id); 865 return (-1); 866 } 867 ct->ct_untimeouts_expired++; 868 869 mutex_exit(&ct->ct_mutex); 870 TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID, 871 "untimeout_bogus_id:ID %lx", id); 872 873 /* 874 * We didn't find the specified callout ID. This means either 875 * (1) the callout already fired, or (2) the caller passed us 876 * a bogus value. Perform a sanity check to detect case (2). 877 */ 878 bogus = (CALLOUT_EXECUTING | CALLOUT_HRESTIME | CALLOUT_COUNTER_HIGH); 879 if (((id & bogus) != CALLOUT_COUNTER_HIGH) && (id != 0)) 880 panic("untimeout: impossible timeout id %llx", 881 (unsigned long long)id); 882 883 return (-1); 884 } 885 886 clock_t 887 untimeout(timeout_id_t id_arg) 888 { 889 hrtime_t hleft; 890 clock_t tleft; 891 callout_id_t id; 892 893 id = (ulong_t)id_arg; 894 hleft = untimeout_generic(id, 0); 895 if (hleft < 0) 896 tleft = -1; 897 else if (hleft == 0) 898 tleft = 0; 899 else 900 tleft = NSEC_TO_TICK(hleft); 901 902 return (tleft); 903 } 904 905 /* 906 * Convenience function to untimeout a timeout with a full ID with default 907 * parameters. 908 */ 909 clock_t 910 untimeout_default(callout_id_t id, int nowait) 911 { 912 hrtime_t hleft; 913 clock_t tleft; 914 915 hleft = untimeout_generic(id, nowait); 916 if (hleft < 0) 917 tleft = -1; 918 else if (hleft == 0) 919 tleft = 0; 920 else 921 tleft = NSEC_TO_TICK(hleft); 922 923 return (tleft); 924 } 925 926 /* 927 * Expire all the callouts queued in the specified callout list. 928 */ 929 static void 930 callout_list_expire(callout_table_t *ct, callout_list_t *cl) 931 { 932 callout_t *cp; 933 934 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 935 ASSERT(cl != NULL); 936 937 cl->cl_executor = curthread; 938 939 while ((cp = cl->cl_callouts.ch_head) != NULL) { 940 /* 941 * Indicate to untimeout() that a callout is 942 * being expired by the executor. 943 */ 944 cp->c_xid |= CALLOUT_EXECUTING; 945 mutex_exit(&ct->ct_mutex); 946 947 DTRACE_PROBE1(callout__start, callout_t *, cp); 948 (*cp->c_func)(cp->c_arg); 949 DTRACE_PROBE1(callout__end, callout_t *, cp); 950 951 mutex_enter(&ct->ct_mutex); 952 953 ct->ct_expirations++; 954 ct->ct_timeouts_pending--; 955 /* 956 * Indicate completion for cl_done. 957 */ 958 cp->c_xid &= ~CALLOUT_EXECUTING; 959 960 /* 961 * Delete callout from ID hash table and the callout 962 * list, return to freelist, and tell any untimeout() that 963 * cares that we're done. 964 */ 965 CALLOUT_DELETE(ct, cp); 966 cp->c_idnext = ct->ct_free; 967 ct->ct_free = cp; 968 969 if (cl->cl_waiting) { 970 cl->cl_waiting = 0; 971 cv_broadcast(&cl->cl_done); 972 } 973 } 974 975 cl->cl_executor = NULL; 976 } 977 978 /* 979 * Execute all expired callout lists for a callout table. 
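 * This is called either directly from the realtime cyclic handler, or from
 * a taskq thread on behalf of the normal cyclic handler (see the comment
 * above the cyclic handlers below).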
980 */ 981 static void 982 callout_expire(callout_table_t *ct) 983 { 984 callout_list_t *cl, *clnext; 985 986 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 987 988 for (cl = ct->ct_expired.ch_head; (cl != NULL); cl = clnext) { 989 /* 990 * Multiple executor threads could be running at the same 991 * time. Each callout list is processed by only one thread. 992 * If this callout list is already being processed by another 993 * executor, go on to the next one. 994 */ 995 if (cl->cl_executor != NULL) { 996 clnext = cl->cl_next; 997 continue; 998 } 999 1000 /* 1001 * Expire all the callouts in this callout list. 1002 */ 1003 callout_list_expire(ct, cl); 1004 1005 /* 1006 * Free the callout list. 1007 */ 1008 clnext = cl->cl_next; 1009 CALLOUT_LIST_DELETE(ct->ct_expired, cl); 1010 cl->cl_next = ct->ct_lfree; 1011 ct->ct_lfree = cl; 1012 } 1013 } 1014 1015 /* 1016 * The cyclic handlers below process callouts in two steps: 1017 * 1018 * 1. Find all expired callout lists and queue them in a separate 1019 * list of expired callouts. 1020 * 2. Execute the expired callout lists. 1021 * 1022 * This is done for two reasons: 1023 * 1024 * 1. We want to quickly find the next earliest expiration to program 1025 * the cyclic to and reprogram it. We can do this right at the end 1026 * of step 1. 1027 * 2. The realtime cyclic handler expires callouts in place. However, 1028 * for normal callouts, callouts are expired by a taskq thread. 1029 * So, it is simpler and more robust to have the taskq thread just 1030 * do step 2. 1031 */ 1032 1033 /* 1034 * Realtime callout cyclic handler. 1035 */ 1036 void 1037 callout_realtime(callout_table_t *ct) 1038 { 1039 mutex_enter(&ct->ct_mutex); 1040 callout_heap_delete(ct); 1041 callout_expire(ct); 1042 mutex_exit(&ct->ct_mutex); 1043 } 1044 1045 void 1046 callout_execute(callout_table_t *ct) 1047 { 1048 mutex_enter(&ct->ct_mutex); 1049 callout_expire(ct); 1050 mutex_exit(&ct->ct_mutex); 1051 } 1052 1053 /* 1054 * Normal callout cyclic handler. 1055 */ 1056 void 1057 callout_normal(callout_table_t *ct) 1058 { 1059 int exec; 1060 1061 mutex_enter(&ct->ct_mutex); 1062 callout_heap_delete(ct); 1063 exec = (ct->ct_expired.ch_head != NULL); 1064 mutex_exit(&ct->ct_mutex); 1065 1066 if (exec) { 1067 ASSERT(ct->ct_taskq != NULL); 1068 (void) taskq_dispatch(ct->ct_taskq, 1069 (task_func_t *)callout_execute, ct, TQ_NOSLEEP); 1070 } 1071 } 1072 1073 /* 1074 * Suspend callout processing. 1075 */ 1076 static void 1077 callout_suspend(void) 1078 { 1079 int t, f; 1080 callout_table_t *ct; 1081 1082 /* 1083 * Traverse every callout table in the system and suspend callout 1084 * processing. 1085 * 1086 * We need to suspend all the tables (including the inactive ones) 1087 * so that if a table is made active while the suspend is still on, 1088 * the table remains suspended. 1089 */ 1090 for (f = 0; f < max_ncpus; f++) { 1091 for (t = 0; t < CALLOUT_NTYPES; t++) { 1092 ct = &callout_table[CALLOUT_TABLE(t, f)]; 1093 1094 mutex_enter(&ct->ct_mutex); 1095 ct->ct_flags |= CALLOUT_TABLE_SUSPENDED; 1096 if (ct->ct_cyclic == CYCLIC_NONE) { 1097 mutex_exit(&ct->ct_mutex); 1098 continue; 1099 } 1100 (void) cyclic_reprogram(ct->ct_cyclic, CY_INFINITY); 1101 mutex_exit(&ct->ct_mutex); 1102 } 1103 } 1104 } 1105 1106 static void 1107 callout_adjust(callout_table_t *ct, hrtime_t delta) 1108 { 1109 int hash, newhash; 1110 hrtime_t expiration; 1111 callout_list_t *cl; 1112 callout_hash_t list; 1113 1114 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1115 1116 /* 1117 * In order to adjust the expirations, we null out the heap. 
Then, 1118 * we reinsert adjusted expirations in the heap. Keeps it simple. 1119 * Note that since the CALLOUT_TABLE_SUSPENDED flag is set by the 1120 * caller, the heap insert does not result in cyclic reprogramming. 1121 */ 1122 ct->ct_heap_num = 0; 1123 1124 /* 1125 * First, remove all the callout lists from the table and string them 1126 * in a list. 1127 */ 1128 list.ch_head = list.ch_tail = NULL; 1129 for (hash = 0; hash < CALLOUT_BUCKETS; hash++) { 1130 while ((cl = ct->ct_clhash[hash].ch_head) != NULL) { 1131 CALLOUT_LIST_DELETE(ct->ct_clhash[hash], cl); 1132 CALLOUT_LIST_APPEND(list, cl); 1133 } 1134 } 1135 1136 /* 1137 * Now, traverse the callout lists and adjust their expirations. 1138 */ 1139 while ((cl = list.ch_head) != NULL) { 1140 CALLOUT_LIST_DELETE(list, cl); 1141 /* 1142 * Set the new expiration and reinsert in the right 1143 * hash bucket. 1144 */ 1145 expiration = cl->cl_expiration; 1146 expiration += delta; 1147 cl->cl_expiration = expiration; 1148 newhash = CALLOUT_CLHASH(expiration); 1149 CALLOUT_LIST_INSERT(ct->ct_clhash[newhash], cl); 1150 callout_heap_insert(ct, expiration); 1151 } 1152 } 1153 1154 /* 1155 * Resume callout processing. 1156 */ 1157 static void 1158 callout_resume(hrtime_t delta) 1159 { 1160 hrtime_t exp; 1161 int t, f; 1162 callout_table_t *ct; 1163 1164 /* 1165 * Traverse every callout table in the system and resume callout 1166 * processing. For active tables, perform any hrtime adjustments 1167 * necessary. 1168 */ 1169 for (f = 0; f < max_ncpus; f++) { 1170 for (t = 0; t < CALLOUT_NTYPES; t++) { 1171 ct = &callout_table[CALLOUT_TABLE(t, f)]; 1172 1173 mutex_enter(&ct->ct_mutex); 1174 if (ct->ct_cyclic == CYCLIC_NONE) { 1175 ct->ct_flags &= ~CALLOUT_TABLE_SUSPENDED; 1176 mutex_exit(&ct->ct_mutex); 1177 continue; 1178 } 1179 1180 if (delta) 1181 callout_adjust(ct, delta); 1182 1183 ct->ct_flags &= ~CALLOUT_TABLE_SUSPENDED; 1184 1185 /* 1186 * If the expired list is non-empty, then have the 1187 * cyclic expire immediately. Else, program the 1188 * cyclic based on the heap. 1189 */ 1190 if (ct->ct_expired.ch_head != NULL) 1191 exp = gethrtime(); 1192 else if (ct->ct_heap_num > 0) 1193 exp = ct->ct_heap[0]; 1194 else 1195 exp = 0; 1196 if (exp != 0) 1197 (void) cyclic_reprogram(ct->ct_cyclic, exp); 1198 mutex_exit(&ct->ct_mutex); 1199 } 1200 } 1201 } 1202 1203 /* 1204 * Callback handler used by CPR to stop and resume callouts. 1205 */ 1206 /*ARGSUSED*/ 1207 static boolean_t 1208 callout_cpr_callb(void *arg, int code) 1209 { 1210 if (code == CB_CODE_CPR_CHKPT) 1211 callout_suspend(); 1212 else 1213 callout_resume(0); 1214 1215 return (B_TRUE); 1216 } 1217 1218 /* 1219 * Callback handler invoked when the debugger is entered or exited. 1220 */ 1221 /*ARGSUSED*/ 1222 static boolean_t 1223 callout_debug_callb(void *arg, int code) 1224 { 1225 hrtime_t delta; 1226 1227 /* 1228 * When the system enters the debugger. make a note of the hrtime. 1229 * When it is resumed, compute how long the system was in the 1230 * debugger. This interval should not be counted for callouts. 1231 */ 1232 if (code == 0) { 1233 callout_suspend(); 1234 callout_debug_hrtime = gethrtime(); 1235 } else { 1236 delta = gethrtime() - callout_debug_hrtime; 1237 callout_resume(delta); 1238 } 1239 1240 return (B_TRUE); 1241 } 1242 1243 /* 1244 * Move the hrestime callouts to the expired list. Then program the table's 1245 * cyclic to expire immediately so that the callouts can be executed 1246 * immediately. 
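 * Only callouts created with CALLOUT_FLAG_HRESTIME (i.e., those with the
 * CALLOUT_HRESTIME bit set in c_xid) are moved.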
1247 */ 1248 static void 1249 callout_hrestime_one(callout_table_t *ct) 1250 { 1251 callout_list_t *cl, *ecl; 1252 callout_t *cp; 1253 int hash; 1254 1255 mutex_enter(&ct->ct_mutex); 1256 if (ct->ct_heap_num == 0) { 1257 mutex_exit(&ct->ct_mutex); 1258 return; 1259 } 1260 1261 if (ct->ct_lfree == NULL) 1262 callout_list_alloc(ct); 1263 ecl = ct->ct_lfree; 1264 ct->ct_lfree = ecl->cl_next; 1265 1266 for (hash = 0; hash < CALLOUT_BUCKETS; hash++) { 1267 for (cl = ct->ct_clhash[hash].ch_head; cl; cl = cl->cl_next) { 1268 for (cp = cl->cl_callouts.ch_head; cp; 1269 cp = cp->c_clnext) { 1270 if ((cp->c_xid & CALLOUT_HRESTIME) == 0) 1271 continue; 1272 CALLOUT_HASH_DELETE(cl->cl_callouts, cp, 1273 c_clnext, c_clprev); 1274 cp->c_list = ecl; 1275 CALLOUT_HASH_APPEND(ecl->cl_callouts, cp, 1276 c_clnext, c_clprev); 1277 } 1278 } 1279 } 1280 1281 if (ecl->cl_callouts.ch_head != NULL) { 1282 CALLOUT_LIST_APPEND(ct->ct_expired, ecl); 1283 if (!(ct->ct_flags & CALLOUT_TABLE_SUSPENDED)) 1284 (void) cyclic_reprogram(ct->ct_cyclic, gethrtime()); 1285 } else { 1286 ecl->cl_next = ct->ct_lfree; 1287 ct->ct_lfree = ecl; 1288 } 1289 mutex_exit(&ct->ct_mutex); 1290 } 1291 1292 /* 1293 * This function is called whenever system time (hrestime) is changed 1294 * explicitly. All the HRESTIME callouts must be expired at once. 1295 */ 1296 /*ARGSUSED*/ 1297 void 1298 callout_hrestime(void) 1299 { 1300 int t, f; 1301 callout_table_t *ct; 1302 1303 /* 1304 * Traverse every callout table in the system and process the hrestime 1305 * callouts therein. 1306 * 1307 * We look at all the tables because we don't know which ones were 1308 * onlined and offlined in the past. The offlined tables may still 1309 * have active cyclics processing timers somewhere. 1310 */ 1311 for (f = 0; f < max_ncpus; f++) { 1312 for (t = 0; t < CALLOUT_NTYPES; t++) { 1313 ct = &callout_table[CALLOUT_TABLE(t, f)]; 1314 callout_hrestime_one(ct); 1315 } 1316 } 1317 } 1318 1319 /* 1320 * Create the hash tables for this callout table. 1321 */ 1322 static void 1323 callout_hash_init(callout_table_t *ct) 1324 { 1325 size_t size; 1326 1327 ASSERT(MUTEX_HELD(&ct->ct_mutex)); 1328 ASSERT((ct->ct_idhash == NULL) && (ct->ct_clhash == NULL)); 1329 1330 size = sizeof (callout_hash_t) * CALLOUT_BUCKETS; 1331 ct->ct_idhash = kmem_zalloc(size, KM_SLEEP); 1332 ct->ct_clhash = kmem_zalloc(size, KM_SLEEP); 1333 } 1334 1335 /* 1336 * Create per-callout table kstats. 
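 * The kstats are virtual: they point at the ct_kstat_data array that is
 * preallocated for every table in callout_init(), and are named after the
 * entries in callout_kstat_names[].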
 */
static void
callout_kstat_init(callout_table_t *ct)
{
	callout_stat_type_t stat;
	kstat_t *ct_kstats;
	int ndx;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));
	ASSERT(ct->ct_kstats == NULL);

	ndx = ct - callout_table;
	ct_kstats = kstat_create("unix", ndx, "callout",
	    "misc", KSTAT_TYPE_NAMED, CALLOUT_NUM_STATS, KSTAT_FLAG_VIRTUAL);

	if (ct_kstats == NULL) {
		cmn_err(CE_WARN, "kstat_create for callout table %p failed",
		    (void *)ct);
	} else {
		ct_kstats->ks_data = ct->ct_kstat_data;
		for (stat = 0; stat < CALLOUT_NUM_STATS; stat++)
			kstat_named_init(&ct->ct_kstat_data[stat],
			    callout_kstat_names[stat], KSTAT_DATA_INT64);
		ct->ct_kstats = ct_kstats;
		kstat_install(ct_kstats);
	}
}

static void
callout_cyclic_init(callout_table_t *ct)
{
	cyc_handler_t hdlr;
	cyc_time_t when;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&ct->ct_mutex));

	t = CALLOUT_TABLE_TYPE(ct);
	seqid = CALLOUT_TABLE_SEQID(ct);

	/*
	 * Create the taskq thread if the table type is normal.
	 * Realtime tables are handled at PIL1 by a softint
	 * handler.
	 */
	if (t == CALLOUT_NORMAL) {
		ASSERT(ct->ct_taskq == NULL);
		/*
		 * Each callout thread consumes exactly one
		 * task structure while active.  Therefore,
		 * prepopulating with 2 * CALLOUT_THREADS tasks
		 * ensures that there's at least one task per
		 * thread that's either scheduled or on the
		 * freelist.  In turn, this guarantees that
		 * taskq_dispatch() will always either succeed
		 * (because there's a free task structure) or
		 * be unnecessary (because "callout_execute(ct)"
		 * has already scheduled).
		 */
		ct->ct_taskq =
		    taskq_create_instance("callout_taskq", seqid,
		    CALLOUT_THREADS, maxclsyspri,
		    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
		    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	}

	/*
	 * Callouts can only be created in a table whose
	 * cyclic has been initialized.
	 */
	ASSERT(ct->ct_heap_num == 0);

	/*
	 * Create the callout table cyclics.
	 */
	ASSERT(ct->ct_cyclic == CYCLIC_NONE);

	/*
	 * Ideally, the handlers for CALLOUT_REALTIME and CALLOUT_NORMAL should
	 * be run at CY_LOW_LEVEL.  But some callers illegally call delay(9F)
	 * from PIL > 0, and delay(9F) uses normal callouts.  In order to avoid
	 * a deadlock, we run the normal handler from LOCK level.  When the
	 * delay(9F) issue is fixed, this should be fixed as well.
	 */
	hdlr.cyh_func = (cyc_func_t)CALLOUT_CYCLIC_HANDLER(t);
	hdlr.cyh_level = (t == CALLOUT_REALTIME) ? CY_LOW_LEVEL : CY_LOCK_LEVEL;
	hdlr.cyh_arg = ct;
	when.cyt_when = CY_INFINITY;
	when.cyt_interval = CY_INFINITY;

	ct->ct_cyclic = cyclic_add(&hdlr, &when);
}

void
callout_cpu_online(cpu_t *cp)
{
	lgrp_handle_t hand;
	callout_cache_t *cache;
	char s[KMEM_CACHE_NAMELEN];
	callout_table_t *ct;
	processorid_t seqid;
	int t;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Locate the cache corresponding to the onlined CPU's lgroup.
	 * Note that access to callout_caches is protected by cpu_lock.
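	 * One pair of kmem caches (for callout_t and callout_list_t
	 * structures) is created per lgroup and shared by the callout
	 * tables of all CPUs in that lgroup.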
	 */
	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
	for (cache = callout_caches; cache != NULL; cache = cache->cc_next) {
		if (cache->cc_hand == hand)
			break;
	}

	/*
	 * If not found, create one. The caches are never destroyed.
	 */
	if (cache == NULL) {
		cache = kmem_alloc(sizeof (callout_cache_t), KM_SLEEP);
		cache->cc_hand = hand;
		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_cache%lx",
		    (long)hand);
		cache->cc_cache = kmem_cache_create(s, sizeof (callout_t),
		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
		(void) snprintf(s, KMEM_CACHE_NAMELEN, "callout_lcache%lx",
		    (long)hand);
		cache->cc_lcache = kmem_cache_create(s, sizeof (callout_list_t),
		    CALLOUT_ALIGN, NULL, NULL, NULL, NULL, NULL, 0);
		cache->cc_next = callout_caches;
		callout_caches = cache;
	}

	seqid = cp->cpu_seqid;

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		ct = &callout_table[CALLOUT_TABLE(t, seqid)];

		mutex_enter(&ct->ct_mutex);
		/*
		 * Store convenience pointers to the kmem caches
		 * in the callout table. These assignments should always be
		 * done as callout tables can map to different physical
		 * CPUs each time.
		 */
		ct->ct_cache = cache->cc_cache;
		ct->ct_lcache = cache->cc_lcache;

		/*
		 * We use the heap pointer to check if stuff has been
		 * initialized for this callout table.
		 */
		if (ct->ct_heap == NULL) {
			callout_heap_init(ct);
			callout_hash_init(ct);
			callout_kstat_init(ct);
			callout_cyclic_init(ct);
		}

		mutex_exit(&ct->ct_mutex);

		/*
		 * Move the cyclic to this CPU by doing a bind. Then unbind
		 * the cyclic. This will allow the cyclic subsystem to juggle
		 * the cyclic during CPU offline.
		 */
		cyclic_bind(ct->ct_cyclic, cp, NULL);
		cyclic_bind(ct->ct_cyclic, NULL, NULL);
	}
}

/*
 * This is called to perform per-CPU initialization for slave CPUs at
 * boot time.
 */
void
callout_mp_init(void)
{
	cpu_t *cp;

	mutex_enter(&cpu_lock);

	cp = cpu_active;
	do {
		callout_cpu_online(cp);
	} while ((cp = cp->cpu_next_onln) != cpu_active);

	mutex_exit(&cpu_lock);
}

/*
 * Initialize all callout tables. Called at boot time just before clkstart().
 */
void
callout_init(void)
{
	int f, t;
	size_t size;
	int table_id;
	callout_table_t *ct;
	long bits, fanout;
	uintptr_t buf;

	/*
	 * Initialize callout globals.
	 */
	bits = 0;
	for (fanout = 1; (fanout < max_ncpus); fanout <<= 1)
		bits++;
	callout_table_bits = CALLOUT_TYPE_BITS + bits;
	callout_table_mask = (1 << callout_table_bits) - 1;
	callout_counter_low = 1 << CALLOUT_COUNTER_SHIFT;
	callout_longterm = TICK_TO_NSEC(CALLOUT_LONGTERM_TICKS);

	/*
	 * Because of the variability in timing behavior across systems with
	 * different architectures, we cannot allow arbitrarily low
	 * resolutions. The minimum resolution has to be determined in a
	 * platform-specific way. Until then, we define a blanket minimum
	 * resolution for callouts of CALLOUT_MIN_RESOLUTION.
	 *
	 * If, in the future, someone requires lower resolution timers, they
	 * can do one of two things:
	 *
	 *	- Define a lower value for callout_min_resolution. This would
	 *	  affect all clients of the callout subsystem. If this is done
	 *	  via /etc/system, then no code changes are required and it
	 *	  would affect only that customer.
	 *
	 *	- Define a flag to be passed to timeout creation that allows
	 *	  the lower resolution. This involves code changes. But it
	 *	  would affect only the calling module. It is the developer's
	 *	  responsibility to test on all systems and make sure that
	 *	  everything works.
	 */
	if (callout_min_resolution <= 0)
		callout_min_resolution = CALLOUT_MIN_RESOLUTION;

	/*
	 * Allocate all the callout tables based on max_ncpus. We have chosen
	 * to do boot-time allocation instead of dynamic allocation because:
	 *
	 *	- the size of the callout tables is not too large.
	 *	- there are race conditions involved in making this dynamic.
	 *	- the hash tables that go with the callout tables consume
	 *	  most of the memory and they are only allocated in
	 *	  callout_cpu_online().
	 *
	 * Each CPU has two tables that are consecutive in the array. The first
	 * one is for realtime callouts and the second one is for normal ones.
	 *
	 * We do this alignment dance to make sure that callout table
	 * structures will always be on a cache line boundary.
	 */
	size = sizeof (callout_table_t) * CALLOUT_NTYPES * max_ncpus;
	size += CALLOUT_ALIGN;
	buf = (uintptr_t)kmem_zalloc(size, KM_SLEEP);
	callout_table = (callout_table_t *)P2ROUNDUP(buf, CALLOUT_ALIGN);

	size = sizeof (kstat_named_t) * CALLOUT_NUM_STATS;
	/*
	 * Now, initialize the tables for all the CPUs.
	 */
	for (f = 0; f < max_ncpus; f++) {
		for (t = 0; t < CALLOUT_NTYPES; t++) {
			table_id = CALLOUT_TABLE(t, f);
			ct = &callout_table[table_id];
			mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
			/*
			 * Precompute the base IDs for long and short-term
			 * legacy IDs. This makes ID generation during
			 * timeout() fast.
			 */
			ct->ct_short_id = CALLOUT_SHORT_ID(table_id);
			ct->ct_long_id = CALLOUT_LONG_ID(table_id);
			/*
			 * Precompute the base ID for generation-based IDs.
			 * Note that when the first ID gets allocated, the
			 * ID will wrap. This will cause the generation
			 * number to be incremented to 1.
			 */
			ct->ct_gen_id = CALLOUT_SHORT_ID(table_id);
			/*
			 * Initialize the cyclic as NONE. This will get set
			 * during CPU online. This is so that partially
			 * populated systems will only have the required
			 * number of cyclics, not more.
			 */
			ct->ct_cyclic = CYCLIC_NONE;
			ct->ct_kstat_data = kmem_zalloc(size, KM_SLEEP);
		}
	}

	/*
	 * Add the callback for CPR. This is called during checkpoint/resume
	 * to suspend and resume callouts.
	 */
	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT,
	    "callout_cpr");
	(void) callb_add(callout_debug_callb, 0, CB_CL_ENTER_DEBUGGER,
	    "callout_debug");

	/*
	 * Call the per-CPU initialization function for the boot CPU. This
	 * is done here because the function is not called automatically for
	 * the boot CPU from the CPU online/offline hooks. Note that the
	 * CPU lock is taken here by convention.
	 */
	mutex_enter(&cpu_lock);
	callout_boot_ct = &callout_table[CALLOUT_TABLE(0, CPU->cpu_seqid)];
	callout_cpu_online(CPU);
	mutex_exit(&cpu_lock);
}
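
/*
 * The sketch below is purely illustrative and is not compiled; it shows how
 * a client would typically use the legacy timeout(9F)/untimeout(9F)
 * interfaces implemented above.  The names my_handler, my_state and the
 * 50ms interval are hypothetical.
 *
 *	static void
 *	my_handler(void *arg)
 *	{
 *		my_state_t *sp = arg;
 *
 *		... do the periodic work, possibly rescheduling here ...
 *	}
 *
 *	timeout_id_t tid;
 *
 *	tid = timeout(my_handler, sp, drv_usectohz(50000));
 *	...
 *	(void) untimeout(tid);
 *
 * untimeout() waits for an executing handler to finish (see
 * untimeout_generic() above), so a caller must not hold a lock across
 * untimeout() that the handler may also acquire.
 */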