/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_callout_profiling.h"
#if defined(__arm__)
#include "opt_timer.h"
#endif
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/callout.h>
#include <sys/file.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/rmlock.h>
#include <sys/rwlock.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
#include <sys/smp.h>

#ifdef SMP
#include <machine/cpu.h>
#endif

#ifndef NO_EVENTTIMERS
DPCPU_DECLARE(sbintime_t, hardclocktime);
#endif

SDT_PROVIDER_DEFINE(callout_execute);
SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start,
    "struct callout *");
SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end,
    "struct callout *");

#ifdef CALLOUT_PROFILING
static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
    "Average number of items examined per softclock call. Units = 1/1000");
static int avg_gcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
    "Average number of Giant callouts made per softclock call. Units = 1/1000");
static int avg_lockcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
    "Average number of lock callouts made per softclock call. Units = 1/1000");
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
    "Average number of MP callouts made per softclock call. Units = 1/1000");
static int avg_depth_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
    "Average number of direct callouts examined per callout_process call. "
    "Units = 1/1000");
static int avg_lockcalls_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
    &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
    "callout_process call. Units = 1/1000");
static int avg_mpcalls_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
    0, "Average number of MP direct callouts made per callout_process call. "
    "Units = 1/1000");
#endif

static int ncallout;
SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0,
    "Number of entries in callwheel and size of timeout() preallocation");

#ifdef RSS
static int pin_default_swi = 1;
static int pin_pcpu_swi = 1;
#else
static int pin_default_swi = 0;
static int pin_pcpu_swi = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi,
    0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)");
SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi,
    0, "Pin the per-CPU swis (except PCPU 0, which is also default)");

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */
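/*
 * All three knobs above are loader tunables (CTLFLAG_RDTUN): they are
 * fetched via TUNABLE_INT_FETCH() in callout_callwheel_init() below and
 * can be set from loader.conf, for example (illustrative values only):
 *
 *	kern.ncallout="32768"
 *	kern.pin_default_swi="1"
 *	kern.pin_pcpu_swi="1"
 */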
u_int callwheelsize, callwheelmask;

typedef void callout_mutex_op_t(struct lock_object *);
typedef int callout_owned_op_t(struct lock_object *);

struct callout_mutex_ops {
	callout_mutex_op_t *lock;
	callout_mutex_op_t *unlock;
	callout_owned_op_t *owned;
};

enum {
	CALLOUT_LC_UNUSED_0,
	CALLOUT_LC_UNUSED_1,
	CALLOUT_LC_UNUSED_2,
	CALLOUT_LC_UNUSED_3,
	CALLOUT_LC_SPIN,
	CALLOUT_LC_MUTEX,
	CALLOUT_LC_RW,
	CALLOUT_LC_RM,
};

static void
callout_mutex_op_none(struct lock_object *lock)
{
}

static int
callout_owned_op_none(struct lock_object *lock)
{
	return (0);
}

static void
callout_mutex_lock(struct lock_object *lock)
{
	mtx_lock((struct mtx *)lock);
}

static void
callout_mutex_unlock(struct lock_object *lock)
{
	mtx_unlock((struct mtx *)lock);
}

static void
callout_mutex_lock_spin(struct lock_object *lock)
{
	mtx_lock_spin((struct mtx *)lock);
}

static void
callout_mutex_unlock_spin(struct lock_object *lock)
{
	mtx_unlock_spin((struct mtx *)lock);
}

static int
callout_mutex_owned(struct lock_object *lock)
{
	return (mtx_owned((struct mtx *)lock));
}

static void
callout_rm_wlock(struct lock_object *lock)
{
	rm_wlock((struct rmlock *)lock);
}

static void
callout_rm_wunlock(struct lock_object *lock)
{
	rm_wunlock((struct rmlock *)lock);
}

static int
callout_rm_owned(struct lock_object *lock)
{
	return (rm_wowned((struct rmlock *)lock));
}

static void
callout_rw_wlock(struct lock_object *lock)
{
	rw_wlock((struct rwlock *)lock);
}

static void
callout_rw_wunlock(struct lock_object *lock)
{
	rw_wunlock((struct rwlock *)lock);
}

static int
callout_rw_owned(struct lock_object *lock)
{
	return (rw_wowned((struct rwlock *)lock));
}

static const struct callout_mutex_ops callout_mutex_ops[8] = {
	[CALLOUT_LC_UNUSED_0] = {
		.lock = callout_mutex_op_none,
		.unlock = callout_mutex_op_none,
		.owned = callout_owned_op_none,
	},
	[CALLOUT_LC_UNUSED_1] = {
		.lock = callout_mutex_op_none,
		.unlock = callout_mutex_op_none,
		.owned = callout_owned_op_none,
	},
	[CALLOUT_LC_UNUSED_2] = {
		.lock = callout_mutex_op_none,
		.unlock = callout_mutex_op_none,
		.owned = callout_owned_op_none,
	},
	[CALLOUT_LC_UNUSED_3] = {
		.lock = callout_mutex_op_none,
		.unlock = callout_mutex_op_none,
		.owned = callout_owned_op_none,
	},
	[CALLOUT_LC_SPIN] = {
		.lock = callout_mutex_lock_spin,
		.unlock = callout_mutex_unlock_spin,
		.owned = callout_mutex_owned,
	},
	[CALLOUT_LC_MUTEX] = {
		.lock = callout_mutex_lock,
		.unlock = callout_mutex_unlock,
		.owned = callout_mutex_owned,
	},
	[CALLOUT_LC_RW] = {
		.lock = callout_rw_wlock,
		.unlock = callout_rw_wunlock,
		.owned = callout_rw_owned,
	},
	[CALLOUT_LC_RM] = {
		.lock = callout_rm_wlock,
		.unlock = callout_rm_wunlock,
		.owned = callout_rm_owned,
	},
};

static void
callout_lock_client(int c_flags, struct lock_object *c_lock)
{
	callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_lock);
}

static void
callout_unlock_client(int c_flags, struct lock_object *c_lock)
{
	callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_lock);
}
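/*
 * Explanatory sketch: CALLOUT_GET_LC() extracts the lock-class index that
 * _callout_init_lock() (below) stored in "c_flags", so a call such as
 *
 *	callout_lock_client(c->c_flags, c->c_lock);
 *
 * resolves to callout_mutex_ops[CALLOUT_LC_MUTEX].lock() for a callout
 * initialized with a regular mutex, to the rwlock/rmlock handlers for
 * those lock classes, and to the no-op handlers when no lock was given.
 */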
#ifdef SMP
static int
callout_lock_owned_client(int c_flags, struct lock_object *c_lock)
{
	return (callout_mutex_ops[CALLOUT_GET_LC(c_flags)].owned(c_lock));
}
#endif

/*
 * The callout CPU exec structure represents information necessary for
 * describing the state of callouts currently running on the CPU and
 * for handling deferred callout restarts.
 *
 * In particular, the first entry of the array cc_exec_entity holds
 * information for callouts running from the SWI thread context, while
 * the second one holds information for callouts running directly from
 * the hardware interrupt context.
 */
struct cc_exec {
	/*
	 * The "cc_curr" points to the currently executing callout and
	 * is protected by the "cc_lock" spinlock. If no callback is
	 * currently executing it is equal to "NULL".
	 */
	struct callout *cc_curr;
	/*
	 * The "cc_restart_args" structure holds the argument for a
	 * deferred callback restart and is protected by the "cc_lock"
	 * spinlock. The structure is only valid if "cc_restart" is
	 * "true". If "cc_restart" is "false" the information in the
	 * "cc_restart_args" structure shall be ignored.
	 */
	struct callout_args cc_restart_args;
	bool cc_restart;
	/*
	 * The "cc_cancel" variable allows the currently pending
	 * callback to be atomically cancelled. This field is write
	 * protected by the "cc_lock" spinlock.
	 */
	bool cc_cancel;
	/*
	 * The "cc_drain_fn" points to a function which shall be
	 * called with the argument stored in "cc_drain_arg" when an
	 * asynchronous drain is performed. This field is write
	 * protected by the "cc_lock" spinlock.
	 */
	callout_func_t *cc_drain_fn;
	void *cc_drain_arg;
};

/*
 * There is one "struct callout_cpu" per CPU, holding all relevant
 * state for the callout processing thread on the individual CPU.
 */
struct callout_cpu {
	struct mtx_padalign cc_lock;
	struct cc_exec cc_exec_entity[2];
	struct callout *cc_exec_next_dir;
	struct callout *cc_callout;
	struct callout_list *cc_callwheel;
	struct callout_tailq cc_expireq;
	struct callout_slist cc_callfree;
	sbintime_t cc_firstevent;
	sbintime_t cc_lastscan;
	void *cc_cookie;
	u_int cc_bucket;
	char cc_ktr_event_name[20];
};

#ifdef SMP
struct callout_cpu cc_cpu[MAXCPU];
#define	CPUBLOCK	MAXCPU
#define	CC_CPU(cpu)	(&cc_cpu[(cpu)])
#define	CC_SELF()	CC_CPU(PCPU_GET(cpuid))
#else
struct callout_cpu cc_cpu;
#define	CC_CPU(cpu)	&cc_cpu
#define	CC_SELF()	&cc_cpu
#endif
#define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
#define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
#define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)

static int timeout_cpu;

static void	callout_cpu_init(struct callout_cpu *cc, int cpu);
static void	softclock_call_cc(struct callout *c, struct callout_cpu *cc,
#ifdef CALLOUT_PROFILING
		    int *mpcalls, int *lockcalls, int *gcalls,
#endif
		    int direct);

static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");

/*
 * Kernel low level callwheel initialization called from cpu0 during
 * kernel startup:
 */
static void
callout_callwheel_init(void *dummy)
{
	struct callout_cpu *cc;
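	/*
	 * Sizing sketch (illustrative numbers, derived from the code
	 * below): ncallout defaults to 16 + maxproc + maxfiles, clipped
	 * at 18508, and the wheel is sized to the next power of two
	 * above that, callwheelsize = 1 << fls(ncallout).  At the clip
	 * value this yields 1 << 15 = 32768 buckets and a callwheelmask
	 * of 0x7fff.
	 */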
	/*
	 * Calculate the size of the callout wheel and the preallocated
	 * timeout() structures.
	 * XXX: Clip callout to result of previous function of maxusers
	 * maximum 384.  This is still huge, but acceptable.
	 */
	ncallout = imin(16 + maxproc + maxfiles, 18508);
	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);

	/*
	 * Calculate callout wheel size, should be next power of two higher
	 * than 'ncallout'.
	 */
	callwheelsize = 1 << fls(ncallout);
	callwheelmask = callwheelsize - 1;

	/*
	 * Fetch whether we're pinning the swi's or not.
	 */
	TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi);
	TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi);

	/*
	 * Only cpu0 handles timeout(9) and receives a preallocation.
	 *
	 * XXX: Once all timeout(9) consumers are converted this can
	 * be removed.
	 */
	timeout_cpu = PCPU_GET(cpuid);
	cc = CC_CPU(timeout_cpu);
	cc->cc_callout = malloc(ncallout * sizeof(struct callout),
	    M_CALLOUT, M_WAITOK);
	callout_cpu_init(cc, timeout_cpu);
}
SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);

/*
 * Initialize the per-cpu callout structures.
 */
static void
callout_cpu_init(struct callout_cpu *cc, int cpu)
{
	struct callout *c;
	int i;

	mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
	SLIST_INIT(&cc->cc_callfree);
	cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
	    M_CALLOUT, M_WAITOK);
	for (i = 0; i < callwheelsize; i++)
		LIST_INIT(&cc->cc_callwheel[i]);
	TAILQ_INIT(&cc->cc_expireq);
	cc->cc_firstevent = SBT_MAX;
	snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
	    "callwheel cpu %d", cpu);
	if (cc->cc_callout == NULL)	/* Only cpu0 handles timeout(9) */
		return;
	for (i = 0; i < ncallout; i++) {
		c = &cc->cc_callout[i];
		callout_init(c, 0);
		c->c_flags |= CALLOUT_LOCAL_ALLOC;
		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
	}
}

/*
 * Start standard softclock thread.
 */
static void
start_softclock(void *dummy)
{
	struct callout_cpu *cc;
	char name[MAXCOMLEN];
#ifdef SMP
	int cpu;
	struct intr_event *ie;
#endif

	cc = CC_CPU(timeout_cpu);
	snprintf(name, sizeof(name), "clock (%d)", timeout_cpu);
	if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK,
	    INTR_MPSAFE, &cc->cc_cookie))
		panic("died while creating standard software ithreads");
	if (pin_default_swi &&
	    (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) {
		printf("%s: timeout clock couldn't be pinned to cpu %d\n",
		    __func__,
		    timeout_cpu);
	}

#ifdef SMP
	CPU_FOREACH(cpu) {
		if (cpu == timeout_cpu)
			continue;
		cc = CC_CPU(cpu);
		cc->cc_callout = NULL;	/* Only cpu0 handles timeout(9). */
		callout_cpu_init(cc, cpu);
		snprintf(name, sizeof(name), "clock (%d)", cpu);
		ie = NULL;
		if (swi_add(&ie, name, softclock, cc, SWI_CLOCK,
		    INTR_MPSAFE, &cc->cc_cookie))
			panic("died while creating standard software ithreads");
		if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) {
			printf("%s: per-cpu clock couldn't be pinned to "
			    "cpu %d\n",
			    __func__,
			    cpu);
		}
	}
#endif
}
SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);

#define	CC_HASH_SHIFT	8

static inline u_int
callout_hash(sbintime_t sbt)
{

	return (sbt >> (32 - CC_HASH_SHIFT));
}

static inline u_int
callout_get_bucket(sbintime_t sbt)
{

	return (callout_hash(sbt) & callwheelmask);
}

void
callout_process(sbintime_t now)
{
	struct callout *tmp, *tmpn;
	struct callout_cpu *cc;
	struct callout_list *sc;
	sbintime_t first, last, max, tmp_max;
	uint32_t lookahead;
	u_int firstb, lastb, nowb;
#ifdef CALLOUT_PROFILING
	int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
#endif
	cc = CC_SELF();
	CC_LOCK(cc);

	/* Compute the buckets of the last scan and present times. */
	firstb = callout_hash(cc->cc_lastscan);
	cc->cc_lastscan = now;
	nowb = callout_hash(now);

	/* Compute the last bucket and minimum time of the bucket after it. */
	if (nowb == firstb)
		lookahead = (SBT_1S / 16);
	else if (nowb - firstb == 1)
		lookahead = (SBT_1S / 8);
	else
		lookahead = (SBT_1S / 2);
	first = last = now;
	first += (lookahead / 2);
	last += lookahead;
	last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
	lastb = callout_hash(last) - 1;
	max = last;

	/*
	 * Check if we wrapped around the entire wheel from the last scan.
	 * If so, we need to scan the entire wheel for pending callouts.
	 */
	if (lastb - firstb >= callwheelsize) {
		lastb = firstb + callwheelsize - 1;
		if (nowb - firstb >= callwheelsize)
			nowb = lastb;
	}

	/* Iterate callwheel from firstb to nowb and then up to lastb. */
	do {
		sc = &cc->cc_callwheel[firstb & callwheelmask];
		tmp = LIST_FIRST(sc);
		while (tmp != NULL) {
			/* Run the callout if present time within allowed. */
			if (tmp->c_time <= now) {
				/*
				 * Consumer told us the callout may be run
				 * directly from hardware interrupt context.
				 */
				if (tmp->c_flags & CALLOUT_DIRECT) {
#ifdef CALLOUT_PROFILING
					++depth_dir;
#endif
					cc->cc_exec_next_dir =
					    LIST_NEXT(tmp, c_links.le);
					cc->cc_bucket = firstb & callwheelmask;
					LIST_REMOVE(tmp, c_links.le);
					softclock_call_cc(tmp, cc,
#ifdef CALLOUT_PROFILING
					    &mpcalls_dir, &lockcalls_dir, NULL,
#endif
					    1);
					tmp = cc->cc_exec_next_dir;
				} else {
					tmpn = LIST_NEXT(tmp, c_links.le);
					LIST_REMOVE(tmp, c_links.le);
					TAILQ_INSERT_TAIL(&cc->cc_expireq,
					    tmp, c_links.tqe);
					tmp->c_flags |= CALLOUT_PROCESSED;
					tmp = tmpn;
				}
				continue;
			}
			/* Skip events from distant future. */
			if (tmp->c_time >= max)
				goto next;
			/*
			 * Event minimal time is bigger than present maximal
			 * time, so it cannot be aggregated.
			 */
			if (tmp->c_time > last) {
				lastb = nowb;
				goto next;
			}
			/* Update first and last time, respecting this event. */
			if (tmp->c_time < first)
				first = tmp->c_time;
			tmp_max = tmp->c_time + tmp->c_precision;
			if (tmp_max < last)
				last = tmp_max;
next:
			tmp = LIST_NEXT(tmp, c_links.le);
		}
		/* Proceed with the next bucket. */
		firstb++;
		/*
		 * Stop if we looked after present time and found
		 * some event we can't execute now.
		 * Stop if we looked far enough into the future.
		 */
	} while (((int)(firstb - lastb)) <= 0);
	cc->cc_firstevent = last;
#ifndef NO_EVENTTIMERS
	cpu_new_callout(curcpu, last, first);
#endif
#ifdef CALLOUT_PROFILING
	avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
	avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
	avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
#endif
	CC_UNLOCK(cc);
	/*
	 * swi_sched acquires the thread lock, so we don't want to call it
	 * with cc_lock held; incorrect locking order.
	 */
	if (!TAILQ_EMPTY(&cc->cc_expireq))
		swi_sched(cc->cc_cookie, 0);
}

static struct callout_cpu *
callout_lock(struct callout *c)
{
	struct callout_cpu *cc;
	cc = CC_CPU(c->c_cpu);
	CC_LOCK(cc);
	return (cc);
}

static struct callout_cpu *
callout_cc_add_locked(struct callout *c, struct callout_cpu *cc,
    struct callout_args *coa, bool can_swap_cpu)
{
#ifndef NO_EVENTTIMERS
	sbintime_t sbt;
#endif
	int bucket;

	CC_LOCK_ASSERT(cc);

	/* update flags before swapping locks, if any */
	c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT | CALLOUT_DEFRESTART);
	if (coa->flags & C_DIRECT_EXEC)
		c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT);
	else
		c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);

#ifdef SMP
	/*
	 * Check if we are changing the CPU on which the callback
	 * should be executed and if we have a lock protecting us:
	 */
	if (can_swap_cpu != false && coa->cpu != c->c_cpu &&
	    callout_lock_owned_client(c->c_flags, c->c_lock) != 0) {
		CC_UNLOCK(cc);
		c->c_cpu = coa->cpu;
		cc = callout_lock(c);
	}
#endif
	if (coa->time < cc->cc_lastscan)
		coa->time = cc->cc_lastscan;
	c->c_arg = coa->arg;
	c->c_func = coa->func;
	c->c_time = coa->time;
	c->c_precision = coa->precision;

	bucket = callout_get_bucket(c->c_time);
	CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
	    c, (int)(c->c_precision >> 32),
	    (u_int)(c->c_precision & 0xffffffff));
	LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);

	/* Ensure we are first to be scanned, if called via a callback */
	if (cc->cc_bucket == bucket)
		cc->cc_exec_next_dir = c;
#ifndef NO_EVENTTIMERS
	/*
	 * Inform the eventtimers(4) subsystem there's a new callout
	 * that has been inserted, but only if really required.
	 */
	if (SBT_MAX - c->c_time < c->c_precision)
		c->c_precision = SBT_MAX - c->c_time;
	sbt = c->c_time + c->c_precision;
	if (sbt < cc->cc_firstevent) {
		cc->cc_firstevent = sbt;
		cpu_new_callout(coa->cpu, sbt, c->c_time);
	}
#endif
	return (cc);
}
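/*
 * Bucket granularity sketch (derived from CC_HASH_SHIFT and
 * callout_hash() above, shown for illustration): callout_hash() shifts
 * the 32.32 fixed-point sbintime_t right by (32 - CC_HASH_SHIFT) = 24
 * bits, so consecutive hash values are 2^24 sbintime units apart, i.e.
 * each bucket covers 1/256 of a second (about 3.9 ms), and
 * callout_get_bucket() wraps the hash onto the wheel with callwheelmask.
 */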
static void
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{

	c->c_func = NULL;
	SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}

static void
softclock_call_cc(struct callout *c, struct callout_cpu *cc,
#ifdef CALLOUT_PROFILING
    int *mpcalls, int *lockcalls, int *gcalls,
#endif
    int direct)
{
	callout_func_t *c_func;
	void *c_arg;
	struct lock_object *c_lock;
	int c_flags;
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbintime_t sbt1, sbt2;
	struct timespec ts2;
	static sbintime_t maxdt = 2 * SBT_1MS;	/* 2 msec */
	static timeout_t *lastfunc;
#endif

	KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
	    (CALLOUT_PENDING | CALLOUT_ACTIVE),
	    ("softclock_call_cc: pend|act %p %x", c, c->c_flags));
	c_lock = c->c_lock;
	c_func = c->c_func;
	c_arg = c->c_arg;
	c_flags = c->c_flags;

	/* remove pending bit */
	c->c_flags &= ~CALLOUT_PENDING;

	/* reset our local state */
	cc->cc_exec_entity[direct].cc_curr = c;
	cc->cc_exec_entity[direct].cc_restart = false;
	cc->cc_exec_entity[direct].cc_drain_fn = NULL;
	cc->cc_exec_entity[direct].cc_drain_arg = NULL;

	if (c_lock != NULL) {
		cc->cc_exec_entity[direct].cc_cancel = false;
		CC_UNLOCK(cc);

		/* unlocked region for switching locks */

		callout_lock_client(c_flags, c_lock);

		/*
		 * Check if the callout may have been cancelled while
		 * we were switching locks. Even though the callout is
		 * specifying a lock, it might not be certain this
		 * lock is locked when starting and stopping callouts.
		 */
		CC_LOCK(cc);
		if (cc->cc_exec_entity[direct].cc_cancel) {
			callout_unlock_client(c_flags, c_lock);
			goto skip_cc_locked;
		}
		if (c_lock == &Giant.lock_object) {
#ifdef CALLOUT_PROFILING
			(*gcalls)++;
#endif
			CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
			    c, c_func, c_arg);
		} else {
#ifdef CALLOUT_PROFILING
			(*lockcalls)++;
#endif
			CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
			    c, c_func, c_arg);
		}
	} else {
#ifdef CALLOUT_PROFILING
		(*mpcalls)++;
#endif
		CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
		    c, c_func, c_arg);
	}
	/* The callout cannot be stopped now! */
	cc->cc_exec_entity[direct].cc_cancel = true;
	CC_UNLOCK(cc);

	/* unlocked region */
	KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
	    "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbt1 = sbinuptime();
#endif
	THREAD_NO_SLEEPING();
	SDT_PROBE(callout_execute, kernel, , callout__start, c, 0, 0, 0, 0);
	c_func(c_arg);
	SDT_PROBE(callout_execute, kernel, , callout__end, c, 0, 0, 0, 0);
	THREAD_SLEEPING_OK();
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbt2 = sbinuptime();
	sbt2 -= sbt1;
	if (sbt2 > maxdt) {
		if (lastfunc != c_func || sbt2 > maxdt * 2) {
			ts2 = sbttots(sbt2);
			printf(
		"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
			    c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
		}
		maxdt = sbt2;
		lastfunc = c_func;
	}
#endif
	KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
	CTR1(KTR_CALLOUT, "callout %p finished", c);

	/*
	 * At this point the callback structure might have been freed,
	 * so we need to check the previously copied value of
	 * "c->c_flags":
	 */
	if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
		callout_unlock_client(c_flags, c_lock);

	CC_LOCK(cc);

skip_cc_locked:
	KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr"));
	cc->cc_exec_entity[direct].cc_curr = NULL;

	/* Check if there is anything which needs draining */
	if (cc->cc_exec_entity[direct].cc_drain_fn != NULL) {
		/*
		 * Unlock the CPU callout last, so that any use of
		 * structures belonging to the callout are complete:
		 */
		CC_UNLOCK(cc);
		/* call drain function unlocked */
		cc->cc_exec_entity[direct].cc_drain_fn(
		    cc->cc_exec_entity[direct].cc_drain_arg);
		CC_LOCK(cc);
	} else if (c_flags & CALLOUT_LOCAL_ALLOC) {
		/* return callout back to freelist */
		callout_cc_del(c, cc);
	} else if (cc->cc_exec_entity[direct].cc_restart) {
		/* [re-]schedule callout, if any */
		cc = callout_cc_add_locked(c, cc,
		    &cc->cc_exec_entity[direct].cc_restart_args, false);
	}
}

/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */
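/*
 * Profiling note (explanatory, relevant under CALLOUT_PROFILING): the
 * statistics updated in callout_process() above and softclock() below
 * all use the recurrence
 *
 *	avg += (value * 1000 - avg) >> 8;
 *
 * i.e. an exponential moving average with a weight of 1/256 per sample,
 * kept in units of 1/1000 as advertised by the sysctl descriptions.  A
 * steady per-call depth of 5, for example, converges on avg_depth ~ 5000.
 */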
/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
void
softclock(void *arg)
{
	struct callout_cpu *cc;
	struct callout *c;
#ifdef CALLOUT_PROFILING
	int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
#endif

	cc = (struct callout_cpu *)arg;
	CC_LOCK(cc);
	while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
		TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		softclock_call_cc(c, cc,
#ifdef CALLOUT_PROFILING
		    &mpcalls, &lockcalls, &gcalls,
#endif
		    0);
#ifdef CALLOUT_PROFILING
		++depth;
#endif
	}
#ifdef CALLOUT_PROFILING
	avg_depth += (depth * 1000 - avg_depth) >> 8;
	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
	avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
#endif
	CC_UNLOCK(cc);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * callout_handle_init --
 *	Initialize a handle so that using it with untimeout is benign.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that although an
 *	identification value is returned from timeout, the original
 *	arguments to timeout as well as the identifier are used to
 *	identify entries for untimeout.
 */
struct callout_handle
timeout(timeout_t *ftn, void *arg, int to_ticks)
{
	struct callout_cpu *cc;
	struct callout *new;
	struct callout_handle handle;

	cc = CC_CPU(timeout_cpu);
	CC_LOCK(cc);
	/* Fill in the next free callout structure. */
	new = SLIST_FIRST(&cc->cc_callfree);
	if (new == NULL)
		/* XXX Attempt to malloc first */
		panic("timeout table full");
	SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
	handle.callout = new;
	CC_UNLOCK(cc);

	callout_reset(new, to_ticks, ftn, arg);

	return (handle);
}

void
untimeout(timeout_t *ftn, void *arg, struct callout_handle handle)
{
	struct callout_cpu *cc;
	bool match;

	/*
	 * Check for a handle that was initialized
	 * by callout_handle_init, but never used
	 * for a real timeout.
	 */
	if (handle.callout == NULL)
		return;

	cc = callout_lock(handle.callout);
	match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg);
	CC_UNLOCK(cc);

	if (match)
		callout_stop(handle.callout);
}

void
callout_handle_init(struct callout_handle *handle)
{
	handle->callout = NULL;
}
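/*
 * Overview of callout_restart_async() below (descriptive summary of the
 * code that follows, not additional API): it is the common helper behind
 * callout_reset*(), callout_stop() and callout_drain_async().  With a
 * non-NULL "coa" the callout is (re-)scheduled; with "coa" == NULL it is
 * stopped.  A non-NULL "drain_fn" is recorded so that it is invoked once
 * a currently executing callback has finished.  The return value is a
 * bit mask: bit 0 is set when a scheduled callback was successfully
 * cancelled, and bit 1 (value 2) when the drain function was queued and
 * will be called later.
 */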
static int
callout_restart_async(struct callout *c, struct callout_args *coa,
    callout_func_t *drain_fn, void *drain_arg)
{
	struct callout_cpu *cc;
	int cancelled;
	int direct;

	cc = callout_lock(c);

	/* Figure out if the callout is direct or not */
	direct = ((c->c_flags & CALLOUT_DIRECT) != 0);

	/*
	 * Check if the callback is currently scheduled for
	 * completion:
	 */
	if (cc->cc_exec_entity[direct].cc_curr == c) {
		/*
		 * Try to prevent the callback from running by setting
		 * the "cc_cancel" variable to "true". Also check if
		 * the callout was previously subject to a deferred
		 * callout restart:
		 */
		if (cc->cc_exec_entity[direct].cc_cancel == false ||
		    (c->c_flags & CALLOUT_DEFRESTART) != 0) {
			cc->cc_exec_entity[direct].cc_cancel = true;
			cancelled = 1;
		} else {
			cancelled = 0;
		}

		/*
		 * Prevent callback restart if "callout_drain_xxx()"
		 * is being called or we are stopping the callout or
		 * the callback was preallocated by us:
		 */
		if (cc->cc_exec_entity[direct].cc_drain_fn != NULL ||
		    coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) {
			CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
			    cancelled ? "cancelled and draining" : "draining",
			    c, c->c_func, c->c_arg);

			/* clear old flags, if any */
			c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING |
			    CALLOUT_DEFRESTART | CALLOUT_PROCESSED);

			/* clear restart flag, if any */
			cc->cc_exec_entity[direct].cc_restart = false;

			/* set drain function, if any */
			if (drain_fn != NULL) {
				cc->cc_exec_entity[direct].cc_drain_fn = drain_fn;
				cc->cc_exec_entity[direct].cc_drain_arg = drain_arg;
				cancelled |= 2;		/* XXX define the value */
			}
		} else {
			CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
			    cancelled ? "cancelled and restarting" : "restarting",
			    c, c->c_func, c->c_arg);

			/* get us back into the game */
			c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING |
			    CALLOUT_DEFRESTART);
			c->c_flags &= ~CALLOUT_PROCESSED;

			/* enable deferred restart */
			cc->cc_exec_entity[direct].cc_restart = true;

			/* store arguments for the deferred restart, if any */
			cc->cc_exec_entity[direct].cc_restart_args = *coa;
		}
	} else {
		/* stop callout */
		if (c->c_flags & CALLOUT_PENDING) {
			/*
			 * The callback has not yet been executed, and
			 * we simply just need to unlink it:
			 */
			if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
				if (cc->cc_exec_next_dir == c)
					cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
				LIST_REMOVE(c, c_links.le);
			} else {
				TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
			}
			cancelled = 1;
		} else {
			cancelled = 0;
		}

		CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
		    cancelled ? "rescheduled" : "scheduled",
		    c, c->c_func, c->c_arg);

		/* [re-]schedule callout, if any */
		if (coa != NULL) {
			cc = callout_cc_add_locked(c, cc, coa, true);
		} else {
			/* clear old flags, if any */
			c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING |
			    CALLOUT_DEFRESTART | CALLOUT_PROCESSED);

			/* return callback to pre-allocated list, if any */
			if ((c->c_flags & CALLOUT_LOCAL_ALLOC) && cancelled != 0) {
				callout_cc_del(c, cc);
			}
		}
	}
	CC_UNLOCK(cc);
	return (cancelled);
}

/*
 * New interface; clients allocate their own callout structures.
 *
 * callout_reset() - establish or change a timeout
 * callout_stop() - disestablish a timeout
 * callout_init() - initialize a callout structure so that it can
 *	safely be passed to callout_reset() and callout_stop()
 *
 * <sys/callout.h> defines three convenience macros:
 *
 * callout_active() - returns truth if callout has not been stopped,
 *	drained, or deactivated since the last time the callout was
 *	reset.
 * callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
int
callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
    callout_func_t *ftn, void *arg, int cpu, int flags)
{
	struct callout_args coa;

	/* store arguments for callout add function */
	coa.func = ftn;
	coa.arg = arg;
	coa.precision = precision;
	coa.flags = flags;
	coa.cpu = cpu;

	/* compute the rest of the arguments needed */
	if (coa.flags & C_ABSOLUTE) {
		coa.time = sbt;
	} else {
		sbintime_t pr;

		if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt))
			sbt = tick_sbt;
		if ((coa.flags & C_HARDCLOCK) ||
#ifdef NO_EVENTTIMERS
		    sbt >= sbt_timethreshold) {
			coa.time = getsbinuptime();

			/* Add safety belt for the case of hz > 1000. */
			coa.time += tc_tick_sbt - tick_sbt;
#else
		    sbt >= sbt_tickthreshold) {
			/*
			 * Obtain the time of the last hardclock() call on
			 * this CPU directly from the kern_clocksource.c.
			 * This value is per-CPU, but it is equal for all
			 * active ones.
			 */
#ifdef __LP64__
			coa.time = DPCPU_GET(hardclocktime);
#else
			spinlock_enter();
			coa.time = DPCPU_GET(hardclocktime);
			spinlock_exit();
#endif
#endif
			if ((coa.flags & C_HARDCLOCK) == 0)
				coa.time += tick_sbt;
		} else
			coa.time = sbinuptime();
		if (SBT_MAX - coa.time < sbt)
			coa.time = SBT_MAX;
		else
			coa.time += sbt;
		pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp :
		    sbt >> C_PRELGET(coa.flags));
		if (pr > coa.precision)
			coa.precision = pr;
	}

	/* get callback started, if any */
	return (callout_restart_async(c, &coa, NULL, NULL));
}
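/*
 * Precision sketch (illustrative, based on the C_PREL()/C_PRELGET()
 * handling above and assuming the stock <sys/callout.h> encoding where
 * C_PRELGET(C_PREL(n)) == n): when the caller passes no C_PREL() flag,
 * the default slop is sbt >> tc_precexp; with C_PREL(2), for example,
 * the computed "pr" becomes sbt >> 2, i.e. the callout may be coalesced
 * with others within a quarter of its interval, unless an explicit
 * "precision" argument is already larger.
 */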
/*
 * Common idioms that can be optimized in the future.
 */
int
callout_schedule_on(struct callout *c, int to_ticks, int cpu)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
}

int
callout_schedule(struct callout *c, int to_ticks)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
}

int
callout_stop(struct callout *c)
{
	/* get callback stopped, if any */
	return (callout_restart_async(c, NULL, NULL, NULL));
}

static void
callout_drain_function(void *arg)
{
	wakeup(arg);
}

int
callout_drain_async(struct callout *c, callout_func_t *fn, void *arg)
{
	/* get callback stopped, if any */
	return (callout_restart_async(c, NULL, fn, arg) & 2);
}

int
callout_drain(struct callout *c)
{
	int cancelled;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "Draining callout");

	callout_lock_client(c->c_flags, c->c_lock);

	/* at this point the "c->c_cpu" field is not changing */

	cancelled = callout_drain_async(c, &callout_drain_function, c);

	if (cancelled != 0) {
		struct callout_cpu *cc;
		int direct;

		CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p",
		    c, c->c_func, c->c_arg);

		cc = callout_lock(c);
		direct = ((c->c_flags & CALLOUT_DIRECT) != 0);

		/*
		 * We've gotten our callout CPU lock, it is safe to
		 * drop the initial lock:
		 */
		callout_unlock_client(c->c_flags, c->c_lock);

		/* Wait for drain to complete */

		while (cc->cc_exec_entity[direct].cc_curr == c)
			msleep_spin(c, (struct mtx *)&cc->cc_lock, "codrain", 0);

		CC_UNLOCK(cc);
	} else {
		callout_unlock_client(c->c_flags, c->c_lock);
	}

	CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
	    c, c->c_func, c->c_arg);

	return (cancelled & 1);
}

void
callout_init(struct callout *c, int mpsafe)
{
	if (mpsafe) {
		_callout_init_lock(c, NULL, CALLOUT_RETURNUNLOCKED);
	} else {
		_callout_init_lock(c, &Giant.lock_object, 0);
	}
}

void
_callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
{
	bzero(c, sizeof *c);
	KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0,
	    ("callout_init_lock: bad flags 0x%08x", flags));
	flags &= CALLOUT_RETURNUNLOCKED;
	if (lock != NULL) {
		struct lock_class *class = LOCK_CLASS(lock);
		if (class == &lock_class_mtx_sleep)
			flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX);
		else if (class == &lock_class_mtx_spin)
			flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN);
		else if (class == &lock_class_rm)
			flags |= CALLOUT_SET_LC(CALLOUT_LC_RM);
		else if (class == &lock_class_rw)
			flags |= CALLOUT_SET_LC(CALLOUT_LC_RW);
		else
			panic("callout_init_lock: Unsupported lock class '%s'\n", class->lc_name);
	} else {
		flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0);
	}
	c->c_lock = lock;
	c->c_flags = flags;
	c->c_cpu = timeout_cpu;
}
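/*
 * Usage sketch (illustrative only; "sc", its mutex and the handler are
 * hypothetical consumer code, not part of this file): a typical client
 * pairs the callout with its own mutex through the callout_init_mtx()
 * wrapper from <sys/callout.h>, so the handler runs with that mutex
 * held by the lock-class machinery above:
 *
 *	callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
 *	...
 *	mtx_lock(&sc->sc_mtx);
 *	callout_reset(&sc->sc_callout, hz, my_timer_fn, sc);
 *	mtx_unlock(&sc->sc_mtx);
 *	...
 *	callout_drain(&sc->sc_callout);		(before freeing "sc")
 */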
#ifdef APM_FIXUP_CALLTODO
/*
 * Adjust the kernel calltodo timeout list.  This routine is used after
 * an APM resume to recalculate the calltodo timer list values with the
 * number of hz's we have been sleeping.  The next hardclock() will detect
 * that there are fired timers and run softclock() to execute them.
 *
 * Please note, I have not done an exhaustive analysis of what code this
 * might break.  I am motivated to have my select()'s and alarm()'s that
 * have expired during suspend firing upon resume so that the applications
 * which set the timer can do the maintenance the timer was for as close
 * as possible to the originally intended time.  Testing this code for a
 * week showed that resuming from a suspend resulted in 22 to 25 timers
 * firing, which seemed independent of whether the suspend was 2 hours or
 * 2 days.  Your mileage may vary.   - Ken Key <key@cs.utk.edu>
 */
void
adjust_timeout_calltodo(struct timeval *time_change)
{
	register struct callout *p;
	unsigned long delta_ticks;

	/*
	 * How many ticks were we asleep?
	 * (stolen from tvtohz()).
	 */

	/* Don't do anything */
	if (time_change->tv_sec < 0)
		return;
	else if (time_change->tv_sec <= LONG_MAX / 1000000)
		delta_ticks = (time_change->tv_sec * 1000000 +
		    time_change->tv_usec + (tick - 1)) / tick + 1;
	else if (time_change->tv_sec <= LONG_MAX / hz)
		delta_ticks = time_change->tv_sec * hz +
		    (time_change->tv_usec + (tick - 1)) / tick + 1;
	else
		delta_ticks = LONG_MAX;

	if (delta_ticks > INT_MAX)
		delta_ticks = INT_MAX;

	/*
	 * Now rip through the timer calltodo list looking for timers
	 * to expire.
	 */

	/* don't collide with softclock() */
	CC_LOCK(cc);
	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
		p->c_time -= delta_ticks;

		/* Break if the timer had more time on it than delta_ticks */
		if (p->c_time > 0)
			break;

		/* take back the ticks the timer didn't use (p->c_time <= 0) */
		delta_ticks = -p->c_time;
	}
	CC_UNLOCK(cc);

	return;
}
#endif /* APM_FIXUP_CALLTODO */

static int
flssbt(sbintime_t sbt)
{

	sbt += (uint64_t)sbt >> 1;
	if (sizeof(long) >= sizeof(sbintime_t))
		return (flsl(sbt));
	if (sbt >= SBT_1S)
		return (flsl(((uint64_t)sbt) >> 32) + 32);
	return (flsl(sbt));
}
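/*
 * The handler below only produces output on a write (req->newptr must be
 * set), so the snapshot can be triggered from the shell with, for
 * example:
 *
 *	sysctl kern.callout_stat=1
 *
 * The report is printed to the kernel console/message buffer rather than
 * returned through the sysctl itself.
 */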
/*
 * Dump immediate statistic snapshot of the scheduled callouts.
 */
static int
sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
{
	struct callout *tmp;
	struct callout_cpu *cc;
	struct callout_list *sc;
	sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
	int ct[64], cpr[64], ccpbk[32];
	int error, val, i, count, tcum, pcum, maxc, c, medc;
#ifdef SMP
	int cpu;
#endif

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	count = maxc = 0;
	st = spr = maxt = maxpr = 0;
	bzero(ccpbk, sizeof(ccpbk));
	bzero(ct, sizeof(ct));
	bzero(cpr, sizeof(cpr));
	now = sbinuptime();
#ifdef SMP
	CPU_FOREACH(cpu) {
		cc = CC_CPU(cpu);
#else
		cc = CC_CPU(timeout_cpu);
#endif
		CC_LOCK(cc);
		for (i = 0; i < callwheelsize; i++) {
			sc = &cc->cc_callwheel[i];
			c = 0;
			LIST_FOREACH(tmp, sc, c_links.le) {
				c++;
				t = tmp->c_time - now;
				if (t < 0)
					t = 0;
				st += t / SBT_1US;
				spr += tmp->c_precision / SBT_1US;
				if (t > maxt)
					maxt = t;
				if (tmp->c_precision > maxpr)
					maxpr = tmp->c_precision;
				ct[flssbt(t)]++;
				cpr[flssbt(tmp->c_precision)]++;
			}
			if (c > maxc)
				maxc = c;
			ccpbk[fls(c + c / 2)]++;
			count += c;
		}
		CC_UNLOCK(cc);
#ifdef SMP
	}
#endif

	for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
		tcum += ct[i];
	medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
	for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
		pcum += cpr[i];
	medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
	for (i = 0, c = 0; i < 32 && c < count / 2; i++)
		c += ccpbk[i];
	medc = (i >= 2) ? (1 << (i - 2)) : 0;

	printf("Scheduled callouts statistic snapshot:\n");
	printf("  Callouts: %6d  Buckets: %6d*%-3d  Bucket size: 0.%06ds\n",
	    count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
	printf("  C/Bk: med %5d         avg %6d.%06jd  max %6d\n",
	    medc,
	    count / callwheelsize / mp_ncpus,
	    (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
	    maxc);
	printf("  Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
	    medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
	    (st / count) / 1000000, (st / count) % 1000000,
	    maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
	printf("  Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
	    medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
	    (spr / count) / 1000000, (spr / count) % 1000000,
	    maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
	printf("  Distribution:       \tbuckets\t   time\t   tcum\t"
	    "   prec\t   pcum\n");
	for (i = 0, tcum = pcum = 0; i < 64; i++) {
		if (ct[i] == 0 && cpr[i] == 0)
			continue;
		t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
		tcum += ct[i];
		pcum += cpr[i];
		printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
		    i - 1 - (32 - CC_HASH_SHIFT),
		    ct[i], tcum, cpr[i], pcum);
	}
	return (error);
}
SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_callout_stat, "I",
    "Dump immediate statistic snapshot of the scheduled callouts");