1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_callout_profiling.h" 41 #if defined(__arm__) 42 #include "opt_timer.h" 43 #endif 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bus.h> 48 #include <sys/callout.h> 49 #include <sys/file.h> 50 #include <sys/interrupt.h> 51 #include <sys/kernel.h> 52 #include <sys/ktr.h> 53 #include <sys/lock.h> 54 #include <sys/malloc.h> 55 #include <sys/mutex.h> 56 #include <sys/proc.h> 57 #include <sys/sdt.h> 58 #include <sys/sleepqueue.h> 59 #include <sys/sysctl.h> 60 #include <sys/smp.h> 61 62 #ifdef SMP 63 #include <machine/cpu.h> 64 #endif 65 66 #ifndef NO_EVENTTIMERS 67 DPCPU_DECLARE(sbintime_t, hardclocktime); 68 #endif 69 70 SDT_PROVIDER_DEFINE(callout_execute); 71 SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start, 72 "struct callout *"); 73 SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end, 74 "struct callout *"); 75 76 #ifdef CALLOUT_PROFILING 77 static int avg_depth; 78 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, 79 "Average number of items examined per softclock call. Units = 1/1000"); 80 static int avg_gcalls; 81 SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, 82 "Average number of Giant callouts made per softclock call. Units = 1/1000"); 83 static int avg_lockcalls; 84 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, 85 "Average number of lock callouts made per softclock call. Units = 1/1000"); 86 static int avg_mpcalls; 87 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, 88 "Average number of MP callouts made per softclock call. Units = 1/1000"); 89 static int avg_depth_dir; 90 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, 91 "Average number of direct callouts examined per callout_process call. " 92 "Units = 1/1000"); 93 static int avg_lockcalls_dir; 94 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, 95 &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " 96 "callout_process call. Units = 1/1000"); 97 static int avg_mpcalls_dir; 98 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, 99 0, "Average number of MP direct callouts made per callout_process call. " 100 "Units = 1/1000"); 101 #endif 102 103 static int ncallout; 104 SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN, &ncallout, 0, 105 "Number of entries in callwheel and size of timeout() preallocation"); 106 107 /* 108 * TODO: 109 * allocate more timeout table slots when table overflows. 110 */ 111 u_int callwheelsize, callwheelmask; 112 113 /* 114 * The callout cpu exec entities represent informations necessary for 115 * describing the state of callouts currently running on the CPU and the ones 116 * necessary for migrating callouts to the new callout cpu. In particular, 117 * the first entry of the array cc_exec_entity holds informations for callout 118 * running in SWI thread context, while the second one holds informations 119 * for callout running directly from hardware interrupt context. 120 * The cached informations are very important for deferring migration when 121 * the migrating callout is already running. 122 */ 123 struct cc_exec { 124 struct callout *cc_next; 125 struct callout *cc_curr; 126 #ifdef SMP 127 void (*ce_migration_func)(void *); 128 void *ce_migration_arg; 129 int ce_migration_cpu; 130 sbintime_t ce_migration_time; 131 sbintime_t ce_migration_prec; 132 #endif 133 bool cc_cancel; 134 bool cc_waiting; 135 }; 136 137 /* 138 * There is one struct callout_cpu per cpu, holding all relevant 139 * state for the callout processing thread on the individual CPU. 140 */ 141 struct callout_cpu { 142 struct mtx_padalign cc_lock; 143 struct cc_exec cc_exec_entity[2]; 144 struct callout *cc_callout; 145 struct callout_list *cc_callwheel; 146 struct callout_tailq cc_expireq; 147 struct callout_slist cc_callfree; 148 sbintime_t cc_firstevent; 149 sbintime_t cc_lastscan; 150 void *cc_cookie; 151 u_int cc_bucket; 152 }; 153 154 #define cc_exec_curr cc_exec_entity[0].cc_curr 155 #define cc_exec_next cc_exec_entity[0].cc_next 156 #define cc_exec_cancel cc_exec_entity[0].cc_cancel 157 #define cc_exec_waiting cc_exec_entity[0].cc_waiting 158 #define cc_exec_curr_dir cc_exec_entity[1].cc_curr 159 #define cc_exec_next_dir cc_exec_entity[1].cc_next 160 #define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel 161 #define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting 162 163 #ifdef SMP 164 #define cc_migration_func cc_exec_entity[0].ce_migration_func 165 #define cc_migration_arg cc_exec_entity[0].ce_migration_arg 166 #define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu 167 #define cc_migration_time cc_exec_entity[0].ce_migration_time 168 #define cc_migration_prec cc_exec_entity[0].ce_migration_prec 169 #define cc_migration_func_dir cc_exec_entity[1].ce_migration_func 170 #define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg 171 #define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu 172 #define cc_migration_time_dir cc_exec_entity[1].ce_migration_time 173 #define cc_migration_prec_dir cc_exec_entity[1].ce_migration_prec 174 175 struct callout_cpu cc_cpu[MAXCPU]; 176 #define CPUBLOCK MAXCPU 177 #define CC_CPU(cpu) (&cc_cpu[(cpu)]) 178 #define CC_SELF() CC_CPU(PCPU_GET(cpuid)) 179 #else 180 struct callout_cpu cc_cpu; 181 #define CC_CPU(cpu) &cc_cpu 182 #define CC_SELF() &cc_cpu 183 #endif 184 #define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) 185 #define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) 186 #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) 187 188 static int timeout_cpu; 189 190 static void callout_cpu_init(struct callout_cpu *cc); 191 static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, 192 #ifdef CALLOUT_PROFILING 193 int *mpcalls, int *lockcalls, int *gcalls, 194 #endif 195 int direct); 196 197 static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); 198 199 /** 200 * Locked by cc_lock: 201 * cc_curr - If a callout is in progress, it is cc_curr. 202 * If cc_curr is non-NULL, threads waiting in 203 * callout_drain() will be woken up as soon as the 204 * relevant callout completes. 205 * cc_cancel - Changing to 1 with both callout_lock and cc_lock held 206 * guarantees that the current callout will not run. 207 * The softclock() function sets this to 0 before it 208 * drops callout_lock to acquire c_lock, and it calls 209 * the handler only if curr_cancelled is still 0 after 210 * cc_lock is successfully acquired. 211 * cc_waiting - If a thread is waiting in callout_drain(), then 212 * callout_wait is nonzero. Set only when 213 * cc_curr is non-NULL. 214 */ 215 216 /* 217 * Resets the execution entity tied to a specific callout cpu. 218 */ 219 static void 220 cc_cce_cleanup(struct callout_cpu *cc, int direct) 221 { 222 223 cc->cc_exec_entity[direct].cc_curr = NULL; 224 cc->cc_exec_entity[direct].cc_next = NULL; 225 cc->cc_exec_entity[direct].cc_cancel = false; 226 cc->cc_exec_entity[direct].cc_waiting = false; 227 #ifdef SMP 228 cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK; 229 cc->cc_exec_entity[direct].ce_migration_time = 0; 230 cc->cc_exec_entity[direct].ce_migration_prec = 0; 231 cc->cc_exec_entity[direct].ce_migration_func = NULL; 232 cc->cc_exec_entity[direct].ce_migration_arg = NULL; 233 #endif 234 } 235 236 /* 237 * Checks if migration is requested by a specific callout cpu. 238 */ 239 static int 240 cc_cce_migrating(struct callout_cpu *cc, int direct) 241 { 242 243 #ifdef SMP 244 return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK); 245 #else 246 return (0); 247 #endif 248 } 249 250 /* 251 * Kernel low level callwheel initialization 252 * called on cpu0 during kernel startup. 253 */ 254 static void 255 callout_callwheel_init(void *dummy) 256 { 257 struct callout_cpu *cc; 258 259 /* 260 * Calculate the size of the callout wheel and the preallocated 261 * timeout() structures. 262 * XXX: Clip callout to result of previous function of maxusers 263 * maximum 384. This is still huge, but acceptable. 264 */ 265 ncallout = imin(16 + maxproc + maxfiles, 18508); 266 TUNABLE_INT_FETCH("kern.ncallout", &ncallout); 267 268 /* 269 * Calculate callout wheel size, should be next power of two higher 270 * than 'ncallout'. 271 */ 272 callwheelsize = 1 << fls(ncallout); 273 callwheelmask = callwheelsize - 1; 274 275 /* 276 * Only cpu0 handles timeout(9) and receives a preallocation. 277 * 278 * XXX: Once all timeout(9) consumers are converted this can 279 * be removed. 280 */ 281 timeout_cpu = PCPU_GET(cpuid); 282 cc = CC_CPU(timeout_cpu); 283 cc->cc_callout = malloc(ncallout * sizeof(struct callout), 284 M_CALLOUT, M_WAITOK); 285 callout_cpu_init(cc); 286 } 287 SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); 288 289 /* 290 * Initialize the per-cpu callout structures. 291 */ 292 static void 293 callout_cpu_init(struct callout_cpu *cc) 294 { 295 struct callout *c; 296 int i; 297 298 mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); 299 SLIST_INIT(&cc->cc_callfree); 300 cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize, 301 M_CALLOUT, M_WAITOK); 302 for (i = 0; i < callwheelsize; i++) 303 LIST_INIT(&cc->cc_callwheel[i]); 304 TAILQ_INIT(&cc->cc_expireq); 305 cc->cc_firstevent = INT64_MAX; 306 for (i = 0; i < 2; i++) 307 cc_cce_cleanup(cc, i); 308 if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ 309 return; 310 for (i = 0; i < ncallout; i++) { 311 c = &cc->cc_callout[i]; 312 callout_init(c, 0); 313 c->c_flags = CALLOUT_LOCAL_ALLOC; 314 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); 315 } 316 } 317 318 #ifdef SMP 319 /* 320 * Switches the cpu tied to a specific callout. 321 * The function expects a locked incoming callout cpu and returns with 322 * locked outcoming callout cpu. 323 */ 324 static struct callout_cpu * 325 callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) 326 { 327 struct callout_cpu *new_cc; 328 329 MPASS(c != NULL && cc != NULL); 330 CC_LOCK_ASSERT(cc); 331 332 /* 333 * Avoid interrupts and preemption firing after the callout cpu 334 * is blocked in order to avoid deadlocks as the new thread 335 * may be willing to acquire the callout cpu lock. 336 */ 337 c->c_cpu = CPUBLOCK; 338 spinlock_enter(); 339 CC_UNLOCK(cc); 340 new_cc = CC_CPU(new_cpu); 341 CC_LOCK(new_cc); 342 spinlock_exit(); 343 c->c_cpu = new_cpu; 344 return (new_cc); 345 } 346 #endif 347 348 /* 349 * Start standard softclock thread. 350 */ 351 static void 352 start_softclock(void *dummy) 353 { 354 struct callout_cpu *cc; 355 char name[MAXCOMLEN]; 356 #ifdef SMP 357 int cpu; 358 #endif 359 360 cc = CC_CPU(timeout_cpu); 361 snprintf(name, sizeof(name), "clock (%d)", timeout_cpu); 362 if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK, 363 INTR_MPSAFE, &cc->cc_cookie)) 364 panic("died while creating standard software ithreads"); 365 #ifdef SMP 366 CPU_FOREACH(cpu) { 367 if (cpu == timeout_cpu) 368 continue; 369 cc = CC_CPU(cpu); 370 cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ 371 callout_cpu_init(cc); 372 snprintf(name, sizeof(name), "clock (%d)", cpu); 373 if (swi_add(NULL, name, softclock, cc, SWI_CLOCK, 374 INTR_MPSAFE, &cc->cc_cookie)) 375 panic("died while creating standard software ithreads"); 376 } 377 #endif 378 } 379 SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); 380 381 #define CC_HASH_SHIFT 8 382 383 static inline u_int 384 callout_hash(sbintime_t sbt) 385 { 386 387 return (sbt >> (32 - CC_HASH_SHIFT)); 388 } 389 390 static inline u_int 391 callout_get_bucket(sbintime_t sbt) 392 { 393 394 return (callout_hash(sbt) & callwheelmask); 395 } 396 397 void 398 callout_process(sbintime_t now) 399 { 400 struct callout *tmp, *tmpn; 401 struct callout_cpu *cc; 402 struct callout_list *sc; 403 sbintime_t first, last, max, tmp_max; 404 uint32_t lookahead; 405 u_int firstb, lastb, nowb; 406 #ifdef CALLOUT_PROFILING 407 int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; 408 #endif 409 410 cc = CC_SELF(); 411 mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); 412 413 /* Compute the buckets of the last scan and present times. */ 414 firstb = callout_hash(cc->cc_lastscan); 415 cc->cc_lastscan = now; 416 nowb = callout_hash(now); 417 418 /* Compute the last bucket and minimum time of the bucket after it. */ 419 if (nowb == firstb) 420 lookahead = (SBT_1S / 16); 421 else if (nowb - firstb == 1) 422 lookahead = (SBT_1S / 8); 423 else 424 lookahead = (SBT_1S / 2); 425 first = last = now; 426 first += (lookahead / 2); 427 last += lookahead; 428 last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); 429 lastb = callout_hash(last) - 1; 430 max = last; 431 432 /* 433 * Check if we wrapped around the entire wheel from the last scan. 434 * In case, we need to scan entirely the wheel for pending callouts. 435 */ 436 if (lastb - firstb >= callwheelsize) { 437 lastb = firstb + callwheelsize - 1; 438 if (nowb - firstb >= callwheelsize) 439 nowb = lastb; 440 } 441 442 /* Iterate callwheel from firstb to nowb and then up to lastb. */ 443 do { 444 sc = &cc->cc_callwheel[firstb & callwheelmask]; 445 tmp = LIST_FIRST(sc); 446 while (tmp != NULL) { 447 /* Run the callout if present time within allowed. */ 448 if (tmp->c_time <= now) { 449 /* 450 * Consumer told us the callout may be run 451 * directly from hardware interrupt context. 452 */ 453 if (tmp->c_flags & CALLOUT_DIRECT) { 454 #ifdef CALLOUT_PROFILING 455 ++depth_dir; 456 #endif 457 cc->cc_exec_next_dir = 458 LIST_NEXT(tmp, c_links.le); 459 cc->cc_bucket = firstb & callwheelmask; 460 LIST_REMOVE(tmp, c_links.le); 461 softclock_call_cc(tmp, cc, 462 #ifdef CALLOUT_PROFILING 463 &mpcalls_dir, &lockcalls_dir, NULL, 464 #endif 465 1); 466 tmp = cc->cc_exec_next_dir; 467 } else { 468 tmpn = LIST_NEXT(tmp, c_links.le); 469 LIST_REMOVE(tmp, c_links.le); 470 TAILQ_INSERT_TAIL(&cc->cc_expireq, 471 tmp, c_links.tqe); 472 tmp->c_flags |= CALLOUT_PROCESSED; 473 tmp = tmpn; 474 } 475 continue; 476 } 477 /* Skip events from distant future. */ 478 if (tmp->c_time >= max) 479 goto next; 480 /* 481 * Event minimal time is bigger than present maximal 482 * time, so it cannot be aggregated. 483 */ 484 if (tmp->c_time > last) { 485 lastb = nowb; 486 goto next; 487 } 488 /* Update first and last time, respecting this event. */ 489 if (tmp->c_time < first) 490 first = tmp->c_time; 491 tmp_max = tmp->c_time + tmp->c_precision; 492 if (tmp_max < last) 493 last = tmp_max; 494 next: 495 tmp = LIST_NEXT(tmp, c_links.le); 496 } 497 /* Proceed with the next bucket. */ 498 firstb++; 499 /* 500 * Stop if we looked after present time and found 501 * some event we can't execute at now. 502 * Stop if we looked far enough into the future. 503 */ 504 } while (((int)(firstb - lastb)) <= 0); 505 cc->cc_firstevent = last; 506 #ifndef NO_EVENTTIMERS 507 cpu_new_callout(curcpu, last, first); 508 #endif 509 #ifdef CALLOUT_PROFILING 510 avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; 511 avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; 512 avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; 513 #endif 514 mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); 515 /* 516 * swi_sched acquires the thread lock, so we don't want to call it 517 * with cc_lock held; incorrect locking order. 518 */ 519 if (!TAILQ_EMPTY(&cc->cc_expireq)) 520 swi_sched(cc->cc_cookie, 0); 521 } 522 523 static struct callout_cpu * 524 callout_lock(struct callout *c) 525 { 526 struct callout_cpu *cc; 527 int cpu; 528 529 for (;;) { 530 cpu = c->c_cpu; 531 #ifdef SMP 532 if (cpu == CPUBLOCK) { 533 while (c->c_cpu == CPUBLOCK) 534 cpu_spinwait(); 535 continue; 536 } 537 #endif 538 cc = CC_CPU(cpu); 539 CC_LOCK(cc); 540 if (cpu == c->c_cpu) 541 break; 542 CC_UNLOCK(cc); 543 } 544 return (cc); 545 } 546 547 static void 548 callout_cc_add(struct callout *c, struct callout_cpu *cc, 549 sbintime_t sbt, sbintime_t precision, void (*func)(void *), 550 void *arg, int cpu, int flags) 551 { 552 int bucket; 553 554 CC_LOCK_ASSERT(cc); 555 if (sbt < cc->cc_lastscan) 556 sbt = cc->cc_lastscan; 557 c->c_arg = arg; 558 c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); 559 if (flags & C_DIRECT_EXEC) 560 c->c_flags |= CALLOUT_DIRECT; 561 c->c_flags &= ~CALLOUT_PROCESSED; 562 c->c_func = func; 563 c->c_time = sbt; 564 c->c_precision = precision; 565 bucket = callout_get_bucket(c->c_time); 566 CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", 567 c, (int)(c->c_precision >> 32), 568 (u_int)(c->c_precision & 0xffffffff)); 569 LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); 570 if (cc->cc_bucket == bucket) 571 cc->cc_exec_next_dir = c; 572 #ifndef NO_EVENTTIMERS 573 /* 574 * Inform the eventtimers(4) subsystem there's a new callout 575 * that has been inserted, but only if really required. 576 */ 577 if (INT64_MAX - c->c_time < c->c_precision) 578 c->c_precision = INT64_MAX - c->c_time; 579 sbt = c->c_time + c->c_precision; 580 if (sbt < cc->cc_firstevent) { 581 cc->cc_firstevent = sbt; 582 cpu_new_callout(cpu, sbt, c->c_time); 583 } 584 #endif 585 } 586 587 static void 588 callout_cc_del(struct callout *c, struct callout_cpu *cc) 589 { 590 591 if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) 592 return; 593 c->c_func = NULL; 594 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); 595 } 596 597 static void 598 softclock_call_cc(struct callout *c, struct callout_cpu *cc, 599 #ifdef CALLOUT_PROFILING 600 int *mpcalls, int *lockcalls, int *gcalls, 601 #endif 602 int direct) 603 { 604 struct rm_priotracker tracker; 605 void (*c_func)(void *); 606 void *c_arg; 607 struct lock_class *class; 608 struct lock_object *c_lock; 609 uintptr_t lock_status; 610 int c_flags; 611 #ifdef SMP 612 struct callout_cpu *new_cc; 613 void (*new_func)(void *); 614 void *new_arg; 615 int flags, new_cpu; 616 sbintime_t new_prec, new_time; 617 #endif 618 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 619 sbintime_t sbt1, sbt2; 620 struct timespec ts2; 621 static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ 622 static timeout_t *lastfunc; 623 #endif 624 625 KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == 626 (CALLOUT_PENDING | CALLOUT_ACTIVE), 627 ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); 628 class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; 629 lock_status = 0; 630 if (c->c_flags & CALLOUT_SHAREDLOCK) { 631 if (class == &lock_class_rm) 632 lock_status = (uintptr_t)&tracker; 633 else 634 lock_status = 1; 635 } 636 c_lock = c->c_lock; 637 c_func = c->c_func; 638 c_arg = c->c_arg; 639 c_flags = c->c_flags; 640 if (c->c_flags & CALLOUT_LOCAL_ALLOC) 641 c->c_flags = CALLOUT_LOCAL_ALLOC; 642 else 643 c->c_flags &= ~CALLOUT_PENDING; 644 cc->cc_exec_entity[direct].cc_curr = c; 645 cc->cc_exec_entity[direct].cc_cancel = false; 646 CC_UNLOCK(cc); 647 if (c_lock != NULL) { 648 class->lc_lock(c_lock, lock_status); 649 /* 650 * The callout may have been cancelled 651 * while we switched locks. 652 */ 653 if (cc->cc_exec_entity[direct].cc_cancel) { 654 class->lc_unlock(c_lock); 655 goto skip; 656 } 657 /* The callout cannot be stopped now. */ 658 cc->cc_exec_entity[direct].cc_cancel = true; 659 if (c_lock == &Giant.lock_object) { 660 #ifdef CALLOUT_PROFILING 661 (*gcalls)++; 662 #endif 663 CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", 664 c, c_func, c_arg); 665 } else { 666 #ifdef CALLOUT_PROFILING 667 (*lockcalls)++; 668 #endif 669 CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", 670 c, c_func, c_arg); 671 } 672 } else { 673 #ifdef CALLOUT_PROFILING 674 (*mpcalls)++; 675 #endif 676 CTR3(KTR_CALLOUT, "callout %p func %p arg %p", 677 c, c_func, c_arg); 678 } 679 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 680 sbt1 = sbinuptime(); 681 #endif 682 THREAD_NO_SLEEPING(); 683 SDT_PROBE(callout_execute, kernel, , callout__start, c, 0, 0, 0, 0); 684 c_func(c_arg); 685 SDT_PROBE(callout_execute, kernel, , callout__end, c, 0, 0, 0, 0); 686 THREAD_SLEEPING_OK(); 687 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 688 sbt2 = sbinuptime(); 689 sbt2 -= sbt1; 690 if (sbt2 > maxdt) { 691 if (lastfunc != c_func || sbt2 > maxdt * 2) { 692 ts2 = sbttots(sbt2); 693 printf( 694 "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", 695 c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); 696 } 697 maxdt = sbt2; 698 lastfunc = c_func; 699 } 700 #endif 701 CTR1(KTR_CALLOUT, "callout %p finished", c); 702 if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) 703 class->lc_unlock(c_lock); 704 skip: 705 CC_LOCK(cc); 706 KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr")); 707 cc->cc_exec_entity[direct].cc_curr = NULL; 708 if (cc->cc_exec_entity[direct].cc_waiting) { 709 /* 710 * There is someone waiting for the 711 * callout to complete. 712 * If the callout was scheduled for 713 * migration just cancel it. 714 */ 715 if (cc_cce_migrating(cc, direct)) { 716 cc_cce_cleanup(cc, direct); 717 718 /* 719 * It should be assert here that the callout is not 720 * destroyed but that is not easy. 721 */ 722 c->c_flags &= ~CALLOUT_DFRMIGRATION; 723 } 724 cc->cc_exec_entity[direct].cc_waiting = false; 725 CC_UNLOCK(cc); 726 wakeup(&cc->cc_exec_entity[direct].cc_waiting); 727 CC_LOCK(cc); 728 } else if (cc_cce_migrating(cc, direct)) { 729 KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, 730 ("Migrating legacy callout %p", c)); 731 #ifdef SMP 732 /* 733 * If the callout was scheduled for 734 * migration just perform it now. 735 */ 736 new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu; 737 new_time = cc->cc_exec_entity[direct].ce_migration_time; 738 new_prec = cc->cc_exec_entity[direct].ce_migration_prec; 739 new_func = cc->cc_exec_entity[direct].ce_migration_func; 740 new_arg = cc->cc_exec_entity[direct].ce_migration_arg; 741 cc_cce_cleanup(cc, direct); 742 743 /* 744 * It should be assert here that the callout is not destroyed 745 * but that is not easy. 746 * 747 * As first thing, handle deferred callout stops. 748 */ 749 if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { 750 CTR3(KTR_CALLOUT, 751 "deferred cancelled %p func %p arg %p", 752 c, new_func, new_arg); 753 callout_cc_del(c, cc); 754 return; 755 } 756 c->c_flags &= ~CALLOUT_DFRMIGRATION; 757 758 new_cc = callout_cpu_switch(c, cc, new_cpu); 759 flags = (direct) ? C_DIRECT_EXEC : 0; 760 callout_cc_add(c, new_cc, new_time, new_prec, new_func, 761 new_arg, new_cpu, flags); 762 CC_UNLOCK(new_cc); 763 CC_LOCK(cc); 764 #else 765 panic("migration should not happen"); 766 #endif 767 } 768 /* 769 * If the current callout is locally allocated (from 770 * timeout(9)) then put it on the freelist. 771 * 772 * Note: we need to check the cached copy of c_flags because 773 * if it was not local, then it's not safe to deref the 774 * callout pointer. 775 */ 776 KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || 777 c->c_flags == CALLOUT_LOCAL_ALLOC, 778 ("corrupted callout")); 779 if (c_flags & CALLOUT_LOCAL_ALLOC) 780 callout_cc_del(c, cc); 781 } 782 783 /* 784 * The callout mechanism is based on the work of Adam M. Costello and 785 * George Varghese, published in a technical report entitled "Redesigning 786 * the BSD Callout and Timer Facilities" and modified slightly for inclusion 787 * in FreeBSD by Justin T. Gibbs. The original work on the data structures 788 * used in this implementation was published by G. Varghese and T. Lauck in 789 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for 790 * the Efficient Implementation of a Timer Facility" in the Proceedings of 791 * the 11th ACM Annual Symposium on Operating Systems Principles, 792 * Austin, Texas Nov 1987. 793 */ 794 795 /* 796 * Software (low priority) clock interrupt. 797 * Run periodic events from timeout queue. 798 */ 799 void 800 softclock(void *arg) 801 { 802 struct callout_cpu *cc; 803 struct callout *c; 804 #ifdef CALLOUT_PROFILING 805 int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; 806 #endif 807 808 cc = (struct callout_cpu *)arg; 809 CC_LOCK(cc); 810 while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { 811 TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 812 softclock_call_cc(c, cc, 813 #ifdef CALLOUT_PROFILING 814 &mpcalls, &lockcalls, &gcalls, 815 #endif 816 0); 817 #ifdef CALLOUT_PROFILING 818 ++depth; 819 #endif 820 } 821 #ifdef CALLOUT_PROFILING 822 avg_depth += (depth * 1000 - avg_depth) >> 8; 823 avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; 824 avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; 825 avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; 826 #endif 827 CC_UNLOCK(cc); 828 } 829 830 /* 831 * timeout -- 832 * Execute a function after a specified length of time. 833 * 834 * untimeout -- 835 * Cancel previous timeout function call. 836 * 837 * callout_handle_init -- 838 * Initialize a handle so that using it with untimeout is benign. 839 * 840 * See AT&T BCI Driver Reference Manual for specification. This 841 * implementation differs from that one in that although an 842 * identification value is returned from timeout, the original 843 * arguments to timeout as well as the identifier are used to 844 * identify entries for untimeout. 845 */ 846 struct callout_handle 847 timeout(ftn, arg, to_ticks) 848 timeout_t *ftn; 849 void *arg; 850 int to_ticks; 851 { 852 struct callout_cpu *cc; 853 struct callout *new; 854 struct callout_handle handle; 855 856 cc = CC_CPU(timeout_cpu); 857 CC_LOCK(cc); 858 /* Fill in the next free callout structure. */ 859 new = SLIST_FIRST(&cc->cc_callfree); 860 if (new == NULL) 861 /* XXX Attempt to malloc first */ 862 panic("timeout table full"); 863 SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); 864 callout_reset(new, to_ticks, ftn, arg); 865 handle.callout = new; 866 CC_UNLOCK(cc); 867 868 return (handle); 869 } 870 871 void 872 untimeout(ftn, arg, handle) 873 timeout_t *ftn; 874 void *arg; 875 struct callout_handle handle; 876 { 877 struct callout_cpu *cc; 878 879 /* 880 * Check for a handle that was initialized 881 * by callout_handle_init, but never used 882 * for a real timeout. 883 */ 884 if (handle.callout == NULL) 885 return; 886 887 cc = callout_lock(handle.callout); 888 if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) 889 callout_stop(handle.callout); 890 CC_UNLOCK(cc); 891 } 892 893 void 894 callout_handle_init(struct callout_handle *handle) 895 { 896 handle->callout = NULL; 897 } 898 899 /* 900 * New interface; clients allocate their own callout structures. 901 * 902 * callout_reset() - establish or change a timeout 903 * callout_stop() - disestablish a timeout 904 * callout_init() - initialize a callout structure so that it can 905 * safely be passed to callout_reset() and callout_stop() 906 * 907 * <sys/callout.h> defines three convenience macros: 908 * 909 * callout_active() - returns truth if callout has not been stopped, 910 * drained, or deactivated since the last time the callout was 911 * reset. 912 * callout_pending() - returns truth if callout is still waiting for timeout 913 * callout_deactivate() - marks the callout as having been serviced 914 */ 915 int 916 callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, 917 void (*ftn)(void *), void *arg, int cpu, int flags) 918 { 919 sbintime_t to_sbt, pr; 920 struct callout_cpu *cc; 921 int cancelled, direct; 922 923 cancelled = 0; 924 if (flags & C_ABSOLUTE) { 925 to_sbt = sbt; 926 } else { 927 if ((flags & C_HARDCLOCK) && (sbt < tick_sbt)) 928 sbt = tick_sbt; 929 if ((flags & C_HARDCLOCK) || 930 #ifdef NO_EVENTTIMERS 931 sbt >= sbt_timethreshold) { 932 to_sbt = getsbinuptime(); 933 934 /* Add safety belt for the case of hz > 1000. */ 935 to_sbt += tc_tick_sbt - tick_sbt; 936 #else 937 sbt >= sbt_tickthreshold) { 938 /* 939 * Obtain the time of the last hardclock() call on 940 * this CPU directly from the kern_clocksource.c. 941 * This value is per-CPU, but it is equal for all 942 * active ones. 943 */ 944 #ifdef __LP64__ 945 to_sbt = DPCPU_GET(hardclocktime); 946 #else 947 spinlock_enter(); 948 to_sbt = DPCPU_GET(hardclocktime); 949 spinlock_exit(); 950 #endif 951 #endif 952 if ((flags & C_HARDCLOCK) == 0) 953 to_sbt += tick_sbt; 954 } else 955 to_sbt = sbinuptime(); 956 if (INT64_MAX - to_sbt < sbt) 957 to_sbt = INT64_MAX; 958 else 959 to_sbt += sbt; 960 pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : 961 sbt >> C_PRELGET(flags)); 962 if (pr > precision) 963 precision = pr; 964 } 965 /* 966 * Don't allow migration of pre-allocated callouts lest they 967 * become unbalanced. 968 */ 969 if (c->c_flags & CALLOUT_LOCAL_ALLOC) 970 cpu = c->c_cpu; 971 direct = (c->c_flags & CALLOUT_DIRECT) != 0; 972 KASSERT(!direct || c->c_lock == NULL, 973 ("%s: direct callout %p has lock", __func__, c)); 974 cc = callout_lock(c); 975 if (cc->cc_exec_entity[direct].cc_curr == c) { 976 /* 977 * We're being asked to reschedule a callout which is 978 * currently in progress. If there is a lock then we 979 * can cancel the callout if it has not really started. 980 */ 981 if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel) 982 cancelled = cc->cc_exec_entity[direct].cc_cancel = true; 983 if (cc->cc_exec_entity[direct].cc_waiting) { 984 /* 985 * Someone has called callout_drain to kill this 986 * callout. Don't reschedule. 987 */ 988 CTR4(KTR_CALLOUT, "%s %p func %p arg %p", 989 cancelled ? "cancelled" : "failed to cancel", 990 c, c->c_func, c->c_arg); 991 CC_UNLOCK(cc); 992 return (cancelled); 993 } 994 } 995 if (c->c_flags & CALLOUT_PENDING) { 996 if ((c->c_flags & CALLOUT_PROCESSED) == 0) { 997 if (cc->cc_exec_next_dir == c) 998 cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); 999 LIST_REMOVE(c, c_links.le); 1000 } else 1001 TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 1002 cancelled = 1; 1003 c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); 1004 } 1005 1006 #ifdef SMP 1007 /* 1008 * If the callout must migrate try to perform it immediately. 1009 * If the callout is currently running, just defer the migration 1010 * to a more appropriate moment. 1011 */ 1012 if (c->c_cpu != cpu) { 1013 if (cc->cc_exec_entity[direct].cc_curr == c) { 1014 cc->cc_exec_entity[direct].ce_migration_cpu = cpu; 1015 cc->cc_exec_entity[direct].ce_migration_time 1016 = to_sbt; 1017 cc->cc_exec_entity[direct].ce_migration_prec 1018 = precision; 1019 cc->cc_exec_entity[direct].ce_migration_func = ftn; 1020 cc->cc_exec_entity[direct].ce_migration_arg = arg; 1021 c->c_flags |= CALLOUT_DFRMIGRATION; 1022 CTR6(KTR_CALLOUT, 1023 "migration of %p func %p arg %p in %d.%08x to %u deferred", 1024 c, c->c_func, c->c_arg, (int)(to_sbt >> 32), 1025 (u_int)(to_sbt & 0xffffffff), cpu); 1026 CC_UNLOCK(cc); 1027 return (cancelled); 1028 } 1029 cc = callout_cpu_switch(c, cc, cpu); 1030 } 1031 #endif 1032 1033 callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); 1034 CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", 1035 cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), 1036 (u_int)(to_sbt & 0xffffffff)); 1037 CC_UNLOCK(cc); 1038 1039 return (cancelled); 1040 } 1041 1042 /* 1043 * Common idioms that can be optimized in the future. 1044 */ 1045 int 1046 callout_schedule_on(struct callout *c, int to_ticks, int cpu) 1047 { 1048 return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu); 1049 } 1050 1051 int 1052 callout_schedule(struct callout *c, int to_ticks) 1053 { 1054 return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu); 1055 } 1056 1057 int 1058 _callout_stop_safe(c, safe) 1059 struct callout *c; 1060 int safe; 1061 { 1062 struct callout_cpu *cc, *old_cc; 1063 struct lock_class *class; 1064 int direct, sq_locked, use_lock; 1065 1066 /* 1067 * Some old subsystems don't hold Giant while running a callout_stop(), 1068 * so just discard this check for the moment. 1069 */ 1070 if (!safe && c->c_lock != NULL) { 1071 if (c->c_lock == &Giant.lock_object) 1072 use_lock = mtx_owned(&Giant); 1073 else { 1074 use_lock = 1; 1075 class = LOCK_CLASS(c->c_lock); 1076 class->lc_assert(c->c_lock, LA_XLOCKED); 1077 } 1078 } else 1079 use_lock = 0; 1080 direct = (c->c_flags & CALLOUT_DIRECT) != 0; 1081 sq_locked = 0; 1082 old_cc = NULL; 1083 again: 1084 cc = callout_lock(c); 1085 1086 /* 1087 * If the callout was migrating while the callout cpu lock was 1088 * dropped, just drop the sleepqueue lock and check the states 1089 * again. 1090 */ 1091 if (sq_locked != 0 && cc != old_cc) { 1092 #ifdef SMP 1093 CC_UNLOCK(cc); 1094 sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting); 1095 sq_locked = 0; 1096 old_cc = NULL; 1097 goto again; 1098 #else 1099 panic("migration should not happen"); 1100 #endif 1101 } 1102 1103 /* 1104 * If the callout isn't pending, it's not on the queue, so 1105 * don't attempt to remove it from the queue. We can try to 1106 * stop it by other means however. 1107 */ 1108 if (!(c->c_flags & CALLOUT_PENDING)) { 1109 c->c_flags &= ~CALLOUT_ACTIVE; 1110 1111 /* 1112 * If it wasn't on the queue and it isn't the current 1113 * callout, then we can't stop it, so just bail. 1114 */ 1115 if (cc->cc_exec_entity[direct].cc_curr != c) { 1116 CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", 1117 c, c->c_func, c->c_arg); 1118 CC_UNLOCK(cc); 1119 if (sq_locked) 1120 sleepq_release( 1121 &cc->cc_exec_entity[direct].cc_waiting); 1122 return (0); 1123 } 1124 1125 if (safe) { 1126 /* 1127 * The current callout is running (or just 1128 * about to run) and blocking is allowed, so 1129 * just wait for the current invocation to 1130 * finish. 1131 */ 1132 while (cc->cc_exec_entity[direct].cc_curr == c) { 1133 /* 1134 * Use direct calls to sleepqueue interface 1135 * instead of cv/msleep in order to avoid 1136 * a LOR between cc_lock and sleepqueue 1137 * chain spinlocks. This piece of code 1138 * emulates a msleep_spin() call actually. 1139 * 1140 * If we already have the sleepqueue chain 1141 * locked, then we can safely block. If we 1142 * don't already have it locked, however, 1143 * we have to drop the cc_lock to lock 1144 * it. This opens several races, so we 1145 * restart at the beginning once we have 1146 * both locks. If nothing has changed, then 1147 * we will end up back here with sq_locked 1148 * set. 1149 */ 1150 if (!sq_locked) { 1151 CC_UNLOCK(cc); 1152 sleepq_lock( 1153 &cc->cc_exec_entity[direct].cc_waiting); 1154 sq_locked = 1; 1155 old_cc = cc; 1156 goto again; 1157 } 1158 1159 /* 1160 * Migration could be cancelled here, but 1161 * as long as it is still not sure when it 1162 * will be packed up, just let softclock() 1163 * take care of it. 1164 */ 1165 cc->cc_exec_entity[direct].cc_waiting = true; 1166 DROP_GIANT(); 1167 CC_UNLOCK(cc); 1168 sleepq_add( 1169 &cc->cc_exec_entity[direct].cc_waiting, 1170 &cc->cc_lock.lock_object, "codrain", 1171 SLEEPQ_SLEEP, 0); 1172 sleepq_wait( 1173 &cc->cc_exec_entity[direct].cc_waiting, 1174 0); 1175 sq_locked = 0; 1176 old_cc = NULL; 1177 1178 /* Reacquire locks previously released. */ 1179 PICKUP_GIANT(); 1180 CC_LOCK(cc); 1181 } 1182 } else if (use_lock && 1183 !cc->cc_exec_entity[direct].cc_cancel) { 1184 /* 1185 * The current callout is waiting for its 1186 * lock which we hold. Cancel the callout 1187 * and return. After our caller drops the 1188 * lock, the callout will be skipped in 1189 * softclock(). 1190 */ 1191 cc->cc_exec_entity[direct].cc_cancel = true; 1192 CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", 1193 c, c->c_func, c->c_arg); 1194 KASSERT(!cc_cce_migrating(cc, direct), 1195 ("callout wrongly scheduled for migration")); 1196 CC_UNLOCK(cc); 1197 KASSERT(!sq_locked, ("sleepqueue chain locked")); 1198 return (1); 1199 } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { 1200 c->c_flags &= ~CALLOUT_DFRMIGRATION; 1201 CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", 1202 c, c->c_func, c->c_arg); 1203 CC_UNLOCK(cc); 1204 return (1); 1205 } 1206 CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", 1207 c, c->c_func, c->c_arg); 1208 CC_UNLOCK(cc); 1209 KASSERT(!sq_locked, ("sleepqueue chain still locked")); 1210 return (0); 1211 } 1212 if (sq_locked) 1213 sleepq_release(&cc->cc_exec_entity[direct].cc_waiting); 1214 1215 c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); 1216 1217 CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", 1218 c, c->c_func, c->c_arg); 1219 if ((c->c_flags & CALLOUT_PROCESSED) == 0) { 1220 if (cc->cc_exec_next_dir == c) 1221 cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); 1222 LIST_REMOVE(c, c_links.le); 1223 } else 1224 TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 1225 callout_cc_del(c, cc); 1226 1227 CC_UNLOCK(cc); 1228 return (1); 1229 } 1230 1231 void 1232 callout_init(c, mpsafe) 1233 struct callout *c; 1234 int mpsafe; 1235 { 1236 bzero(c, sizeof *c); 1237 if (mpsafe) { 1238 c->c_lock = NULL; 1239 c->c_flags = CALLOUT_RETURNUNLOCKED; 1240 } else { 1241 c->c_lock = &Giant.lock_object; 1242 c->c_flags = 0; 1243 } 1244 c->c_cpu = timeout_cpu; 1245 } 1246 1247 void 1248 _callout_init_lock(c, lock, flags) 1249 struct callout *c; 1250 struct lock_object *lock; 1251 int flags; 1252 { 1253 bzero(c, sizeof *c); 1254 c->c_lock = lock; 1255 KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, 1256 ("callout_init_lock: bad flags %d", flags)); 1257 KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, 1258 ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); 1259 KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & 1260 (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", 1261 __func__)); 1262 c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); 1263 c->c_cpu = timeout_cpu; 1264 } 1265 1266 #ifdef APM_FIXUP_CALLTODO 1267 /* 1268 * Adjust the kernel calltodo timeout list. This routine is used after 1269 * an APM resume to recalculate the calltodo timer list values with the 1270 * number of hz's we have been sleeping. The next hardclock() will detect 1271 * that there are fired timers and run softclock() to execute them. 1272 * 1273 * Please note, I have not done an exhaustive analysis of what code this 1274 * might break. I am motivated to have my select()'s and alarm()'s that 1275 * have expired during suspend firing upon resume so that the applications 1276 * which set the timer can do the maintanence the timer was for as close 1277 * as possible to the originally intended time. Testing this code for a 1278 * week showed that resuming from a suspend resulted in 22 to 25 timers 1279 * firing, which seemed independant on whether the suspend was 2 hours or 1280 * 2 days. Your milage may vary. - Ken Key <key@cs.utk.edu> 1281 */ 1282 void 1283 adjust_timeout_calltodo(time_change) 1284 struct timeval *time_change; 1285 { 1286 register struct callout *p; 1287 unsigned long delta_ticks; 1288 1289 /* 1290 * How many ticks were we asleep? 1291 * (stolen from tvtohz()). 1292 */ 1293 1294 /* Don't do anything */ 1295 if (time_change->tv_sec < 0) 1296 return; 1297 else if (time_change->tv_sec <= LONG_MAX / 1000000) 1298 delta_ticks = (time_change->tv_sec * 1000000 + 1299 time_change->tv_usec + (tick - 1)) / tick + 1; 1300 else if (time_change->tv_sec <= LONG_MAX / hz) 1301 delta_ticks = time_change->tv_sec * hz + 1302 (time_change->tv_usec + (tick - 1)) / tick + 1; 1303 else 1304 delta_ticks = LONG_MAX; 1305 1306 if (delta_ticks > INT_MAX) 1307 delta_ticks = INT_MAX; 1308 1309 /* 1310 * Now rip through the timer calltodo list looking for timers 1311 * to expire. 1312 */ 1313 1314 /* don't collide with softclock() */ 1315 CC_LOCK(cc); 1316 for (p = calltodo.c_next; p != NULL; p = p->c_next) { 1317 p->c_time -= delta_ticks; 1318 1319 /* Break if the timer had more time on it than delta_ticks */ 1320 if (p->c_time > 0) 1321 break; 1322 1323 /* take back the ticks the timer didn't use (p->c_time <= 0) */ 1324 delta_ticks = -p->c_time; 1325 } 1326 CC_UNLOCK(cc); 1327 1328 return; 1329 } 1330 #endif /* APM_FIXUP_CALLTODO */ 1331 1332 static int 1333 flssbt(sbintime_t sbt) 1334 { 1335 1336 sbt += (uint64_t)sbt >> 1; 1337 if (sizeof(long) >= sizeof(sbintime_t)) 1338 return (flsl(sbt)); 1339 if (sbt >= SBT_1S) 1340 return (flsl(((uint64_t)sbt) >> 32) + 32); 1341 return (flsl(sbt)); 1342 } 1343 1344 /* 1345 * Dump immediate statistic snapshot of the scheduled callouts. 1346 */ 1347 static int 1348 sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) 1349 { 1350 struct callout *tmp; 1351 struct callout_cpu *cc; 1352 struct callout_list *sc; 1353 sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; 1354 int ct[64], cpr[64], ccpbk[32]; 1355 int error, val, i, count, tcum, pcum, maxc, c, medc; 1356 #ifdef SMP 1357 int cpu; 1358 #endif 1359 1360 val = 0; 1361 error = sysctl_handle_int(oidp, &val, 0, req); 1362 if (error != 0 || req->newptr == NULL) 1363 return (error); 1364 count = maxc = 0; 1365 st = spr = maxt = maxpr = 0; 1366 bzero(ccpbk, sizeof(ccpbk)); 1367 bzero(ct, sizeof(ct)); 1368 bzero(cpr, sizeof(cpr)); 1369 now = sbinuptime(); 1370 #ifdef SMP 1371 CPU_FOREACH(cpu) { 1372 cc = CC_CPU(cpu); 1373 #else 1374 cc = CC_CPU(timeout_cpu); 1375 #endif 1376 CC_LOCK(cc); 1377 for (i = 0; i < callwheelsize; i++) { 1378 sc = &cc->cc_callwheel[i]; 1379 c = 0; 1380 LIST_FOREACH(tmp, sc, c_links.le) { 1381 c++; 1382 t = tmp->c_time - now; 1383 if (t < 0) 1384 t = 0; 1385 st += t / SBT_1US; 1386 spr += tmp->c_precision / SBT_1US; 1387 if (t > maxt) 1388 maxt = t; 1389 if (tmp->c_precision > maxpr) 1390 maxpr = tmp->c_precision; 1391 ct[flssbt(t)]++; 1392 cpr[flssbt(tmp->c_precision)]++; 1393 } 1394 if (c > maxc) 1395 maxc = c; 1396 ccpbk[fls(c + c / 2)]++; 1397 count += c; 1398 } 1399 CC_UNLOCK(cc); 1400 #ifdef SMP 1401 } 1402 #endif 1403 1404 for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) 1405 tcum += ct[i]; 1406 medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; 1407 for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) 1408 pcum += cpr[i]; 1409 medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; 1410 for (i = 0, c = 0; i < 32 && c < count / 2; i++) 1411 c += ccpbk[i]; 1412 medc = (i >= 2) ? (1 << (i - 2)) : 0; 1413 1414 printf("Scheduled callouts statistic snapshot:\n"); 1415 printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", 1416 count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); 1417 printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", 1418 medc, 1419 count / callwheelsize / mp_ncpus, 1420 (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, 1421 maxc); 1422 printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", 1423 medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, 1424 (st / count) / 1000000, (st / count) % 1000000, 1425 maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); 1426 printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", 1427 medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, 1428 (spr / count) / 1000000, (spr / count) % 1000000, 1429 maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); 1430 printf(" Distribution: \tbuckets\t time\t tcum\t" 1431 " prec\t pcum\n"); 1432 for (i = 0, tcum = pcum = 0; i < 64; i++) { 1433 if (ct[i] == 0 && cpr[i] == 0) 1434 continue; 1435 t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0; 1436 tcum += ct[i]; 1437 pcum += cpr[i]; 1438 printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n", 1439 t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32, 1440 i - 1 - (32 - CC_HASH_SHIFT), 1441 ct[i], tcum, cpr[i], pcum); 1442 } 1443 return (error); 1444 } 1445 SYSCTL_PROC(_kern, OID_AUTO, callout_stat, 1446 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 1447 0, 0, sysctl_kern_callout_stat, "I", 1448 "Dump immediate statistic snapshot of the scheduled callouts"); 1449