1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_callout_profiling.h" 41 #if defined(__arm__) 42 #include "opt_timer.h" 43 #endif 44 #include "opt_rss.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/bus.h> 49 #include <sys/callout.h> 50 #include <sys/file.h> 51 #include <sys/interrupt.h> 52 #include <sys/kernel.h> 53 #include <sys/ktr.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/mutex.h> 57 #include <sys/proc.h> 58 #include <sys/sdt.h> 59 #include <sys/sleepqueue.h> 60 #include <sys/sysctl.h> 61 #include <sys/smp.h> 62 63 #ifdef SMP 64 #include <machine/cpu.h> 65 #endif 66 67 #ifndef NO_EVENTTIMERS 68 DPCPU_DECLARE(sbintime_t, hardclocktime); 69 #endif 70 71 SDT_PROVIDER_DEFINE(callout_execute); 72 SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start, 73 "struct callout *"); 74 SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end, 75 "struct callout *"); 76 77 #ifdef CALLOUT_PROFILING 78 static int avg_depth; 79 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, 80 "Average number of items examined per softclock call. Units = 1/1000"); 81 static int avg_gcalls; 82 SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, 83 "Average number of Giant callouts made per softclock call. Units = 1/1000"); 84 static int avg_lockcalls; 85 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, 86 "Average number of lock callouts made per softclock call. Units = 1/1000"); 87 static int avg_mpcalls; 88 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, 89 "Average number of MP callouts made per softclock call. Units = 1/1000"); 90 static int avg_depth_dir; 91 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, 92 "Average number of direct callouts examined per callout_process call. " 93 "Units = 1/1000"); 94 static int avg_lockcalls_dir; 95 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, 96 &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " 97 "callout_process call. Units = 1/1000"); 98 static int avg_mpcalls_dir; 99 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, 100 0, "Average number of MP direct callouts made per callout_process call. " 101 "Units = 1/1000"); 102 #endif 103 104 static int ncallout; 105 SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0, 106 "Number of entries in callwheel and size of timeout() preallocation"); 107 108 #ifdef RSS 109 static int pin_default_swi = 1; 110 static int pin_pcpu_swi = 1; 111 #else 112 static int pin_default_swi = 0; 113 static int pin_pcpu_swi = 0; 114 #endif 115 116 SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi, 117 0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)"); 118 SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi, 119 0, "Pin the per-CPU swis (except PCPU 0, which is also default"); 120 121 /* 122 * TODO: 123 * allocate more timeout table slots when table overflows. 124 */ 125 u_int callwheelsize, callwheelmask; 126 127 /* 128 * The callout cpu exec entities represent informations necessary for 129 * describing the state of callouts currently running on the CPU and the ones 130 * necessary for migrating callouts to the new callout cpu. In particular, 131 * the first entry of the array cc_exec_entity holds informations for callout 132 * running in SWI thread context, while the second one holds informations 133 * for callout running directly from hardware interrupt context. 134 * The cached informations are very important for deferring migration when 135 * the migrating callout is already running. 136 */ 137 struct cc_exec { 138 struct callout *cc_next; 139 struct callout *cc_curr; 140 #ifdef SMP 141 void (*ce_migration_func)(void *); 142 void *ce_migration_arg; 143 int ce_migration_cpu; 144 sbintime_t ce_migration_time; 145 sbintime_t ce_migration_prec; 146 #endif 147 bool cc_cancel; 148 bool cc_waiting; 149 }; 150 151 /* 152 * There is one struct callout_cpu per cpu, holding all relevant 153 * state for the callout processing thread on the individual CPU. 154 */ 155 struct callout_cpu { 156 struct mtx_padalign cc_lock; 157 struct cc_exec cc_exec_entity[2]; 158 struct callout *cc_callout; 159 struct callout_list *cc_callwheel; 160 struct callout_tailq cc_expireq; 161 struct callout_slist cc_callfree; 162 sbintime_t cc_firstevent; 163 sbintime_t cc_lastscan; 164 void *cc_cookie; 165 u_int cc_bucket; 166 char cc_ktr_event_name[20]; 167 }; 168 169 #define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr 170 #define cc_exec_next(cc, dir) cc->cc_exec_entity[dir].cc_next 171 #define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel 172 #define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting 173 #ifdef SMP 174 #define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func 175 #define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg 176 #define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu 177 #define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time 178 #define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec 179 180 struct callout_cpu cc_cpu[MAXCPU]; 181 #define CPUBLOCK MAXCPU 182 #define CC_CPU(cpu) (&cc_cpu[(cpu)]) 183 #define CC_SELF() CC_CPU(PCPU_GET(cpuid)) 184 #else 185 struct callout_cpu cc_cpu; 186 #define CC_CPU(cpu) &cc_cpu 187 #define CC_SELF() &cc_cpu 188 #endif 189 #define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) 190 #define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) 191 #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) 192 193 static int timeout_cpu; 194 195 static void callout_cpu_init(struct callout_cpu *cc, int cpu); 196 static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, 197 #ifdef CALLOUT_PROFILING 198 int *mpcalls, int *lockcalls, int *gcalls, 199 #endif 200 int direct); 201 202 static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); 203 204 /** 205 * Locked by cc_lock: 206 * cc_curr - If a callout is in progress, it is cc_curr. 207 * If cc_curr is non-NULL, threads waiting in 208 * callout_drain() will be woken up as soon as the 209 * relevant callout completes. 210 * cc_cancel - Changing to 1 with both callout_lock and cc_lock held 211 * guarantees that the current callout will not run. 212 * The softclock() function sets this to 0 before it 213 * drops callout_lock to acquire c_lock, and it calls 214 * the handler only if curr_cancelled is still 0 after 215 * cc_lock is successfully acquired. 216 * cc_waiting - If a thread is waiting in callout_drain(), then 217 * callout_wait is nonzero. Set only when 218 * cc_curr is non-NULL. 219 */ 220 221 /* 222 * Resets the execution entity tied to a specific callout cpu. 223 */ 224 static void 225 cc_cce_cleanup(struct callout_cpu *cc, int direct) 226 { 227 228 cc_exec_curr(cc, direct) = NULL; 229 cc_exec_next(cc, direct) = NULL; 230 cc_exec_cancel(cc, direct) = false; 231 cc_exec_waiting(cc, direct) = false; 232 #ifdef SMP 233 cc_migration_cpu(cc, direct) = CPUBLOCK; 234 cc_migration_time(cc, direct) = 0; 235 cc_migration_prec(cc, direct) = 0; 236 cc_migration_func(cc, direct) = NULL; 237 cc_migration_arg(cc, direct) = NULL; 238 #endif 239 } 240 241 /* 242 * Checks if migration is requested by a specific callout cpu. 243 */ 244 static int 245 cc_cce_migrating(struct callout_cpu *cc, int direct) 246 { 247 248 #ifdef SMP 249 return (cc_migration_cpu(cc, direct) != CPUBLOCK); 250 #else 251 return (0); 252 #endif 253 } 254 255 /* 256 * Kernel low level callwheel initialization 257 * called on cpu0 during kernel startup. 258 */ 259 static void 260 callout_callwheel_init(void *dummy) 261 { 262 struct callout_cpu *cc; 263 264 /* 265 * Calculate the size of the callout wheel and the preallocated 266 * timeout() structures. 267 * XXX: Clip callout to result of previous function of maxusers 268 * maximum 384. This is still huge, but acceptable. 269 */ 270 ncallout = imin(16 + maxproc + maxfiles, 18508); 271 TUNABLE_INT_FETCH("kern.ncallout", &ncallout); 272 273 /* 274 * Calculate callout wheel size, should be next power of two higher 275 * than 'ncallout'. 276 */ 277 callwheelsize = 1 << fls(ncallout); 278 callwheelmask = callwheelsize - 1; 279 280 /* 281 * Fetch whether we're pinning the swi's or not. 282 */ 283 TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi); 284 TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi); 285 286 /* 287 * Only cpu0 handles timeout(9) and receives a preallocation. 288 * 289 * XXX: Once all timeout(9) consumers are converted this can 290 * be removed. 291 */ 292 timeout_cpu = PCPU_GET(cpuid); 293 cc = CC_CPU(timeout_cpu); 294 cc->cc_callout = malloc(ncallout * sizeof(struct callout), 295 M_CALLOUT, M_WAITOK); 296 callout_cpu_init(cc, timeout_cpu); 297 } 298 SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); 299 300 /* 301 * Initialize the per-cpu callout structures. 302 */ 303 static void 304 callout_cpu_init(struct callout_cpu *cc, int cpu) 305 { 306 struct callout *c; 307 int i; 308 309 mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); 310 SLIST_INIT(&cc->cc_callfree); 311 cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize, 312 M_CALLOUT, M_WAITOK); 313 for (i = 0; i < callwheelsize; i++) 314 LIST_INIT(&cc->cc_callwheel[i]); 315 TAILQ_INIT(&cc->cc_expireq); 316 cc->cc_firstevent = SBT_MAX; 317 for (i = 0; i < 2; i++) 318 cc_cce_cleanup(cc, i); 319 snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), 320 "callwheel cpu %d", cpu); 321 if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ 322 return; 323 for (i = 0; i < ncallout; i++) { 324 c = &cc->cc_callout[i]; 325 callout_init(c, 0); 326 c->c_flags = CALLOUT_LOCAL_ALLOC; 327 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); 328 } 329 } 330 331 #ifdef SMP 332 /* 333 * Switches the cpu tied to a specific callout. 334 * The function expects a locked incoming callout cpu and returns with 335 * locked outcoming callout cpu. 336 */ 337 static struct callout_cpu * 338 callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) 339 { 340 struct callout_cpu *new_cc; 341 342 MPASS(c != NULL && cc != NULL); 343 CC_LOCK_ASSERT(cc); 344 345 /* 346 * Avoid interrupts and preemption firing after the callout cpu 347 * is blocked in order to avoid deadlocks as the new thread 348 * may be willing to acquire the callout cpu lock. 349 */ 350 c->c_cpu = CPUBLOCK; 351 spinlock_enter(); 352 CC_UNLOCK(cc); 353 new_cc = CC_CPU(new_cpu); 354 CC_LOCK(new_cc); 355 spinlock_exit(); 356 c->c_cpu = new_cpu; 357 return (new_cc); 358 } 359 #endif 360 361 /* 362 * Start standard softclock thread. 363 */ 364 static void 365 start_softclock(void *dummy) 366 { 367 struct callout_cpu *cc; 368 char name[MAXCOMLEN]; 369 #ifdef SMP 370 int cpu; 371 struct intr_event *ie; 372 #endif 373 374 cc = CC_CPU(timeout_cpu); 375 snprintf(name, sizeof(name), "clock (%d)", timeout_cpu); 376 if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK, 377 INTR_MPSAFE, &cc->cc_cookie)) 378 panic("died while creating standard software ithreads"); 379 if (pin_default_swi && 380 (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) { 381 printf("%s: timeout clock couldn't be pinned to cpu %d\n", 382 __func__, 383 timeout_cpu); 384 } 385 386 #ifdef SMP 387 CPU_FOREACH(cpu) { 388 if (cpu == timeout_cpu) 389 continue; 390 cc = CC_CPU(cpu); 391 cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ 392 callout_cpu_init(cc, cpu); 393 snprintf(name, sizeof(name), "clock (%d)", cpu); 394 ie = NULL; 395 if (swi_add(&ie, name, softclock, cc, SWI_CLOCK, 396 INTR_MPSAFE, &cc->cc_cookie)) 397 panic("died while creating standard software ithreads"); 398 if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) { 399 printf("%s: per-cpu clock couldn't be pinned to " 400 "cpu %d\n", 401 __func__, 402 cpu); 403 } 404 } 405 #endif 406 } 407 SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); 408 409 #define CC_HASH_SHIFT 8 410 411 static inline u_int 412 callout_hash(sbintime_t sbt) 413 { 414 415 return (sbt >> (32 - CC_HASH_SHIFT)); 416 } 417 418 static inline u_int 419 callout_get_bucket(sbintime_t sbt) 420 { 421 422 return (callout_hash(sbt) & callwheelmask); 423 } 424 425 void 426 callout_process(sbintime_t now) 427 { 428 struct callout *tmp, *tmpn; 429 struct callout_cpu *cc; 430 struct callout_list *sc; 431 sbintime_t first, last, max, tmp_max; 432 uint32_t lookahead; 433 u_int firstb, lastb, nowb; 434 #ifdef CALLOUT_PROFILING 435 int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; 436 #endif 437 438 cc = CC_SELF(); 439 mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); 440 441 /* Compute the buckets of the last scan and present times. */ 442 firstb = callout_hash(cc->cc_lastscan); 443 cc->cc_lastscan = now; 444 nowb = callout_hash(now); 445 446 /* Compute the last bucket and minimum time of the bucket after it. */ 447 if (nowb == firstb) 448 lookahead = (SBT_1S / 16); 449 else if (nowb - firstb == 1) 450 lookahead = (SBT_1S / 8); 451 else 452 lookahead = (SBT_1S / 2); 453 first = last = now; 454 first += (lookahead / 2); 455 last += lookahead; 456 last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); 457 lastb = callout_hash(last) - 1; 458 max = last; 459 460 /* 461 * Check if we wrapped around the entire wheel from the last scan. 462 * In case, we need to scan entirely the wheel for pending callouts. 463 */ 464 if (lastb - firstb >= callwheelsize) { 465 lastb = firstb + callwheelsize - 1; 466 if (nowb - firstb >= callwheelsize) 467 nowb = lastb; 468 } 469 470 /* Iterate callwheel from firstb to nowb and then up to lastb. */ 471 do { 472 sc = &cc->cc_callwheel[firstb & callwheelmask]; 473 tmp = LIST_FIRST(sc); 474 while (tmp != NULL) { 475 /* Run the callout if present time within allowed. */ 476 if (tmp->c_time <= now) { 477 /* 478 * Consumer told us the callout may be run 479 * directly from hardware interrupt context. 480 */ 481 if (tmp->c_flags & CALLOUT_DIRECT) { 482 #ifdef CALLOUT_PROFILING 483 ++depth_dir; 484 #endif 485 cc_exec_next(cc, 1) = 486 LIST_NEXT(tmp, c_links.le); 487 cc->cc_bucket = firstb & callwheelmask; 488 LIST_REMOVE(tmp, c_links.le); 489 softclock_call_cc(tmp, cc, 490 #ifdef CALLOUT_PROFILING 491 &mpcalls_dir, &lockcalls_dir, NULL, 492 #endif 493 1); 494 tmp = cc_exec_next(cc, 1); 495 } else { 496 tmpn = LIST_NEXT(tmp, c_links.le); 497 LIST_REMOVE(tmp, c_links.le); 498 TAILQ_INSERT_TAIL(&cc->cc_expireq, 499 tmp, c_links.tqe); 500 tmp->c_flags |= CALLOUT_PROCESSED; 501 tmp = tmpn; 502 } 503 continue; 504 } 505 /* Skip events from distant future. */ 506 if (tmp->c_time >= max) 507 goto next; 508 /* 509 * Event minimal time is bigger than present maximal 510 * time, so it cannot be aggregated. 511 */ 512 if (tmp->c_time > last) { 513 lastb = nowb; 514 goto next; 515 } 516 /* Update first and last time, respecting this event. */ 517 if (tmp->c_time < first) 518 first = tmp->c_time; 519 tmp_max = tmp->c_time + tmp->c_precision; 520 if (tmp_max < last) 521 last = tmp_max; 522 next: 523 tmp = LIST_NEXT(tmp, c_links.le); 524 } 525 /* Proceed with the next bucket. */ 526 firstb++; 527 /* 528 * Stop if we looked after present time and found 529 * some event we can't execute at now. 530 * Stop if we looked far enough into the future. 531 */ 532 } while (((int)(firstb - lastb)) <= 0); 533 cc->cc_firstevent = last; 534 #ifndef NO_EVENTTIMERS 535 cpu_new_callout(curcpu, last, first); 536 #endif 537 #ifdef CALLOUT_PROFILING 538 avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; 539 avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; 540 avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; 541 #endif 542 mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); 543 /* 544 * swi_sched acquires the thread lock, so we don't want to call it 545 * with cc_lock held; incorrect locking order. 546 */ 547 if (!TAILQ_EMPTY(&cc->cc_expireq)) 548 swi_sched(cc->cc_cookie, 0); 549 } 550 551 static struct callout_cpu * 552 callout_lock(struct callout *c) 553 { 554 struct callout_cpu *cc; 555 int cpu; 556 557 for (;;) { 558 cpu = c->c_cpu; 559 #ifdef SMP 560 if (cpu == CPUBLOCK) { 561 while (c->c_cpu == CPUBLOCK) 562 cpu_spinwait(); 563 continue; 564 } 565 #endif 566 cc = CC_CPU(cpu); 567 CC_LOCK(cc); 568 if (cpu == c->c_cpu) 569 break; 570 CC_UNLOCK(cc); 571 } 572 return (cc); 573 } 574 575 static void 576 callout_cc_add(struct callout *c, struct callout_cpu *cc, 577 sbintime_t sbt, sbintime_t precision, void (*func)(void *), 578 void *arg, int cpu, int flags, int direct) 579 { 580 int bucket; 581 582 CC_LOCK_ASSERT(cc); 583 if (sbt < cc->cc_lastscan) 584 sbt = cc->cc_lastscan; 585 c->c_arg = arg; 586 c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); 587 if (flags & C_DIRECT_EXEC) 588 c->c_flags |= CALLOUT_DIRECT; 589 c->c_flags &= ~CALLOUT_PROCESSED; 590 c->c_func = func; 591 c->c_time = sbt; 592 c->c_precision = precision; 593 bucket = callout_get_bucket(c->c_time); 594 CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", 595 c, (int)(c->c_precision >> 32), 596 (u_int)(c->c_precision & 0xffffffff)); 597 LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); 598 if (cc->cc_bucket == bucket) 599 cc_exec_next(cc, direct) = c; 600 #ifndef NO_EVENTTIMERS 601 /* 602 * Inform the eventtimers(4) subsystem there's a new callout 603 * that has been inserted, but only if really required. 604 */ 605 if (SBT_MAX - c->c_time < c->c_precision) 606 c->c_precision = SBT_MAX - c->c_time; 607 sbt = c->c_time + c->c_precision; 608 if (sbt < cc->cc_firstevent) { 609 cc->cc_firstevent = sbt; 610 cpu_new_callout(cpu, sbt, c->c_time); 611 } 612 #endif 613 } 614 615 static void 616 callout_cc_del(struct callout *c, struct callout_cpu *cc) 617 { 618 619 if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) 620 return; 621 c->c_func = NULL; 622 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); 623 } 624 625 static void 626 softclock_call_cc(struct callout *c, struct callout_cpu *cc, 627 #ifdef CALLOUT_PROFILING 628 int *mpcalls, int *lockcalls, int *gcalls, 629 #endif 630 int direct) 631 { 632 struct rm_priotracker tracker; 633 void (*c_func)(void *); 634 void *c_arg; 635 struct lock_class *class; 636 struct lock_object *c_lock; 637 uintptr_t lock_status; 638 int c_flags; 639 #ifdef SMP 640 struct callout_cpu *new_cc; 641 void (*new_func)(void *); 642 void *new_arg; 643 int flags, new_cpu; 644 sbintime_t new_prec, new_time; 645 #endif 646 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 647 sbintime_t sbt1, sbt2; 648 struct timespec ts2; 649 static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ 650 static timeout_t *lastfunc; 651 #endif 652 653 KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == 654 (CALLOUT_PENDING | CALLOUT_ACTIVE), 655 ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); 656 class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; 657 lock_status = 0; 658 if (c->c_flags & CALLOUT_SHAREDLOCK) { 659 if (class == &lock_class_rm) 660 lock_status = (uintptr_t)&tracker; 661 else 662 lock_status = 1; 663 } 664 c_lock = c->c_lock; 665 c_func = c->c_func; 666 c_arg = c->c_arg; 667 c_flags = c->c_flags; 668 if (c->c_flags & CALLOUT_LOCAL_ALLOC) 669 c->c_flags = CALLOUT_LOCAL_ALLOC; 670 else 671 c->c_flags &= ~CALLOUT_PENDING; 672 673 cc_exec_curr(cc, direct) = c; 674 cc_exec_cancel(cc, direct) = false; 675 CC_UNLOCK(cc); 676 if (c_lock != NULL) { 677 class->lc_lock(c_lock, lock_status); 678 /* 679 * The callout may have been cancelled 680 * while we switched locks. 681 */ 682 if (cc_exec_cancel(cc, direct)) { 683 class->lc_unlock(c_lock); 684 goto skip; 685 } 686 /* The callout cannot be stopped now. */ 687 cc_exec_cancel(cc, direct) = true; 688 if (c_lock == &Giant.lock_object) { 689 #ifdef CALLOUT_PROFILING 690 (*gcalls)++; 691 #endif 692 CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", 693 c, c_func, c_arg); 694 } else { 695 #ifdef CALLOUT_PROFILING 696 (*lockcalls)++; 697 #endif 698 CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", 699 c, c_func, c_arg); 700 } 701 } else { 702 #ifdef CALLOUT_PROFILING 703 (*mpcalls)++; 704 #endif 705 CTR3(KTR_CALLOUT, "callout %p func %p arg %p", 706 c, c_func, c_arg); 707 } 708 KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", 709 "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); 710 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 711 sbt1 = sbinuptime(); 712 #endif 713 THREAD_NO_SLEEPING(); 714 SDT_PROBE(callout_execute, kernel, , callout__start, c, 0, 0, 0, 0); 715 c_func(c_arg); 716 SDT_PROBE(callout_execute, kernel, , callout__end, c, 0, 0, 0, 0); 717 THREAD_SLEEPING_OK(); 718 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 719 sbt2 = sbinuptime(); 720 sbt2 -= sbt1; 721 if (sbt2 > maxdt) { 722 if (lastfunc != c_func || sbt2 > maxdt * 2) { 723 ts2 = sbttots(sbt2); 724 printf( 725 "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", 726 c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); 727 } 728 maxdt = sbt2; 729 lastfunc = c_func; 730 } 731 #endif 732 KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); 733 CTR1(KTR_CALLOUT, "callout %p finished", c); 734 if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) 735 class->lc_unlock(c_lock); 736 skip: 737 CC_LOCK(cc); 738 KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); 739 cc_exec_curr(cc, direct) = NULL; 740 if (cc_exec_waiting(cc, direct)) { 741 /* 742 * There is someone waiting for the 743 * callout to complete. 744 * If the callout was scheduled for 745 * migration just cancel it. 746 */ 747 if (cc_cce_migrating(cc, direct)) { 748 cc_cce_cleanup(cc, direct); 749 750 /* 751 * It should be assert here that the callout is not 752 * destroyed but that is not easy. 753 */ 754 c->c_flags &= ~CALLOUT_DFRMIGRATION; 755 } 756 cc_exec_waiting(cc, direct) = false; 757 CC_UNLOCK(cc); 758 wakeup(&cc_exec_waiting(cc, direct)); 759 CC_LOCK(cc); 760 } else if (cc_cce_migrating(cc, direct)) { 761 KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, 762 ("Migrating legacy callout %p", c)); 763 #ifdef SMP 764 /* 765 * If the callout was scheduled for 766 * migration just perform it now. 767 */ 768 new_cpu = cc_migration_cpu(cc, direct); 769 new_time = cc_migration_time(cc, direct); 770 new_prec = cc_migration_prec(cc, direct); 771 new_func = cc_migration_func(cc, direct); 772 new_arg = cc_migration_arg(cc, direct); 773 cc_cce_cleanup(cc, direct); 774 775 /* 776 * It should be assert here that the callout is not destroyed 777 * but that is not easy. 778 * 779 * As first thing, handle deferred callout stops. 780 */ 781 if (!callout_migrating(c)) { 782 CTR3(KTR_CALLOUT, 783 "deferred cancelled %p func %p arg %p", 784 c, new_func, new_arg); 785 callout_cc_del(c, cc); 786 return; 787 } 788 c->c_flags &= ~CALLOUT_DFRMIGRATION; 789 790 new_cc = callout_cpu_switch(c, cc, new_cpu); 791 flags = (direct) ? C_DIRECT_EXEC : 0; 792 callout_cc_add(c, new_cc, new_time, new_prec, new_func, 793 new_arg, new_cpu, flags, direct); 794 CC_UNLOCK(new_cc); 795 CC_LOCK(cc); 796 #else 797 panic("migration should not happen"); 798 #endif 799 } 800 /* 801 * If the current callout is locally allocated (from 802 * timeout(9)) then put it on the freelist. 803 * 804 * Note: we need to check the cached copy of c_flags because 805 * if it was not local, then it's not safe to deref the 806 * callout pointer. 807 */ 808 KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || 809 c->c_flags == CALLOUT_LOCAL_ALLOC, 810 ("corrupted callout")); 811 if (c_flags & CALLOUT_LOCAL_ALLOC) 812 callout_cc_del(c, cc); 813 } 814 815 /* 816 * The callout mechanism is based on the work of Adam M. Costello and 817 * George Varghese, published in a technical report entitled "Redesigning 818 * the BSD Callout and Timer Facilities" and modified slightly for inclusion 819 * in FreeBSD by Justin T. Gibbs. The original work on the data structures 820 * used in this implementation was published by G. Varghese and T. Lauck in 821 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for 822 * the Efficient Implementation of a Timer Facility" in the Proceedings of 823 * the 11th ACM Annual Symposium on Operating Systems Principles, 824 * Austin, Texas Nov 1987. 825 */ 826 827 /* 828 * Software (low priority) clock interrupt. 829 * Run periodic events from timeout queue. 830 */ 831 void 832 softclock(void *arg) 833 { 834 struct callout_cpu *cc; 835 struct callout *c; 836 #ifdef CALLOUT_PROFILING 837 int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; 838 #endif 839 840 cc = (struct callout_cpu *)arg; 841 CC_LOCK(cc); 842 while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { 843 TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 844 softclock_call_cc(c, cc, 845 #ifdef CALLOUT_PROFILING 846 &mpcalls, &lockcalls, &gcalls, 847 #endif 848 0); 849 #ifdef CALLOUT_PROFILING 850 ++depth; 851 #endif 852 } 853 #ifdef CALLOUT_PROFILING 854 avg_depth += (depth * 1000 - avg_depth) >> 8; 855 avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; 856 avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; 857 avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; 858 #endif 859 CC_UNLOCK(cc); 860 } 861 862 /* 863 * timeout -- 864 * Execute a function after a specified length of time. 865 * 866 * untimeout -- 867 * Cancel previous timeout function call. 868 * 869 * callout_handle_init -- 870 * Initialize a handle so that using it with untimeout is benign. 871 * 872 * See AT&T BCI Driver Reference Manual for specification. This 873 * implementation differs from that one in that although an 874 * identification value is returned from timeout, the original 875 * arguments to timeout as well as the identifier are used to 876 * identify entries for untimeout. 877 */ 878 struct callout_handle 879 timeout(timeout_t *ftn, void *arg, int to_ticks) 880 { 881 struct callout_cpu *cc; 882 struct callout *new; 883 struct callout_handle handle; 884 885 cc = CC_CPU(timeout_cpu); 886 CC_LOCK(cc); 887 /* Fill in the next free callout structure. */ 888 new = SLIST_FIRST(&cc->cc_callfree); 889 if (new == NULL) 890 /* XXX Attempt to malloc first */ 891 panic("timeout table full"); 892 SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); 893 callout_reset(new, to_ticks, ftn, arg); 894 handle.callout = new; 895 CC_UNLOCK(cc); 896 897 return (handle); 898 } 899 900 void 901 untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) 902 { 903 struct callout_cpu *cc; 904 905 /* 906 * Check for a handle that was initialized 907 * by callout_handle_init, but never used 908 * for a real timeout. 909 */ 910 if (handle.callout == NULL) 911 return; 912 913 cc = callout_lock(handle.callout); 914 if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) 915 callout_stop(handle.callout); 916 CC_UNLOCK(cc); 917 } 918 919 void 920 callout_handle_init(struct callout_handle *handle) 921 { 922 handle->callout = NULL; 923 } 924 925 /* 926 * New interface; clients allocate their own callout structures. 927 * 928 * callout_reset() - establish or change a timeout 929 * callout_stop() - disestablish a timeout 930 * callout_init() - initialize a callout structure so that it can 931 * safely be passed to callout_reset() and callout_stop() 932 * 933 * <sys/callout.h> defines three convenience macros: 934 * 935 * callout_active() - returns truth if callout has not been stopped, 936 * drained, or deactivated since the last time the callout was 937 * reset. 938 * callout_pending() - returns truth if callout is still waiting for timeout 939 * callout_deactivate() - marks the callout as having been serviced 940 */ 941 int 942 callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, 943 void (*ftn)(void *), void *arg, int cpu, int flags) 944 { 945 sbintime_t to_sbt, pr; 946 struct callout_cpu *cc; 947 int cancelled, direct; 948 949 cancelled = 0; 950 if (flags & C_ABSOLUTE) { 951 to_sbt = sbt; 952 } else { 953 if ((flags & C_HARDCLOCK) && (sbt < tick_sbt)) 954 sbt = tick_sbt; 955 if ((flags & C_HARDCLOCK) || 956 #ifdef NO_EVENTTIMERS 957 sbt >= sbt_timethreshold) { 958 to_sbt = getsbinuptime(); 959 960 /* Add safety belt for the case of hz > 1000. */ 961 to_sbt += tc_tick_sbt - tick_sbt; 962 #else 963 sbt >= sbt_tickthreshold) { 964 /* 965 * Obtain the time of the last hardclock() call on 966 * this CPU directly from the kern_clocksource.c. 967 * This value is per-CPU, but it is equal for all 968 * active ones. 969 */ 970 #ifdef __LP64__ 971 to_sbt = DPCPU_GET(hardclocktime); 972 #else 973 spinlock_enter(); 974 to_sbt = DPCPU_GET(hardclocktime); 975 spinlock_exit(); 976 #endif 977 #endif 978 if ((flags & C_HARDCLOCK) == 0) 979 to_sbt += tick_sbt; 980 } else 981 to_sbt = sbinuptime(); 982 if (SBT_MAX - to_sbt < sbt) 983 to_sbt = SBT_MAX; 984 else 985 to_sbt += sbt; 986 pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : 987 sbt >> C_PRELGET(flags)); 988 if (pr > precision) 989 precision = pr; 990 } 991 /* 992 * Don't allow migration of pre-allocated callouts lest they 993 * become unbalanced. 994 */ 995 if (c->c_flags & CALLOUT_LOCAL_ALLOC) 996 cpu = c->c_cpu; 997 direct = (c->c_flags & CALLOUT_DIRECT) != 0; 998 KASSERT(!direct || c->c_lock == NULL, 999 ("%s: direct callout %p has lock", __func__, c)); 1000 cc = callout_lock(c); 1001 if (cc_exec_curr(cc, direct) == c) { 1002 /* 1003 * We're being asked to reschedule a callout which is 1004 * currently in progress. If there is a lock then we 1005 * can cancel the callout if it has not really started. 1006 */ 1007 if (c->c_lock != NULL && cc_exec_cancel(cc, direct)) 1008 cancelled = cc_exec_cancel(cc, direct) = true; 1009 if (cc_exec_waiting(cc, direct)) { 1010 /* 1011 * Someone has called callout_drain to kill this 1012 * callout. Don't reschedule. 1013 */ 1014 CTR4(KTR_CALLOUT, "%s %p func %p arg %p", 1015 cancelled ? "cancelled" : "failed to cancel", 1016 c, c->c_func, c->c_arg); 1017 CC_UNLOCK(cc); 1018 return (cancelled); 1019 } 1020 #ifdef SMP 1021 if (callout_migrating(c)) { 1022 /* 1023 * This only occurs when a second callout_reset_sbt_on 1024 * is made after a previous one moved it into 1025 * deferred migration (below). Note we do *not* change 1026 * the prev_cpu even though the previous target may 1027 * be different. 1028 */ 1029 cc_migration_cpu(cc, direct) = cpu; 1030 cc_migration_time(cc, direct) = to_sbt; 1031 cc_migration_prec(cc, direct) = precision; 1032 cc_migration_func(cc, direct) = ftn; 1033 cc_migration_arg(cc, direct) = arg; 1034 cancelled = 1; 1035 CC_UNLOCK(cc); 1036 return (cancelled); 1037 } 1038 #endif 1039 } 1040 if (c->c_flags & CALLOUT_PENDING) { 1041 if ((c->c_flags & CALLOUT_PROCESSED) == 0) { 1042 if (cc_exec_next(cc, direct) == c) 1043 cc_exec_next(cc, direct) = LIST_NEXT(c, c_links.le); 1044 LIST_REMOVE(c, c_links.le); 1045 } else 1046 TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 1047 cancelled = 1; 1048 c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); 1049 } 1050 1051 #ifdef SMP 1052 /* 1053 * If the callout must migrate try to perform it immediately. 1054 * If the callout is currently running, just defer the migration 1055 * to a more appropriate moment. 1056 */ 1057 if (c->c_cpu != cpu) { 1058 if (cc_exec_curr(cc, direct) == c) { 1059 /* 1060 * Pending will have been removed since we are 1061 * actually executing the callout on another 1062 * CPU. That callout should be waiting on the 1063 * lock the caller holds. If we set both 1064 * active/and/pending after we return and the 1065 * lock on the executing callout proceeds, it 1066 * will then see pending is true and return. 1067 * At the return from the actual callout execution 1068 * the migration will occur in softclock_call_cc 1069 * and this new callout will be placed on the 1070 * new CPU via a call to callout_cpu_switch() which 1071 * will get the lock on the right CPU followed 1072 * by a call callout_cc_add() which will add it there. 1073 * (see above in softclock_call_cc()). 1074 */ 1075 cc_migration_cpu(cc, direct) = cpu; 1076 cc_migration_time(cc, direct) = to_sbt; 1077 cc_migration_prec(cc, direct) = precision; 1078 cc_migration_func(cc, direct) = ftn; 1079 cc_migration_arg(cc, direct) = arg; 1080 c->c_flags |= (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING); 1081 CTR6(KTR_CALLOUT, 1082 "migration of %p func %p arg %p in %d.%08x to %u deferred", 1083 c, c->c_func, c->c_arg, (int)(to_sbt >> 32), 1084 (u_int)(to_sbt & 0xffffffff), cpu); 1085 CC_UNLOCK(cc); 1086 return (cancelled); 1087 } 1088 cc = callout_cpu_switch(c, cc, cpu); 1089 } 1090 #endif 1091 1092 callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags, direct); 1093 CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", 1094 cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), 1095 (u_int)(to_sbt & 0xffffffff)); 1096 CC_UNLOCK(cc); 1097 1098 return (cancelled); 1099 } 1100 1101 /* 1102 * Common idioms that can be optimized in the future. 1103 */ 1104 int 1105 callout_schedule_on(struct callout *c, int to_ticks, int cpu) 1106 { 1107 return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu); 1108 } 1109 1110 int 1111 callout_schedule(struct callout *c, int to_ticks) 1112 { 1113 return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu); 1114 } 1115 1116 int 1117 _callout_stop_safe(struct callout *c, int safe) 1118 { 1119 struct callout_cpu *cc, *old_cc; 1120 struct lock_class *class; 1121 int direct, sq_locked, use_lock; 1122 int not_on_a_list; 1123 1124 if (safe) 1125 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, 1126 "calling %s", __func__); 1127 1128 /* 1129 * Some old subsystems don't hold Giant while running a callout_stop(), 1130 * so just discard this check for the moment. 1131 */ 1132 if (!safe && c->c_lock != NULL) { 1133 if (c->c_lock == &Giant.lock_object) 1134 use_lock = mtx_owned(&Giant); 1135 else { 1136 use_lock = 1; 1137 class = LOCK_CLASS(c->c_lock); 1138 class->lc_assert(c->c_lock, LA_XLOCKED); 1139 } 1140 } else 1141 use_lock = 0; 1142 direct = (c->c_flags & CALLOUT_DIRECT) != 0; 1143 sq_locked = 0; 1144 old_cc = NULL; 1145 again: 1146 cc = callout_lock(c); 1147 1148 if ((c->c_flags & (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) == 1149 (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) { 1150 /* 1151 * Special case where this slipped in while we 1152 * were migrating *as* the callout is about to 1153 * execute. The caller probably holds the lock 1154 * the callout wants. 1155 * 1156 * Get rid of the migration first. Then set 1157 * the flag that tells this code *not* to 1158 * try to remove it from any lists (its not 1159 * on one yet). When the callout wheel runs, 1160 * it will ignore this callout. 1161 */ 1162 c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_ACTIVE); 1163 not_on_a_list = 1; 1164 } else { 1165 not_on_a_list = 0; 1166 } 1167 1168 /* 1169 * If the callout was migrating while the callout cpu lock was 1170 * dropped, just drop the sleepqueue lock and check the states 1171 * again. 1172 */ 1173 if (sq_locked != 0 && cc != old_cc) { 1174 #ifdef SMP 1175 CC_UNLOCK(cc); 1176 sleepq_release(&cc_exec_waiting(old_cc, direct)); 1177 sq_locked = 0; 1178 old_cc = NULL; 1179 goto again; 1180 #else 1181 panic("migration should not happen"); 1182 #endif 1183 } 1184 1185 /* 1186 * If the callout isn't pending, it's not on the queue, so 1187 * don't attempt to remove it from the queue. We can try to 1188 * stop it by other means however. 1189 */ 1190 if (!(c->c_flags & CALLOUT_PENDING)) { 1191 c->c_flags &= ~CALLOUT_ACTIVE; 1192 1193 /* 1194 * If it wasn't on the queue and it isn't the current 1195 * callout, then we can't stop it, so just bail. 1196 */ 1197 if (cc_exec_curr(cc, direct) != c) { 1198 CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", 1199 c, c->c_func, c->c_arg); 1200 CC_UNLOCK(cc); 1201 if (sq_locked) 1202 sleepq_release(&cc_exec_waiting(cc, direct)); 1203 return (0); 1204 } 1205 1206 if (safe) { 1207 /* 1208 * The current callout is running (or just 1209 * about to run) and blocking is allowed, so 1210 * just wait for the current invocation to 1211 * finish. 1212 */ 1213 while (cc_exec_curr(cc, direct) == c) { 1214 /* 1215 * Use direct calls to sleepqueue interface 1216 * instead of cv/msleep in order to avoid 1217 * a LOR between cc_lock and sleepqueue 1218 * chain spinlocks. This piece of code 1219 * emulates a msleep_spin() call actually. 1220 * 1221 * If we already have the sleepqueue chain 1222 * locked, then we can safely block. If we 1223 * don't already have it locked, however, 1224 * we have to drop the cc_lock to lock 1225 * it. This opens several races, so we 1226 * restart at the beginning once we have 1227 * both locks. If nothing has changed, then 1228 * we will end up back here with sq_locked 1229 * set. 1230 */ 1231 if (!sq_locked) { 1232 CC_UNLOCK(cc); 1233 sleepq_lock( 1234 &cc_exec_waiting(cc, direct)); 1235 sq_locked = 1; 1236 old_cc = cc; 1237 goto again; 1238 } 1239 1240 /* 1241 * Migration could be cancelled here, but 1242 * as long as it is still not sure when it 1243 * will be packed up, just let softclock() 1244 * take care of it. 1245 */ 1246 cc_exec_waiting(cc, direct) = true; 1247 DROP_GIANT(); 1248 CC_UNLOCK(cc); 1249 sleepq_add( 1250 &cc_exec_waiting(cc, direct), 1251 &cc->cc_lock.lock_object, "codrain", 1252 SLEEPQ_SLEEP, 0); 1253 sleepq_wait( 1254 &cc_exec_waiting(cc, direct), 1255 0); 1256 sq_locked = 0; 1257 old_cc = NULL; 1258 1259 /* Reacquire locks previously released. */ 1260 PICKUP_GIANT(); 1261 CC_LOCK(cc); 1262 } 1263 } else if (use_lock && 1264 !cc_exec_cancel(cc, direct)) { 1265 1266 /* 1267 * The current callout is waiting for its 1268 * lock which we hold. Cancel the callout 1269 * and return. After our caller drops the 1270 * lock, the callout will be skipped in 1271 * softclock(). 1272 */ 1273 cc_exec_cancel(cc, direct) = true; 1274 CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", 1275 c, c->c_func, c->c_arg); 1276 KASSERT(!cc_cce_migrating(cc, direct), 1277 ("callout wrongly scheduled for migration")); 1278 CC_UNLOCK(cc); 1279 KASSERT(!sq_locked, ("sleepqueue chain locked")); 1280 return (1); 1281 } else if (callout_migrating(c)) { 1282 /* 1283 * The callout is currently being serviced 1284 * and the "next" callout is scheduled at 1285 * its completion with a migration. We remove 1286 * the migration flag so it *won't* get rescheduled, 1287 * but we can't stop the one thats running so 1288 * we return 0. 1289 */ 1290 c->c_flags &= ~CALLOUT_DFRMIGRATION; 1291 #ifdef SMP 1292 /* 1293 * We can't call cc_cce_cleanup here since 1294 * if we do it will remove .ce_curr and 1295 * its still running. This will prevent a 1296 * reschedule of the callout when the 1297 * execution completes. 1298 */ 1299 cc_migration_cpu(cc, direct) = CPUBLOCK; 1300 cc_migration_time(cc, direct) = 0; 1301 cc_migration_prec(cc, direct) = 0; 1302 cc_migration_func(cc, direct) = NULL; 1303 cc_migration_arg(cc, direct) = NULL; 1304 #endif 1305 CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", 1306 c, c->c_func, c->c_arg); 1307 CC_UNLOCK(cc); 1308 return (0); 1309 } 1310 CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", 1311 c, c->c_func, c->c_arg); 1312 CC_UNLOCK(cc); 1313 KASSERT(!sq_locked, ("sleepqueue chain still locked")); 1314 return (0); 1315 } 1316 if (sq_locked) 1317 sleepq_release(&cc_exec_waiting(cc, direct)); 1318 1319 c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); 1320 1321 CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", 1322 c, c->c_func, c->c_arg); 1323 if (not_on_a_list == 0) { 1324 if ((c->c_flags & CALLOUT_PROCESSED) == 0) { 1325 if (cc_exec_next(cc, direct) == c) 1326 cc_exec_next(cc, direct) = LIST_NEXT(c, c_links.le); 1327 LIST_REMOVE(c, c_links.le); 1328 } else 1329 TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); 1330 } 1331 callout_cc_del(c, cc); 1332 CC_UNLOCK(cc); 1333 return (1); 1334 } 1335 1336 void 1337 callout_init(struct callout *c, int mpsafe) 1338 { 1339 bzero(c, sizeof *c); 1340 if (mpsafe) { 1341 c->c_lock = NULL; 1342 c->c_flags = CALLOUT_RETURNUNLOCKED; 1343 } else { 1344 c->c_lock = &Giant.lock_object; 1345 c->c_flags = 0; 1346 } 1347 c->c_cpu = timeout_cpu; 1348 } 1349 1350 void 1351 _callout_init_lock(struct callout *c, struct lock_object *lock, int flags) 1352 { 1353 bzero(c, sizeof *c); 1354 c->c_lock = lock; 1355 KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, 1356 ("callout_init_lock: bad flags %d", flags)); 1357 KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, 1358 ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); 1359 KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & 1360 (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", 1361 __func__)); 1362 c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); 1363 c->c_cpu = timeout_cpu; 1364 } 1365 1366 #ifdef APM_FIXUP_CALLTODO 1367 /* 1368 * Adjust the kernel calltodo timeout list. This routine is used after 1369 * an APM resume to recalculate the calltodo timer list values with the 1370 * number of hz's we have been sleeping. The next hardclock() will detect 1371 * that there are fired timers and run softclock() to execute them. 1372 * 1373 * Please note, I have not done an exhaustive analysis of what code this 1374 * might break. I am motivated to have my select()'s and alarm()'s that 1375 * have expired during suspend firing upon resume so that the applications 1376 * which set the timer can do the maintanence the timer was for as close 1377 * as possible to the originally intended time. Testing this code for a 1378 * week showed that resuming from a suspend resulted in 22 to 25 timers 1379 * firing, which seemed independant on whether the suspend was 2 hours or 1380 * 2 days. Your milage may vary. - Ken Key <key@cs.utk.edu> 1381 */ 1382 void 1383 adjust_timeout_calltodo(struct timeval *time_change) 1384 { 1385 register struct callout *p; 1386 unsigned long delta_ticks; 1387 1388 /* 1389 * How many ticks were we asleep? 1390 * (stolen from tvtohz()). 1391 */ 1392 1393 /* Don't do anything */ 1394 if (time_change->tv_sec < 0) 1395 return; 1396 else if (time_change->tv_sec <= LONG_MAX / 1000000) 1397 delta_ticks = (time_change->tv_sec * 1000000 + 1398 time_change->tv_usec + (tick - 1)) / tick + 1; 1399 else if (time_change->tv_sec <= LONG_MAX / hz) 1400 delta_ticks = time_change->tv_sec * hz + 1401 (time_change->tv_usec + (tick - 1)) / tick + 1; 1402 else 1403 delta_ticks = LONG_MAX; 1404 1405 if (delta_ticks > INT_MAX) 1406 delta_ticks = INT_MAX; 1407 1408 /* 1409 * Now rip through the timer calltodo list looking for timers 1410 * to expire. 1411 */ 1412 1413 /* don't collide with softclock() */ 1414 CC_LOCK(cc); 1415 for (p = calltodo.c_next; p != NULL; p = p->c_next) { 1416 p->c_time -= delta_ticks; 1417 1418 /* Break if the timer had more time on it than delta_ticks */ 1419 if (p->c_time > 0) 1420 break; 1421 1422 /* take back the ticks the timer didn't use (p->c_time <= 0) */ 1423 delta_ticks = -p->c_time; 1424 } 1425 CC_UNLOCK(cc); 1426 1427 return; 1428 } 1429 #endif /* APM_FIXUP_CALLTODO */ 1430 1431 static int 1432 flssbt(sbintime_t sbt) 1433 { 1434 1435 sbt += (uint64_t)sbt >> 1; 1436 if (sizeof(long) >= sizeof(sbintime_t)) 1437 return (flsl(sbt)); 1438 if (sbt >= SBT_1S) 1439 return (flsl(((uint64_t)sbt) >> 32) + 32); 1440 return (flsl(sbt)); 1441 } 1442 1443 /* 1444 * Dump immediate statistic snapshot of the scheduled callouts. 1445 */ 1446 static int 1447 sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) 1448 { 1449 struct callout *tmp; 1450 struct callout_cpu *cc; 1451 struct callout_list *sc; 1452 sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; 1453 int ct[64], cpr[64], ccpbk[32]; 1454 int error, val, i, count, tcum, pcum, maxc, c, medc; 1455 #ifdef SMP 1456 int cpu; 1457 #endif 1458 1459 val = 0; 1460 error = sysctl_handle_int(oidp, &val, 0, req); 1461 if (error != 0 || req->newptr == NULL) 1462 return (error); 1463 count = maxc = 0; 1464 st = spr = maxt = maxpr = 0; 1465 bzero(ccpbk, sizeof(ccpbk)); 1466 bzero(ct, sizeof(ct)); 1467 bzero(cpr, sizeof(cpr)); 1468 now = sbinuptime(); 1469 #ifdef SMP 1470 CPU_FOREACH(cpu) { 1471 cc = CC_CPU(cpu); 1472 #else 1473 cc = CC_CPU(timeout_cpu); 1474 #endif 1475 CC_LOCK(cc); 1476 for (i = 0; i < callwheelsize; i++) { 1477 sc = &cc->cc_callwheel[i]; 1478 c = 0; 1479 LIST_FOREACH(tmp, sc, c_links.le) { 1480 c++; 1481 t = tmp->c_time - now; 1482 if (t < 0) 1483 t = 0; 1484 st += t / SBT_1US; 1485 spr += tmp->c_precision / SBT_1US; 1486 if (t > maxt) 1487 maxt = t; 1488 if (tmp->c_precision > maxpr) 1489 maxpr = tmp->c_precision; 1490 ct[flssbt(t)]++; 1491 cpr[flssbt(tmp->c_precision)]++; 1492 } 1493 if (c > maxc) 1494 maxc = c; 1495 ccpbk[fls(c + c / 2)]++; 1496 count += c; 1497 } 1498 CC_UNLOCK(cc); 1499 #ifdef SMP 1500 } 1501 #endif 1502 1503 for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) 1504 tcum += ct[i]; 1505 medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; 1506 for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) 1507 pcum += cpr[i]; 1508 medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; 1509 for (i = 0, c = 0; i < 32 && c < count / 2; i++) 1510 c += ccpbk[i]; 1511 medc = (i >= 2) ? (1 << (i - 2)) : 0; 1512 1513 printf("Scheduled callouts statistic snapshot:\n"); 1514 printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", 1515 count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); 1516 printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", 1517 medc, 1518 count / callwheelsize / mp_ncpus, 1519 (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, 1520 maxc); 1521 printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", 1522 medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, 1523 (st / count) / 1000000, (st / count) % 1000000, 1524 maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); 1525 printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", 1526 medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, 1527 (spr / count) / 1000000, (spr / count) % 1000000, 1528 maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); 1529 printf(" Distribution: \tbuckets\t time\t tcum\t" 1530 " prec\t pcum\n"); 1531 for (i = 0, tcum = pcum = 0; i < 64; i++) { 1532 if (ct[i] == 0 && cpr[i] == 0) 1533 continue; 1534 t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0; 1535 tcum += ct[i]; 1536 pcum += cpr[i]; 1537 printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n", 1538 t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32, 1539 i - 1 - (32 - CC_HASH_SHIFT), 1540 ct[i], tcum, cpr[i], pcum); 1541 } 1542 return (error); 1543 } 1544 SYSCTL_PROC(_kern, OID_AUTO, callout_stat, 1545 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 1546 0, 0, sysctl_kern_callout_stat, "I", 1547 "Dump immediate statistic snapshot of the scheduled callouts"); 1548