1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1982, 1986, 1990, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_ktrace.h" 43 #include "opt_sched.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/blockcount.h> 48 #include <sys/condvar.h> 49 #include <sys/kdb.h> 50 #include <sys/kernel.h> 51 #include <sys/ktr.h> 52 #include <sys/lock.h> 53 #include <sys/mutex.h> 54 #include <sys/proc.h> 55 #include <sys/resourcevar.h> 56 #include <sys/sched.h> 57 #include <sys/sdt.h> 58 #include <sys/signalvar.h> 59 #include <sys/sleepqueue.h> 60 #include <sys/smp.h> 61 #include <sys/sx.h> 62 #include <sys/sysctl.h> 63 #include <sys/sysproto.h> 64 #include <sys/vmmeter.h> 65 #ifdef KTRACE 66 #include <sys/uio.h> 67 #include <sys/ktrace.h> 68 #endif 69 #ifdef EPOCH_TRACE 70 #include <sys/epoch.h> 71 #endif 72 73 #include <machine/cpu.h> 74 75 static void synch_setup(void *dummy); 76 SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, 77 NULL); 78 79 int hogticks; 80 static const char pause_wchan[MAXCPU]; 81 82 static struct callout loadav_callout; 83 84 struct loadavg averunnable = 85 { {0, 0, 0}, FSCALE }; /* load average, of runnable procs */ 86 /* 87 * Constants for averages over 1, 5, and 15 minutes 88 * when sampling at 5 second intervals. 89 */ 90 static uint64_t cexp[3] = { 91 0.9200444146293232 * FSCALE, /* exp(-1/12) */ 92 0.9834714538216174 * FSCALE, /* exp(-1/60) */ 93 0.9944598480048967 * FSCALE, /* exp(-1/180) */ 94 }; 95 96 /* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */ 97 SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FSCALE, 98 "Fixed-point scale factor used for calculating load average values"); 99 100 static void loadav(void *arg); 101 102 SDT_PROVIDER_DECLARE(sched); 103 SDT_PROBE_DEFINE(sched, , , preempt); 104 105 static void 106 sleepinit(void *unused) 107 { 108 109 hogticks = (hz / 10) * 2; /* Default only. */ 110 init_sleepqueues(); 111 } 112 113 /* 114 * vmem tries to lock the sleepq mutexes when free'ing kva, so make sure 115 * it is available. 116 */ 117 SYSINIT(sleepinit, SI_SUB_KMEM, SI_ORDER_ANY, sleepinit, NULL); 118 119 /* 120 * General sleep call. Suspends the current thread until a wakeup is 121 * performed on the specified identifier. The thread will then be made 122 * runnable with the specified priority. Sleeps at most sbt units of time 123 * (0 means no timeout). If pri includes the PCATCH flag, let signals 124 * interrupt the sleep, otherwise ignore them while sleeping. Returns 0 if 125 * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a 126 * signal becomes pending, ERESTART is returned if the current system 127 * call should be restarted if possible, and EINTR is returned if the system 128 * call should be interrupted by the signal (return EINTR). 129 * 130 * The lock argument is unlocked before the caller is suspended, and 131 * re-locked before _sleep() returns. If priority includes the PDROP 132 * flag the lock is not re-locked before returning. 133 */ 134 int 135 _sleep(const void *ident, struct lock_object *lock, int priority, 136 const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) 137 { 138 struct thread *td; 139 struct lock_class *class; 140 uintptr_t lock_state; 141 int catch, pri, rval, sleepq_flags; 142 WITNESS_SAVE_DECL(lock_witness); 143 144 TSENTER(); 145 td = curthread; 146 #ifdef KTRACE 147 if (KTRPOINT(td, KTR_CSW)) 148 ktrcsw(1, 0, wmesg); 149 #endif 150 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, 151 "Sleeping on \"%s\"", wmesg); 152 KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL || 153 (priority & PNOLOCK) != 0, 154 ("sleeping without a lock")); 155 KASSERT(ident != NULL, ("_sleep: NULL ident")); 156 KASSERT(TD_IS_RUNNING(td), ("_sleep: curthread not running")); 157 if (priority & PDROP) 158 KASSERT(lock != NULL && lock != &Giant.lock_object, 159 ("PDROP requires a non-Giant lock")); 160 if (lock != NULL) 161 class = LOCK_CLASS(lock); 162 else 163 class = NULL; 164 165 if (SCHEDULER_STOPPED_TD(td)) { 166 if (lock != NULL && priority & PDROP) 167 class->lc_unlock(lock); 168 return (0); 169 } 170 catch = priority & PCATCH; 171 pri = priority & PRIMASK; 172 173 KASSERT(!TD_ON_SLEEPQ(td), ("recursive sleep")); 174 175 if ((uintptr_t)ident >= (uintptr_t)&pause_wchan[0] && 176 (uintptr_t)ident <= (uintptr_t)&pause_wchan[MAXCPU - 1]) 177 sleepq_flags = SLEEPQ_PAUSE; 178 else 179 sleepq_flags = SLEEPQ_SLEEP; 180 if (catch) 181 sleepq_flags |= SLEEPQ_INTERRUPTIBLE; 182 183 sleepq_lock(ident); 184 CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)", 185 td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident); 186 187 if (lock == &Giant.lock_object) 188 mtx_assert(&Giant, MA_OWNED); 189 DROP_GIANT(); 190 if (lock != NULL && lock != &Giant.lock_object && 191 !(class->lc_flags & LC_SLEEPABLE)) { 192 KASSERT(!(class->lc_flags & LC_SPINLOCK), 193 ("spin locks can only use msleep_spin")); 194 WITNESS_SAVE(lock, lock_witness); 195 lock_state = class->lc_unlock(lock); 196 } else 197 /* GCC needs to follow the Yellow Brick Road */ 198 lock_state = -1; 199 200 /* 201 * We put ourselves on the sleep queue and start our timeout 202 * before calling thread_suspend_check, as we could stop there, 203 * and a wakeup or a SIGCONT (or both) could occur while we were 204 * stopped without resuming us. Thus, we must be ready for sleep 205 * when cursig() is called. If the wakeup happens while we're 206 * stopped, then td will no longer be on a sleep queue upon 207 * return from cursig(). 208 */ 209 sleepq_add(ident, lock, wmesg, sleepq_flags, 0); 210 if (sbt != 0) 211 sleepq_set_timeout_sbt(ident, sbt, pr, flags); 212 if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { 213 sleepq_release(ident); 214 WITNESS_SAVE(lock, lock_witness); 215 lock_state = class->lc_unlock(lock); 216 sleepq_lock(ident); 217 } 218 if (sbt != 0 && catch) 219 rval = sleepq_timedwait_sig(ident, pri); 220 else if (sbt != 0) 221 rval = sleepq_timedwait(ident, pri); 222 else if (catch) 223 rval = sleepq_wait_sig(ident, pri); 224 else { 225 sleepq_wait(ident, pri); 226 rval = 0; 227 } 228 #ifdef KTRACE 229 if (KTRPOINT(td, KTR_CSW)) 230 ktrcsw(0, 0, wmesg); 231 #endif 232 PICKUP_GIANT(); 233 if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) { 234 class->lc_lock(lock, lock_state); 235 WITNESS_RESTORE(lock, lock_witness); 236 } 237 TSEXIT(); 238 return (rval); 239 } 240 241 int 242 msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg, 243 sbintime_t sbt, sbintime_t pr, int flags) 244 { 245 struct thread *td; 246 int rval; 247 WITNESS_SAVE_DECL(mtx); 248 249 td = curthread; 250 KASSERT(mtx != NULL, ("sleeping without a mutex")); 251 KASSERT(ident != NULL, ("msleep_spin_sbt: NULL ident")); 252 KASSERT(TD_IS_RUNNING(td), ("msleep_spin_sbt: curthread not running")); 253 254 if (SCHEDULER_STOPPED_TD(td)) 255 return (0); 256 257 sleepq_lock(ident); 258 CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)", 259 td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident); 260 261 DROP_GIANT(); 262 mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED); 263 WITNESS_SAVE(&mtx->lock_object, mtx); 264 mtx_unlock_spin(mtx); 265 266 /* 267 * We put ourselves on the sleep queue and start our timeout. 268 */ 269 sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0); 270 if (sbt != 0) 271 sleepq_set_timeout_sbt(ident, sbt, pr, flags); 272 273 /* 274 * Can't call ktrace with any spin locks held so it can lock the 275 * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold 276 * any spin lock. Thus, we have to drop the sleepq spin lock while 277 * we handle those requests. This is safe since we have placed our 278 * thread on the sleep queue already. 279 */ 280 #ifdef KTRACE 281 if (KTRPOINT(td, KTR_CSW)) { 282 sleepq_release(ident); 283 ktrcsw(1, 0, wmesg); 284 sleepq_lock(ident); 285 } 286 #endif 287 #ifdef WITNESS 288 sleepq_release(ident); 289 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"", 290 wmesg); 291 sleepq_lock(ident); 292 #endif 293 if (sbt != 0) 294 rval = sleepq_timedwait(ident, 0); 295 else { 296 sleepq_wait(ident, 0); 297 rval = 0; 298 } 299 #ifdef KTRACE 300 if (KTRPOINT(td, KTR_CSW)) 301 ktrcsw(0, 0, wmesg); 302 #endif 303 PICKUP_GIANT(); 304 mtx_lock_spin(mtx); 305 WITNESS_RESTORE(&mtx->lock_object, mtx); 306 return (rval); 307 } 308 309 /* 310 * pause_sbt() delays the calling thread by the given signed binary 311 * time. During cold bootup, pause_sbt() uses the DELAY() function 312 * instead of the _sleep() function to do the waiting. The "sbt" 313 * argument must be greater than or equal to zero. A "sbt" value of 314 * zero is equivalent to a "sbt" value of one tick. 315 */ 316 int 317 pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) 318 { 319 KASSERT(sbt >= 0, ("pause_sbt: timeout must be >= 0")); 320 321 /* silently convert invalid timeouts */ 322 if (sbt == 0) 323 sbt = tick_sbt; 324 325 if ((cold && curthread == &thread0) || kdb_active || 326 SCHEDULER_STOPPED()) { 327 /* 328 * We delay one second at a time to avoid overflowing the 329 * system specific DELAY() function(s): 330 */ 331 while (sbt >= SBT_1S) { 332 DELAY(1000000); 333 sbt -= SBT_1S; 334 } 335 /* Do the delay remainder, if any */ 336 sbt = howmany(sbt, SBT_1US); 337 if (sbt > 0) 338 DELAY(sbt); 339 return (EWOULDBLOCK); 340 } 341 return (_sleep(&pause_wchan[curcpu], NULL, 342 (flags & C_CATCH) ? PCATCH : 0, wmesg, sbt, pr, flags)); 343 } 344 345 /* 346 * Make all threads sleeping on the specified identifier runnable. 347 */ 348 void 349 wakeup(const void *ident) 350 { 351 int wakeup_swapper; 352 353 sleepq_lock(ident); 354 wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0); 355 sleepq_release(ident); 356 if (wakeup_swapper) { 357 KASSERT(ident != &proc0, 358 ("wakeup and wakeup_swapper and proc0")); 359 kick_proc0(); 360 } 361 } 362 363 /* 364 * Make a thread sleeping on the specified identifier runnable. 365 * May wake more than one thread if a target thread is currently 366 * swapped out. 367 */ 368 void 369 wakeup_one(const void *ident) 370 { 371 int wakeup_swapper; 372 373 sleepq_lock(ident); 374 wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_DROP, 0, 0); 375 if (wakeup_swapper) 376 kick_proc0(); 377 } 378 379 void 380 wakeup_any(const void *ident) 381 { 382 int wakeup_swapper; 383 384 sleepq_lock(ident); 385 wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_UNFAIR | 386 SLEEPQ_DROP, 0, 0); 387 if (wakeup_swapper) 388 kick_proc0(); 389 } 390 391 /* 392 * Signal sleeping waiters after the counter has reached zero. 393 */ 394 void 395 _blockcount_wakeup(blockcount_t *bc, u_int old) 396 { 397 398 KASSERT(_BLOCKCOUNT_WAITERS(old), 399 ("%s: no waiters on %p", __func__, bc)); 400 401 if (atomic_cmpset_int(&bc->__count, _BLOCKCOUNT_WAITERS_FLAG, 0)) 402 wakeup(bc); 403 } 404 405 /* 406 * Wait for a wakeup or a signal. This does not guarantee that the count is 407 * still zero on return. Callers wanting a precise answer should use 408 * blockcount_wait() with an interlock. 409 * 410 * If there is no work to wait for, return 0. If the sleep was interrupted by a 411 * signal, return EINTR or ERESTART, and return EAGAIN otherwise. 412 */ 413 int 414 _blockcount_sleep(blockcount_t *bc, struct lock_object *lock, const char *wmesg, 415 int prio) 416 { 417 void *wchan; 418 uintptr_t lock_state; 419 u_int old; 420 int ret; 421 bool catch, drop; 422 423 KASSERT(lock != &Giant.lock_object, 424 ("%s: cannot use Giant as the interlock", __func__)); 425 426 catch = (prio & PCATCH) != 0; 427 drop = (prio & PDROP) != 0; 428 prio &= PRIMASK; 429 430 /* 431 * Synchronize with the fence in blockcount_release(). If we end up 432 * waiting, the sleepqueue lock acquisition will provide the required 433 * side effects. 434 * 435 * If there is no work to wait for, but waiters are present, try to put 436 * ourselves to sleep to avoid jumping ahead. 437 */ 438 if (atomic_load_acq_int(&bc->__count) == 0) { 439 if (lock != NULL && drop) 440 LOCK_CLASS(lock)->lc_unlock(lock); 441 return (0); 442 } 443 lock_state = 0; 444 wchan = bc; 445 sleepq_lock(wchan); 446 DROP_GIANT(); 447 if (lock != NULL) 448 lock_state = LOCK_CLASS(lock)->lc_unlock(lock); 449 old = blockcount_read(bc); 450 ret = 0; 451 do { 452 if (_BLOCKCOUNT_COUNT(old) == 0) { 453 sleepq_release(wchan); 454 goto out; 455 } 456 if (_BLOCKCOUNT_WAITERS(old)) 457 break; 458 } while (!atomic_fcmpset_int(&bc->__count, &old, 459 old | _BLOCKCOUNT_WAITERS_FLAG)); 460 sleepq_add(wchan, NULL, wmesg, catch ? SLEEPQ_INTERRUPTIBLE : 0, 0); 461 if (catch) 462 ret = sleepq_wait_sig(wchan, prio); 463 else 464 sleepq_wait(wchan, prio); 465 if (ret == 0) 466 ret = EAGAIN; 467 468 out: 469 PICKUP_GIANT(); 470 if (lock != NULL && !drop) 471 LOCK_CLASS(lock)->lc_lock(lock, lock_state); 472 473 return (ret); 474 } 475 476 static void 477 kdb_switch(void) 478 { 479 thread_unlock(curthread); 480 kdb_backtrace(); 481 kdb_reenter(); 482 panic("%s: did not reenter debugger", __func__); 483 } 484 485 /* 486 * mi_switch(9): The machine-independent parts of context switching. 487 * 488 * The thread lock is required on entry and is no longer held on return. 489 */ 490 void 491 mi_switch(int flags) 492 { 493 uint64_t runtime, new_switchtime; 494 struct thread *td; 495 496 td = curthread; /* XXX */ 497 THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED); 498 KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code")); 499 #ifdef INVARIANTS 500 if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td)) 501 mtx_assert(&Giant, MA_NOTOWNED); 502 #endif 503 /* thread_lock() performs spinlock_enter(). */ 504 KASSERT(td->td_critnest == 1 || KERNEL_PANICKED(), 505 ("mi_switch: switch in a critical section")); 506 KASSERT((flags & (SW_INVOL | SW_VOL)) != 0, 507 ("mi_switch: switch must be voluntary or involuntary")); 508 KASSERT((flags & SW_TYPE_MASK) != 0, 509 ("mi_switch: a switch reason (type) must be specified")); 510 KASSERT((flags & SW_TYPE_MASK) < SWT_COUNT, 511 ("mi_switch: invalid switch reason %d", (flags & SW_TYPE_MASK))); 512 513 /* 514 * Don't perform context switches from the debugger. 515 */ 516 if (kdb_active) 517 kdb_switch(); 518 if (SCHEDULER_STOPPED_TD(td)) 519 return; 520 if (flags & SW_VOL) { 521 td->td_ru.ru_nvcsw++; 522 td->td_swvoltick = ticks; 523 } else { 524 td->td_ru.ru_nivcsw++; 525 td->td_swinvoltick = ticks; 526 } 527 #ifdef SCHED_STATS 528 SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]); 529 #endif 530 /* 531 * Compute the amount of time during which the current 532 * thread was running, and add that to its total so far. 533 */ 534 new_switchtime = cpu_ticks(); 535 runtime = new_switchtime - PCPU_GET(switchtime); 536 td->td_runtime += runtime; 537 td->td_incruntime += runtime; 538 PCPU_SET(switchtime, new_switchtime); 539 td->td_generation++; /* bump preempt-detect counter */ 540 VM_CNT_INC(v_swtch); 541 PCPU_SET(switchticks, ticks); 542 CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)", 543 td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name); 544 #ifdef KDTRACE_HOOKS 545 if (SDT_PROBES_ENABLED() && 546 ((flags & SW_PREEMPT) != 0 || ((flags & SW_INVOL) != 0 && 547 (flags & SW_TYPE_MASK) == SWT_NEEDRESCHED))) 548 SDT_PROBE0(sched, , , preempt); 549 #endif 550 sched_switch(td, flags); 551 CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)", 552 td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name); 553 554 /* 555 * If the last thread was exiting, finish cleaning it up. 556 */ 557 if ((td = PCPU_GET(deadthread))) { 558 PCPU_SET(deadthread, NULL); 559 thread_stash(td); 560 } 561 spinlock_exit(); 562 } 563 564 /* 565 * Change thread state to be runnable, placing it on the run queue if 566 * it is in memory. If it is swapped out, return true so our caller 567 * will know to awaken the swapper. 568 * 569 * Requires the thread lock on entry, drops on exit. 570 */ 571 int 572 setrunnable(struct thread *td, int srqflags) 573 { 574 int swapin; 575 576 THREAD_LOCK_ASSERT(td, MA_OWNED); 577 KASSERT(td->td_proc->p_state != PRS_ZOMBIE, 578 ("setrunnable: pid %d is a zombie", td->td_proc->p_pid)); 579 580 swapin = 0; 581 switch (TD_GET_STATE(td)) { 582 case TDS_RUNNING: 583 case TDS_RUNQ: 584 break; 585 case TDS_CAN_RUN: 586 KASSERT((td->td_flags & TDF_INMEM) != 0, 587 ("setrunnable: td %p not in mem, flags 0x%X inhibit 0x%X", 588 td, td->td_flags, td->td_inhibitors)); 589 /* unlocks thread lock according to flags */ 590 sched_wakeup(td, srqflags); 591 return (0); 592 case TDS_INHIBITED: 593 /* 594 * If we are only inhibited because we are swapped out 595 * arrange to swap in this process. 596 */ 597 if (td->td_inhibitors == TDI_SWAPPED && 598 (td->td_flags & TDF_SWAPINREQ) == 0) { 599 td->td_flags |= TDF_SWAPINREQ; 600 swapin = 1; 601 } 602 break; 603 default: 604 panic("setrunnable: state 0x%x", TD_GET_STATE(td)); 605 } 606 if ((srqflags & (SRQ_HOLD | SRQ_HOLDTD)) == 0) 607 thread_unlock(td); 608 609 return (swapin); 610 } 611 612 /* 613 * Compute a tenex style load average of a quantity on 614 * 1, 5 and 15 minute intervals. 615 */ 616 static void 617 loadav(void *arg) 618 { 619 int i; 620 uint64_t nrun; 621 struct loadavg *avg; 622 623 nrun = (uint64_t)sched_load(); 624 avg = &averunnable; 625 626 for (i = 0; i < 3; i++) 627 avg->ldavg[i] = (cexp[i] * (uint64_t)avg->ldavg[i] + 628 nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; 629 630 /* 631 * Schedule the next update to occur after 5 seconds, but add a 632 * random variation to avoid synchronisation with processes that 633 * run at regular intervals. 634 */ 635 callout_reset_sbt(&loadav_callout, 636 SBT_1US * (4000000 + (int)(random() % 2000001)), SBT_1US, 637 loadav, NULL, C_DIRECT_EXEC | C_PREL(32)); 638 } 639 640 static void 641 ast_scheduler(struct thread *td, int tda __unused) 642 { 643 #ifdef KTRACE 644 if (KTRPOINT(td, KTR_CSW)) 645 ktrcsw(1, 1, __func__); 646 #endif 647 thread_lock(td); 648 sched_prio(td, td->td_user_pri); 649 mi_switch(SW_INVOL | SWT_NEEDRESCHED); 650 #ifdef KTRACE 651 if (KTRPOINT(td, KTR_CSW)) 652 ktrcsw(0, 1, __func__); 653 #endif 654 } 655 656 static void 657 synch_setup(void *dummy __unused) 658 { 659 callout_init(&loadav_callout, 1); 660 ast_register(TDA_SCHED, ASTR_ASTF_REQUIRED, 0, ast_scheduler); 661 662 /* Kick off timeout driven events by calling first time. */ 663 loadav(NULL); 664 } 665 666 bool 667 should_yield(void) 668 { 669 670 return ((u_int)ticks - (u_int)curthread->td_swvoltick >= hogticks); 671 } 672 673 void 674 maybe_yield(void) 675 { 676 677 if (should_yield()) 678 kern_yield(PRI_USER); 679 } 680 681 void 682 kern_yield(int prio) 683 { 684 struct thread *td; 685 686 td = curthread; 687 DROP_GIANT(); 688 thread_lock(td); 689 if (prio == PRI_USER) 690 prio = td->td_user_pri; 691 if (prio >= 0) 692 sched_prio(td, prio); 693 mi_switch(SW_VOL | SWT_RELINQUISH); 694 PICKUP_GIANT(); 695 } 696 697 /* 698 * General purpose yield system call. 699 */ 700 int 701 sys_yield(struct thread *td, struct yield_args *uap) 702 { 703 704 thread_lock(td); 705 if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) 706 sched_prio(td, PRI_MAX_TIMESHARE); 707 mi_switch(SW_VOL | SWT_RELINQUISH); 708 td->td_retval[0] = 0; 709 return (0); 710 } 711 712 int 713 sys_sched_getcpu(struct thread *td, struct sched_getcpu_args *uap) 714 { 715 td->td_retval[0] = td->td_oncpu; 716 return (0); 717 } 718