/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sleepqueue.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

static void synch_setup(void *dummy);
SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL)

int	hogticks;
int	lbolt;

static struct callout loadav_callout;
static struct callout lbolt_callout;

struct loadavg averunnable =
	{ {0, 0, 0}, FSCALE };	/* load average, of runnable procs */

/*
 * Constants for averages over 1, 5, and 15 minutes
 * when sampling at 5 second intervals.
 */
static fixpt_t cexp[3] = {
	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
};
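
/*
 * Explanatory note on the constants above: with samples taken every 5
 * seconds, the decay factor for an N-minute average is exp(-5 / (N * 60)):
 *
 *	 1 minute:	exp(-5/60)  = exp(-1/12)  ~= 0.9200444
 *	 5 minutes:	exp(-5/300) = exp(-1/60)  ~= 0.9834715
 *	15 minutes:	exp(-5/900) = exp(-1/180) ~= 0.9944598
 *
 * Each value is stored as fixed point scaled by FSCALE (1 << FSHIFT),
 * which is why loadav() shifts its accumulated products right by FSHIFT.
 */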

/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
static int	fscale __unused = FSCALE;
SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, "");

static void	loadav(void *arg);
static void	lboltcb(void *arg);

void
sleepinit(void)
{

	hogticks = (hz / 10) * 2;	/* Default only. */
	init_sleepqueues();
}

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, otherwise signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 *
 * The mutex argument is exited before the caller is suspended, and
 * entered before msleep returns.  If priority includes the PDROP
 * flag the mutex is not entered before returning.
 */
int
msleep(ident, mtx, priority, wmesg, timo)
	void *ident;
	struct mtx *mtx;
	int priority, timo;
	const char *wmesg;
{
	struct sleepqueue *sq;
	struct thread *td;
	struct proc *p;
	int catch, rval, sig;
	WITNESS_SAVE_DECL(mtx);

	td = curthread;
	p = td->td_proc;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, mtx == NULL ? NULL :
	    &mtx->mtx_object, "Sleeping on \"%s\"", wmesg);
	KASSERT(timo != 0 || mtx_owned(&Giant) || mtx != NULL,
	    ("sleeping without a mutex"));
	KASSERT(p != NULL, ("msleep1"));
	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));

	if (cold) {
		/*
		 * During autoconfiguration, just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 * XXX: this used to do "s = splhigh(); splx(safepri);
		 * splx(s);" to give interrupts a chance, but there is
		 * no way to give interrupts a chance now.
		 */
		if (mtx != NULL && priority & PDROP)
			mtx_unlock(mtx);
		return (0);
	}
	catch = priority & PCATCH;
	rval = 0;

	/*
	 * If we are already on a sleep queue, then remove us from that
	 * sleep queue first.  We have to do this to handle recursive
	 * sleeps.
	 */
	if (TD_ON_SLEEPQ(td))
		sleepq_remove(td, td->td_wchan);

	sq = sleepq_lookup(ident);
	mtx_lock_spin(&sched_lock);

	/*
	 * If we are capable of async syscalls and there isn't already
	 * another one ready to return, start a new thread
	 * and queue it as ready to run.  Note that there is danger here
	 * because we need to make sure that we don't sleep allocating
	 * the thread (recursion here might be bad).
	 */
	if (p->p_flag & P_SA || p->p_numthreads > 1) {
		/*
		 * Just don't bother if we are exiting
		 * and not the exiting thread, or if the thread was
		 * marked as interrupted.
		 */
		if (catch) {
			if ((p->p_flag & P_WEXIT) && p->p_singlethread != td) {
				mtx_unlock_spin(&sched_lock);
				sleepq_release(ident);
				return (EINTR);
			}
			if (td->td_flags & TDF_INTERRUPT) {
				mtx_unlock_spin(&sched_lock);
				sleepq_release(ident);
				return (td->td_intrval);
			}
		}
	}
	mtx_unlock_spin(&sched_lock);
	CTR5(KTR_PROC, "msleep: thread %p (pid %d, %s) on %s (%p)",
	    td, p->p_pid, p->p_comm, wmesg, ident);

	DROP_GIANT();
	if (mtx != NULL) {
		mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
		WITNESS_SAVE(&mtx->mtx_object, mtx);
		mtx_unlock(mtx);
	}

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling thread_suspend_check, as we could stop there,
	 * and a wakeup or a SIGCONT (or both) could occur while we were
	 * stopped without resuming us.  Thus, we must be ready for sleep
	 * when cursig() is called.  If the wakeup happens while we're
	 * stopped, then td will no longer be on a sleep queue upon
	 * return from cursig().
	 */
	sleepq_add(sq, ident, mtx, wmesg, 0);
	if (timo)
		sleepq_set_timeout(ident, timo);
	if (catch) {
		sig = sleepq_catch_signals(ident);
		if (sig == 0 && !TD_ON_SLEEPQ(td)) {
			mtx_lock_spin(&sched_lock);
			td->td_flags &= ~TDF_SINTR;
			mtx_unlock_spin(&sched_lock);
			catch = 0;
		}
	} else
		sig = 0;

	/*
	 * Adjust this thread's priority.
	 *
	 * XXX: Do we need to save priority in td_base_pri?
	 */
	mtx_lock_spin(&sched_lock);
	sched_prio(td, priority & PRIMASK);
	mtx_unlock_spin(&sched_lock);

	if (timo && catch)
		rval = sleepq_timedwait_sig(ident, sig != 0);
	else if (timo)
		rval = sleepq_timedwait(ident, sig != 0);
	else if (catch)
		rval = sleepq_wait_sig(ident);
	else {
		sleepq_wait(ident);
		rval = 0;
	}

	/*
	 * We're awake from voluntary sleep.
	 */
	if (rval == 0 && catch)
		rval = sleepq_calc_signal_retval(sig);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	if (mtx != NULL && !(priority & PDROP)) {
		mtx_lock(mtx);
		WITNESS_RESTORE(&mtx->mtx_object, mtx);
	}
	return (rval);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(ident)
	register void *ident;
{

	sleepq_broadcast(ident, 0, -1);
}

/*
 * Make a process sleeping on the specified identifier runnable.
 * May wake more than one process if a target process is currently
 * swapped out.
 */
void
wakeup_one(ident)
	register void *ident;
{

	sleepq_signal(ident, 0, -1);
}
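
/*
 * Illustrative sketch of a typical msleep()/wakeup() pairing; the names
 * foo_lock, foo_ready and the wait message "foordy" are hypothetical and
 * exist only to show the pattern.  The consumer sleeps on the address of
 * the condition it waits for, dropping foo_lock atomically with going to
 * sleep and reacquiring it before msleep() returns:
 *
 *	mtx_lock(&foo_lock);
 *	while (!foo_ready)
 *		msleep(&foo_ready, &foo_lock, PWAIT, "foordy", 0);
 *	(consume whatever became ready)
 *	mtx_unlock(&foo_lock);
 *
 * The producer makes the condition true and then wakes the sleepers:
 *
 *	mtx_lock(&foo_lock);
 *	foo_ready = 1;
 *	wakeup(&foo_ready);
 *	mtx_unlock(&foo_lock);
 *
 * wakeup() makes every thread sleeping on the identifier runnable;
 * wakeup_one() wakes only one of them.
 */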

/*
 * The machine independent parts of mi_switch().
 */
void
mi_switch(int flags)
{
	struct bintime new_switchtime;
	struct thread *td;
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
	td = curthread;			/* XXX */
	p = td->td_proc;		/* XXX */
	KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code"));
#ifdef INVARIANTS
	if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
		mtx_assert(&Giant, MA_NOTOWNED);
#endif
	KASSERT(td->td_critnest == 1,
	    ("mi_switch: switch in a critical section"));
	KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
	    ("mi_switch: switch must be voluntary or involuntary"));

	if (flags & SW_VOL)
		p->p_stats->p_ru.ru_nvcsw++;
	else
		p->p_stats->p_ru.ru_nivcsw++;
	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	binuptime(&new_switchtime);
	bintime_add(&p->p_runtime, &new_switchtime);
	bintime_sub(&p->p_runtime, PCPU_PTR(switchtime));

	td->td_generation++;	/* bump preempt-detect counter */

#ifdef DDB
	/*
	 * Don't perform context switches from the debugger.
	 */
	if (db_active) {
		mtx_unlock_spin(&sched_lock);
		db_print_backtrace();
		db_error("Context switches not allowed in the debugger");
	}
#endif

	/*
	 * Check if the process exceeds its cpu resource allocation.  If
	 * over max, arrange to kill the process in ast().
	 */
	if (p->p_cpulimit != RLIM_INFINITY &&
	    p->p_runtime.sec > p->p_cpulimit) {
		p->p_sflag |= PS_XCPU;
		td->td_flags |= TDF_ASTPENDING;
	}

	/*
	 * Finish up stats for outgoing thread.
	 */
	cnt.v_swtch++;
	PCPU_SET(switchtime, new_switchtime);
	PCPU_SET(switchticks, ticks);
	CTR3(KTR_PROC, "mi_switch: old thread %p (pid %d, %s)", td, p->p_pid,
	    p->p_comm);
	if (td->td_proc->p_flag & P_SA)
		thread_switchout(td);
	sched_switch(td);

	CTR3(KTR_PROC, "mi_switch: new thread %p (pid %d, %s)", td, p->p_pid,
	    p->p_comm);

	/*
	 * If the last thread was exiting, finish cleaning it up.
	 */
	if ((td = PCPU_GET(deadthread))) {
		PCPU_SET(deadthread, NULL);
		thread_stash(td);
	}
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;
	mtx_assert(&sched_lock, MA_OWNED);
	switch (p->p_state) {
	case PRS_ZOMBIE:
		panic("setrunnable(1)");
	default:
		break;
	}
	switch (td->td_state) {
	case TDS_RUNNING:
	case TDS_RUNQ:
		return;
	case TDS_INHIBITED:
		/*
		 * If we are only inhibited because we are swapped out
		 * then arrange to swap in this process.  Otherwise just
		 * return.
		 */
		if (td->td_inhibitors != TDI_SWAPPED)
			return;
		/* XXX: intentional fall-through ? */
	case TDS_CAN_RUN:
		break;
	default:
		printf("state is 0x%x", td->td_state);
		panic("setrunnable(2)");
	}
	if ((p->p_sflag & PS_INMEM) == 0) {
		if ((p->p_sflag & PS_SWAPPINGIN) == 0) {
			p->p_sflag |= PS_SWAPINREQ;
			wakeup(&proc0);
		}
	} else
		sched_wakeup(td);
}

/*
 * Compute a tenex style load average of a quantity on
 * 1, 5 and 15 minute intervals.
 * XXXKSE   Needs complete rewrite when correct info is available.
 * Completely Bogus.. only works with 1:1 (but compiles ok now :-)
 */
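/*
 * Note on the fixed-point update in loadav() below: each sample folds the
 * instantaneous run-queue length `nrun' into the old average as
 *
 *	load = (cexp * load + nrun * FSCALE * (FSCALE - cexp)) >> FSHIFT
 *
 * which is the FSCALE-scaled form of load = cexp * load + (1 - cexp) * nrun,
 * i.e. an exponentially weighted moving average.  As a worked example
 * (assuming the default FSCALE of 2048), an old 1-minute average of 1.00
 * (ldavg = 2048) and nrun = 3 give roughly 0.92 * 1.00 + 0.08 * 3 = 1.16,
 * i.e. ldavg ~= 2376.
 */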
static void
loadav(void *arg)
{
	int i, nrun;
	struct loadavg *avg;

	nrun = sched_load();
	avg = &averunnable;

	for (i = 0; i < 3; i++)
		avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;

	/*
	 * Schedule the next update to occur after 5 seconds, but add a
	 * random variation to avoid synchronisation with processes that
	 * run at regular intervals.
	 */
	callout_reset(&loadav_callout, hz * 4 + (int)(random() % (hz * 2 + 1)),
	    loadav, NULL);
}

static void
lboltcb(void *arg)
{
	wakeup(&lbolt);
	callout_reset(&lbolt_callout, hz, lboltcb, NULL);
}

/* ARGSUSED */
static void
synch_setup(dummy)
	void *dummy;
{
	callout_init(&loadav_callout, CALLOUT_MPSAFE);
	callout_init(&lbolt_callout, CALLOUT_MPSAFE);

	/* Kick off timeout driven events by calling first time. */
	loadav(NULL);
	lboltcb(NULL);
}

/*
 * General purpose yield system call.
 */
int
yield(struct thread *td, struct yield_args *uap)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;
	mtx_assert(&Giant, MA_NOTOWNED);
	mtx_lock_spin(&sched_lock);
	sched_prio(td, PRI_MAX_TIMESHARE);
	mi_switch(SW_VOL);
	mtx_unlock_spin(&sched_lock);
	td->td_retval[0] = 0;
	return (0);
}