/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#ifndef KERN_SWITCH_INCLUDE
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#else /* KERN_SWITCH_INCLUDE */
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define	KTR_CRITICAL	KTR_SCHED
#else
#define	KTR_CRITICAL	0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");
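
/*
 * For illustration only: the read-only OID registered above can be
 * inspected from user space with sysctl(8), e.g.:
 *
 *	$ sysctl kern.sched.preemption
 *	kern.sched.preemption: 1
 *
 * The value is fixed at compile time; it reports 1 only when the kernel
 * was built with "options PREEMPTION".
 */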

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
	struct thread *td;

#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
		/* Shutting down, run idlethread on AP's */
		td = PCPU_GET(idlethread);
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
		TD_SET_RUNNING(td);
		return (td);
	}
#endif

retry:
	td = sched_choose();

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	td->td_critnest++;
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));
#ifdef PREEMPTION
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		mtx_assert(&sched_lock, MA_NOTOWNED);
		if (td->td_owepreempt) {
			td->td_critnest = 1;
			mtx_lock_spin(&sched_lock);
			td->td_critnest--;
			mi_switch(SW_INVOL, NULL);
			mtx_unlock_spin(&sched_lock);
		}
	} else
#endif
		td->td_critnest--;

	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}
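
/*
 * Usage sketch (illustrative only): critical sections merely bump a
 * per-thread nesting count, so they are cheap and may nest freely.  A
 * typical caller protecting per-CPU state looks like:
 *
 *	critical_enter();
 *	... touch PCPU data; no preemption (and thus no migration) here ...
 *	critical_exit();
 *
 * Only the outermost critical_exit() (td_critnest dropping from 1 to 0)
 * performs the deferred involuntary switch when td_owepreempt was set by
 * maybe_preempt() below.
 */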

/*
 * This function is called when a thread is about to be put on a run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines whether the current thread should be preempted immediately in
 * favor of the new thread.  If so, it switches to the new thread and
 * eventually returns true.  If not, it returns false so that the caller may
 * place the thread on an appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
	struct thread *ctd;
	int cpri, pri;
#endif

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef PREEMPTION
	/*
	 * The new thread should not preempt the current thread if any of the
	 * following conditions are true:
	 *
	 *  - The kernel is in the throes of crashing (panicstr).
	 *  - The current thread has a higher (numerically lower) or
	 *    equivalent priority.  Note that this prevents curthread from
	 *    trying to preempt to itself.
	 *  - It is too early in the boot for context switches (cold is set).
	 *  - The current thread has an inhibitor set or is in the process of
	 *    exiting.  In this case, the current thread is about to switch
	 *    out anyway, so there's no point in preempting.  If we did,
	 *    the current thread would not be properly resumed either, so
	 *    just avoid that whole landmine.
	 *  - The new thread's priority is not a realtime priority, the
	 *    current thread's priority is not an idle priority, and
	 *    FULL_PREEMPTION is disabled.
	 *
	 * If all of these conditions are false, but the current thread is in
	 * a nested critical section, then we have to defer the preemption
	 * until we exit the critical section.  Otherwise, switch immediately
	 * to the new thread.
	 */
	ctd = curthread;
	KASSERT((ctd->td_sched != NULL && ctd->td_sched->ts_thread == ctd),
	    ("thread has no (or wrong) sched-private part."));
	KASSERT((td->td_inhibitors == 0),
	    ("maybe_preempt: trying to run inhibited thread"));
	pri = td->td_priority;
	cpri = ctd->td_priority;
	if (panicstr != NULL || pri >= cpri || cold /* || dumping */ ||
	    TD_IS_INHIBITED(ctd))
		return (0);
#ifndef FULL_PREEMPTION
	if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
		return (0);
#endif

	if (ctd->td_critnest > 1) {
		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
		    ctd->td_critnest);
		ctd->td_owepreempt = 1;
		return (0);
	}

	/*
	 * Thread is runnable but not yet put on system run queue.
	 */
	MPASS(TD_ON_RUNQ(td));
	TD_SET_RUNNING(td);
	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mi_switch(SW_INVOL|SW_PREEMPT, td);
	return (1);
#else
	return (0);
#endif
}

#if 0
#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}
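
/*
 * The status bitmap is a two-level structure: rqb_bits[] holds RQB_LEN words
 * of RQB_BPW bits each, one bit per run queue (the CTASSERT above ties
 * RQB_BPW * RQB_LEN to RQ_NQS).  RQB_WORD(pri) selects the word and
 * RQB_BIT(pri) the bit within it, using RQB_L2BPW as log2(RQB_BPW).
 * As a worked example, assuming RQB_BPW is 32: queue 37 lives in word
 * 37 >> 5 == 1, at bit 37 & 31 == 5, i.e. mask 0x20.
 */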

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

static __inline int
runq_findbit_from(struct runq *rq, u_char start)
{
	struct rqbits *rqb;
	int bit;
	int pri;
	int i;

	rqb = &rq->rq_status;
	bit = start & (RQB_BPW - 1);
	pri = 0;
	CTR1(KTR_RUNQ, "runq_findbit_from: start %d", start);
again:
	for (i = RQB_WORD(start); i < RQB_LEN; i++) {
		CTR3(KTR_RUNQ, "runq_findbit_from: bits %d = %#x bit = %d",
		    i, rqb->rqb_bits[i], bit);
		if (rqb->rqb_bits[i]) {
			if (bit != 0) {
				for (pri = bit; pri < RQB_BPW; pri++)
					if (rqb->rqb_bits[i] & (1ul << pri))
						break;
				bit = 0;
				if (pri >= RQB_BPW)
					continue;
			} else
				pri = RQB_FFS(rqb->rqb_bits[i]);
			pri += (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}
		bit = 0;
	}
	if (start != 0) {
		CTR0(KTR_RUNQ, "runq_findbit_from: restarting");
		start = 0;
		goto again;
	}

	return (-1);
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct td_sched *ts, int flags)
{
	struct rqhead *rqh;
	int pri;

	pri = ts->ts_thread->td_priority / RQ_PPQ;
	ts->ts_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add: td=%p ts=%p pri=%d %d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
	} else {
		TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
	}
}

void
runq_add_pri(struct runq *rq, struct td_sched *ts, u_char pri, int flags)
{
	struct rqhead *rqh;

	KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
	ts->ts_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add_pri: td=%p ke=%p pri=%d idx=%d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
	} else {
		TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
	}
}
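
/*
 * A sketch of the mapping performed by runq_add(), assuming the conventional
 * RQ_PPQ value of 4: a thread with td_priority 100 is hashed to queue
 * 100 / 4 == 25, so every group of four adjacent priorities shares one
 * queue.  SRQ_PREEMPTED callers go to the head of that queue so that a
 * thread which was preempted resumes ahead of peers at the same level;
 * everyone else is appended at the tail (round-robin within the queue).
 */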

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

#if defined(SMP) && defined(SCHED_4BSD)
int runq_fuzz = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif

/*
 * Find the highest priority process on the run queue.
 */
struct td_sched *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
		/* fuzz == 1 is normal.. 0 or less are ignored */
		if (runq_fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = runq_fuzz;
			int cpu = PCPU_GET(cpuid);
			struct td_sched *ts2;
			ts2 = ts = TAILQ_FIRST(rqh);

			while (count-- && ts2) {
				if (ts2->ts_thread->td_lastcpu == cpu) {
					ts = ts2;
					break;
				}
				ts2 = TAILQ_NEXT(ts2, ts_procq);
			}
		} else
#endif
			ts = TAILQ_FIRST(rqh);
		KASSERT(ts != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d td_sched=%p rqh=%p", pri, ts, rqh);
		return (ts);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

struct td_sched *
runq_choose_from(struct runq *rq, u_char idx)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((pri = runq_findbit_from(rq, idx)) != -1) {
		rqh = &rq->rq_queues[pri];
		ts = TAILQ_FIRST(rqh);
		KASSERT(ts != NULL, ("runq_choose: no proc on busy queue"));
		CTR4(KTR_RUNQ,
		    "runq_choose_from: pri=%d kse=%p idx=%d rqh=%p",
		    pri, ts, ts->ts_rqindex, rqh);
		return (ts);
	}
	CTR1(KTR_RUNQ, "runq_choose_from: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set state afterwards.
 */
void
runq_remove(struct runq *rq, struct td_sched *ts)
{

	runq_remove_idx(rq, ts, NULL);
}

void
runq_remove_idx(struct runq *rq, struct td_sched *ts, u_char *idx)
{
	struct rqhead *rqh;
	u_char pri;

	KASSERT(ts->ts_thread->td_proc->p_sflag & PS_INMEM,
	    ("runq_remove_idx: process swapped out"));
	pri = ts->ts_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_remove_idx: td=%p, ts=%p pri=%d %d rqh=%p",
	    ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
	TAILQ_REMOVE(rqh, ts, ts_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove_idx: empty");
		runq_clrbit(rq, pri);
		if (idx != NULL && *idx == pri)
			*idx = (pri + 1) % RQ_NQS;
	}
}
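
/*
 * How the idx hint above fits together (illustrative summary): a caller that
 * keeps a rotating index can pass it to runq_choose_from(), which scans the
 * status bits starting at that queue and wraps around to 0, and then to
 * runq_remove_idx(), which bumps the saved index past a queue that has just
 * become empty.  The net effect is a circular sweep over the queues rather
 * than always favoring the numerically lowest non-empty one, as
 * runq_choose() does.
 */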

/****** functions that are temporarily here ***********/
#include <vm/uma.h>
extern struct mtx kse_zombie_lock;

/*
 * Allocate scheduler specific per-process resources.
 * The thread and proc have already been linked in.
 *
 * Called from:
 *  proc_init() (UMA init method)
 */
void
sched_newproc(struct proc *p, struct thread *td)
{
}

/*
 * A thread is being either created or recycled.
 * Fix up the per-scheduler resources associated with it.
 * Called from:
 *  sched_fork_thread()
 *  thread_dtor()  (*may go away)
 *  thread_init()  (*may go away)
 */
void
sched_newthread(struct thread *td)
{
	struct td_sched *ts;

	ts = (struct td_sched *) (td + 1);
	bzero(ts, sizeof(*ts));
	td->td_sched = ts;
	ts->ts_thread = td;
}

/*
 * Called from:
 *  thr_create()
 *  proc_init() (UMA) via sched_newproc()
 */
void
sched_init_concurrency(struct proc *p)
{
}

/*
 * Change the concurrency of an existing proc to N.
 * Called from:
 *  kse_create()
 *  kse_exit()
 *  thread_exit()
 *  thread_single()
 */
void
sched_set_concurrency(struct proc *p, int concurrency)
{
}

/*
 * Called from thread_exit() for every exiting thread.
 *
 * Not to be confused with sched_exit_thread(), which thread_exit() calls
 * only for threads exiting without the rest of the process exiting;
 * sched_exit() also calls it, and we wouldn't want to call it twice.
 * XXX This can probably be fixed.
 */
void
sched_thread_exit(struct thread *td)
{
}

#endif /* KERN_SWITCH_INCLUDE */