/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifdef KSE
/***
Here is the logic..

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
moment in question, then there are at most M (N-X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to become runnable, we know
it was associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned, we know M was 1 and must
now be 0. Since the thread is no longer queued, that pointer must be
removed from it. Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available, so we can prove
that the next thread in the ksegrp list will not have a KSE to assign to
it, and we can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not.. i.e. is it 'earlier'
on the list than that thread or later.. If it's earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.
***/
#endif
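
#ifdef KSE
/*
 * Illustrative sketch only (not part of the original file): a hypothetical
 * debugging helper that spells out the kg_last_assigned invariant described
 * above.  It only uses fields and macros already used elsewhere in this file
 * (kg_runq, kg_last_assigned, td_runq, td_kse, ke_state, KES_ONRUNQ); the
 * helper itself does not exist in the tree and is kept under "#if 0" so it
 * is never built.
 */
#if 0
static void
ksegrp_check_last_assigned(struct ksegrp *kg)
{
        struct thread *td;
        int assigned;           /* still at or before kg_last_assigned? */

        mtx_assert(&sched_lock, MA_OWNED);
        assigned = (kg->kg_last_assigned != NULL);
        TAILQ_FOREACH(td, &kg->kg_runq, td_runq) {
                if (assigned) {
                        /* Threads up to last_assigned hold a slot/KSE. */
                        KASSERT(td->td_kse->ke_state == KES_ONRUNQ,
                            ("assigned thread not on system run queue"));
                        if (td == kg->kg_last_assigned)
                                assigned = 0;
                } else {
                        /* Everything after last_assigned is unassigned. */
                        KASSERT(td->td_kse->ke_state != KES_ONRUNQ,
                            ("unassigned thread on system run queue"));
                }
        }
}
#endif
#endif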

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#ifndef KERN_SWITCH_INCLUDE
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#else  /* KERN_SWITCH_INCLUDE */
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define KTR_CRITICAL    KTR_SCHED
#else
#define KTR_CRITICAL    0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

#define td_kse td_sched

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.    *
 ************************************************************************/
#ifdef KSE
/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
#else
/*
 * Select the thread that will be run next.
 */
#endif
struct thread *
choosethread(void)
{
#ifdef KSE
        struct kse *ke;
#endif
        struct thread *td;
#ifdef KSE
        struct ksegrp *kg;
#endif

#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
        if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
                /* Shutting down, run idlethread on AP's */
                td = PCPU_GET(idlethread);
#ifdef KSE
                ke = td->td_kse;
#endif
                CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
#ifdef KSE
                ke->ke_flags |= KEF_DIDRUN;
#else
                td->td_kse->ke_flags |= KEF_DIDRUN;
#endif
                TD_SET_RUNNING(td);
                return (td);
        }
#endif

retry:
#ifdef KSE
        ke = sched_choose();
        if (ke) {
                td = ke->ke_thread;
                KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
                kg = ke->ke_ksegrp;
                if (td->td_proc->p_flag & P_HADTHREADS) {
                        if (kg->kg_last_assigned == td) {
                                kg->kg_last_assigned = TAILQ_PREV(td,
                                    threadqueue, td_runq);
                        }
                        TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
                }
#else
        td = sched_choose();
        if (td) {
#endif
                CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
                    td, td->td_priority);
        } else {
                /* Simulate runq_choose() having returned the idle thread */
                td = PCPU_GET(idlethread);
#ifdef KSE
                ke = td->td_kse;
#endif
                CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
        }
#ifdef KSE
        ke->ke_flags |= KEF_DIDRUN;
#else
        td->td_kse->ke_flags |= KEF_DIDRUN;
#endif

        /*
         * If we are in panic, only allow system threads,
         * plus the one we are running in, to be run.
         */
        if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
            (td->td_flags & TDF_INPANIC) == 0)) {
                /* note that it is no longer on the run queue */
                TD_SET_CAN_RUN(td);
                goto retry;
        }

        TD_SET_RUNNING(td);
        return (td);
}

#ifdef KSE
/*
 * Given a surplus system slot, try to assign a new runnable thread to it.
 * Called from:
 *  sched_thread_exit()  (local)
 *  sched_switch()  (local)
 *  remrunqueue()  (local)  (not at the moment)
 */
static void
slot_fill(struct ksegrp *kg)
{
        struct thread *td;

        mtx_assert(&sched_lock, MA_OWNED);
        while (kg->kg_avail_opennings > 0) {
                /*
                 * Find the first unassigned thread.
                 */
                if ((td = kg->kg_last_assigned) != NULL)
                        td = TAILQ_NEXT(td, td_runq);
                else
                        td = TAILQ_FIRST(&kg->kg_runq);

                /*
                 * If we found one, send it to the system scheduler.
                 */
                if (td) {
                        kg->kg_last_assigned = td;
                        sched_add(td, SRQ_YIELDING);
                        CTR2(KTR_RUNQ, "slot_fill: td%p -> kg%p", td, kg);
                } else {
                        /* no threads to use up the slots. quit now */
                        break;
                }
        }
}

#ifdef SCHED_4BSD
/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority.
 */
static void
remrunqueue(struct thread *td)
{
        struct thread *td2, *td3;
        struct ksegrp *kg;
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
        kg = td->td_ksegrp;
        ke = td->td_kse;
        CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
        TD_SET_CAN_RUN(td);
        /*
         * If it is not a threaded process, take the shortcut.
         */
        if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
                /* remove from sys run queue and free up a slot */
                sched_rem(td);
                return;
        }
        td3 = TAILQ_PREV(td, threadqueue, td_runq);
        TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
        if (ke->ke_state == KES_ONRUNQ) {
                /*
                 * This thread has been assigned to the system run queue.
                 * We need to dissociate it and try to assign the
                 * KSE to the next available thread.  Then, we should
                 * see if we need to move the KSE in the run queues.
                 */
                sched_rem(td);
                td2 = kg->kg_last_assigned;
                KASSERT((td2 != NULL), ("last assigned has wrong value"));
                if (td2 == td)
                        kg->kg_last_assigned = td3;
                /* slot_fill(kg); */ /* will replace it with another */
        }
}
#endif
#endif

/*
 * Change the priority of a thread that is on the run queue.
 */
void
adjustrunqueue(struct thread *td, int newpri)
{
#ifdef KSE
        struct ksegrp *kg;
#endif
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));

        ke = td->td_kse;
        CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
#ifdef KSE
        /*
         * If it is not a threaded process, take the shortcut.
         */
        if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
                /* We only care about the kse in the run queue. */
                td->td_priority = newpri;
#ifndef SCHED_CORE
                if (ke->ke_rqindex != (newpri / RQ_PPQ))
#else
                if (ke->ke_rqindex != newpri)
#endif
                {
                        sched_rem(td);
                        sched_add(td, SRQ_BORING);
                }
                return;
        }

        /* It is a threaded process */
        kg = td->td_ksegrp;
        if (ke->ke_state == KES_ONRUNQ
#ifdef SCHED_ULE
            || ((ke->ke_flags & KEF_ASSIGNED) != 0 &&
                (ke->ke_flags & KEF_REMOVED) == 0)
#endif
           ) {
                if (kg->kg_last_assigned == td) {
                        kg->kg_last_assigned =
                            TAILQ_PREV(td, threadqueue, td_runq);
                }
                sched_rem(td);
        }
        TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
        TD_SET_CAN_RUN(td);
        td->td_priority = newpri;
        setrunqueue(td, SRQ_BORING);
#else
        /* We only care about the kse in the run queue. */
        td->td_priority = newpri;
#ifndef SCHED_CORE
        if (ke->ke_rqindex != (newpri / RQ_PPQ))
#else
        if (ke->ke_rqindex != newpri)
#endif
        {
                sched_rem(td);
                sched_add(td, SRQ_BORING);
        }
#endif
}
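
/*
 * Illustrative sketch only (not part of the original file): how a priority
 * change typically reaches adjustrunqueue().  A scheduler's priority-setting
 * path (for example sched_prio() in the 4BSD scheduler) does roughly the
 * following; the helper name below is hypothetical and the block is kept
 * under "#if 0" so it is never built.
 */
#if 0
static void
example_change_priority(struct thread *td, u_char prio)
{

        mtx_assert(&sched_lock, MA_OWNED);
        if (TD_ON_RUNQ(td) && prio != td->td_priority) {
                /* Requeue the thread so its run-queue slot matches prio. */
                adjustrunqueue(td, prio);
        } else {
                /* Not queued: just record the new priority. */
                td->td_priority = prio;
        }
}
#endif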

#ifdef KSE
/*
 * This function is called when a thread is about to be put on a
 * ksegrp run queue because it has been made runnable or its
 * priority has been adjusted and the ksegrp does not have a
 * free kse slot.  It determines if a thread from the same ksegrp
 * should be preempted.  If so, it tries to switch threads
 * if the thread is on the same cpu or notifies another cpu that
 * it should switch threads.
 */

static void
maybe_preempt_in_ksegrp(struct thread *td)
#if !defined(SMP)
{
        struct thread *running_thread;

        mtx_assert(&sched_lock, MA_OWNED);
        running_thread = curthread;

        if (running_thread->td_ksegrp != td->td_ksegrp)
                return;

        if (td->td_priority >= running_thread->td_priority)
                return;
#ifdef PREEMPTION
#ifndef FULL_PREEMPTION
        if (td->td_priority > PRI_MAX_ITHD) {
                running_thread->td_flags |= TDF_NEEDRESCHED;
                return;
        }
#endif /* FULL_PREEMPTION */

        if (running_thread->td_critnest > 1)
                running_thread->td_owepreempt = 1;
        else
                mi_switch(SW_INVOL, NULL);

#else /* PREEMPTION */
        running_thread->td_flags |= TDF_NEEDRESCHED;
#endif /* PREEMPTION */
        return;
}

#else /* SMP */
{
        struct thread *running_thread;
        int worst_pri;
        struct ksegrp *kg;
        cpumask_t cpumask, dontuse;
        struct pcpu *pc;
        struct pcpu *best_pcpu;
        struct thread *cputhread;

        mtx_assert(&sched_lock, MA_OWNED);

        running_thread = curthread;

#if !defined(KSEG_PEEMPT_BEST_CPU)
        if (running_thread->td_ksegrp != td->td_ksegrp) {
#endif
                kg = td->td_ksegrp;

                /* if someone is ahead of this thread, wait our turn */
                if (td != TAILQ_FIRST(&kg->kg_runq))
                        return;

                worst_pri = td->td_priority;
                best_pcpu = NULL;
                dontuse = stopped_cpus | idle_cpus_mask;

                /*
                 * Find a cpu with the worst priority that runs a thread from
                 * the same ksegrp - if multiple exist give first the last run
                 * cpu and then the current cpu priority.
                 */
                SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
                        cpumask = pc->pc_cpumask;
                        cputhread = pc->pc_curthread;

                        if ((cpumask & dontuse) ||
                            cputhread->td_ksegrp != kg)
                                continue;

                        if (cputhread->td_priority > worst_pri) {
                                worst_pri = cputhread->td_priority;
                                best_pcpu = pc;
                                continue;
                        }

                        if (cputhread->td_priority == worst_pri &&
                            best_pcpu != NULL &&
                            (td->td_lastcpu == pc->pc_cpuid ||
                            (PCPU_GET(cpumask) == cpumask &&
                            td->td_lastcpu != best_pcpu->pc_cpuid)))
                                best_pcpu = pc;
                }

                /* Check if we need to preempt someone */
                if (best_pcpu == NULL)
                        return;

#if defined(IPI_PREEMPTION) && defined(PREEMPTION)
#if !defined(FULL_PREEMPTION)
                if (td->td_priority <= PRI_MAX_ITHD)
#endif /* ! FULL_PREEMPTION */
                {
                        ipi_selected(best_pcpu->pc_cpumask, IPI_PREEMPT);
                        return;
                }
#endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */

                if (PCPU_GET(cpuid) != best_pcpu->pc_cpuid) {
                        best_pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
                        ipi_selected(best_pcpu->pc_cpumask, IPI_AST);
                        return;
                }
#if !defined(KSEG_PEEMPT_BEST_CPU)
        }
#endif

        if (td->td_priority >= running_thread->td_priority)
                return;
#ifdef PREEMPTION

#if !defined(FULL_PREEMPTION)
        if (td->td_priority > PRI_MAX_ITHD) {
                running_thread->td_flags |= TDF_NEEDRESCHED;
                return;
        }
#endif /* ! FULL_PREEMPTION */

        if (running_thread->td_critnest > 1)
                running_thread->td_owepreempt = 1;
        else
                mi_switch(SW_INVOL, NULL);

#else /* PREEMPTION */
        running_thread->td_flags |= TDF_NEEDRESCHED;
#endif /* PREEMPTION */
        return;
}
#endif /* !SMP */

int limitcount;
#endif
void
setrunqueue(struct thread *td, int flags)
{
#ifdef KSE
        struct ksegrp *kg;
        struct thread *td2;
        struct thread *tda;

        CTR3(KTR_RUNQ, "setrunqueue: td:%p kg:%p pid:%d",
            td, td->td_ksegrp, td->td_proc->p_pid);
#else
        CTR2(KTR_RUNQ, "setrunqueue: td:%p pid:%d",
            td, td->td_proc->p_pid);
#endif
        CTR5(KTR_SCHED, "setrunqueue: %p(%s) prio %d by %p(%s)",
            td, td->td_proc->p_comm, td->td_priority, curthread,
            curthread->td_proc->p_comm);
        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((td->td_inhibitors == 0),
            ("setrunqueue: trying to run inhibited thread"));
        KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
            ("setrunqueue: bad thread state"));
        TD_SET_RUNQ(td);
#ifdef KSE
        kg = td->td_ksegrp;
        if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
                /*
                 * Common path optimisation: Only one of everything
                 * and the KSE is always already attached.
                 * Totally ignore the ksegrp run queue.
                 */
                if (kg->kg_avail_opennings != 1) {
                        if (limitcount < 1) {
                                limitcount++;
                                printf("pid %d: corrected slot count (%d->1)\n",
                                    td->td_proc->p_pid, kg->kg_avail_opennings);
                        }
                        kg->kg_avail_opennings = 1;
                }
                sched_add(td, flags);
                return;
        }

        /*
         * If the concurrency has been reduced, and we would land in the
         * assigned section, then keep removing entries from the
         * system run queue until we are no longer in that section
         * or there is room for us to be put in that section.
         * What we MUST avoid is the case where threads of lower
         * priority than the new one are scheduled, but the new one can not
         * be scheduled itself.  That would lead to a non-contiguous set
         * of scheduled threads, and everything would break.
         */
        tda = kg->kg_last_assigned;
        while ((kg->kg_avail_opennings <= 0) &&
            (tda && (tda->td_priority > td->td_priority))) {
                /*
                 * None free, but there is one we can commandeer.
                 */
                CTR2(KTR_RUNQ,
                    "setrunqueue: kg:%p: take slot from td: %p", kg, tda);
                sched_rem(tda);
                tda = kg->kg_last_assigned =
                    TAILQ_PREV(tda, threadqueue, td_runq);
        }

        /*
         * Add the thread to the ksegrp's run queue at
         * the appropriate place.
         */
        TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
                if (td2->td_priority > td->td_priority) {
                        TAILQ_INSERT_BEFORE(td2, td, td_runq);
                        break;
                }
        }
        if (td2 == NULL) {
                /* We ran off the end of the TAILQ or it was empty. */
                TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
        }

        /*
         * If we have a slot to use, then put the thread on the system
         * run queue and if needed, readjust the last_assigned pointer.
         * It may be that we need to schedule something anyhow,
         * even if the available slots are negative, so that
         * all the items < last_assigned are scheduled.
         */
        if (kg->kg_avail_opennings > 0) {
                if (tda == NULL) {
                        /*
                         * No pre-existing last assigned so whoever is first
                         * gets the slot.. (maybe us)
                         */
                        td2 = TAILQ_FIRST(&kg->kg_runq);
                        kg->kg_last_assigned = td2;
                } else if (tda->td_priority > td->td_priority) {
                        td2 = td;
                } else {
                        /*
                         * We are past last_assigned, so
                         * give the next slot to whatever is next,
                         * which may or may not be us.
                         */
                        td2 = TAILQ_NEXT(tda, td_runq);
                        kg->kg_last_assigned = td2;
                }
                sched_add(td2, flags);
        } else {
                CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
                    td, td->td_ksegrp, td->td_proc->p_pid);
                if ((flags & SRQ_YIELDING) == 0)
                        maybe_preempt_in_ksegrp(td);
        }
#else
        sched_add(td, flags);
#endif
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
        struct thread *td;

        td = curthread;
        td->td_critnest++;
        CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}

void
critical_exit(void)
{
        struct thread *td;

        td = curthread;
        KASSERT(td->td_critnest != 0,
            ("critical_exit: td_critnest == 0"));
#ifdef PREEMPTION
        if (td->td_critnest == 1) {
                td->td_critnest = 0;
                mtx_assert(&sched_lock, MA_NOTOWNED);
                if (td->td_owepreempt) {
                        td->td_critnest = 1;
                        mtx_lock_spin(&sched_lock);
                        td->td_critnest--;
                        mi_switch(SW_INVOL, NULL);
                        mtx_unlock_spin(&sched_lock);
                }
        } else
#endif
                td->td_critnest--;

        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}
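
/*
 * Illustrative sketch only (not part of the original file): the usual shape
 * of a critical_enter()/critical_exit() pair, protecting a short region that
 * must not be preempted (for example while working on per-CPU state).  The
 * function and the work done inside it are hypothetical; the block is kept
 * under "#if 0" so it is never built.
 */
#if 0
static void
example_critical_region(void)
{

        critical_enter();
        /*
         * Preemption of this thread is now deferred; a preemption requested
         * while we are here is recorded in td_owepreempt and honored by
         * critical_exit() above.
         */
        /* ... touch per-CPU data ... */
        critical_exit();
}
#endif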

/*
 * This function is called when a thread is about to be put on a run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines whether the new thread should preempt the current thread
 * immediately.  If so, it switches to it and eventually returns true.  If
 * not, it returns false so that the caller may place the thread on an
 * appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
        struct thread *ctd;
        int cpri, pri;
#endif

        mtx_assert(&sched_lock, MA_OWNED);
#ifdef PREEMPTION
        /*
         * The new thread should not preempt the current thread if any of the
         * following conditions are true:
         *
         *  - The kernel is in the throes of crashing (panicstr).
         *  - The current thread has a higher (numerically lower) or
         *    equivalent priority.  Note that this prevents curthread from
         *    trying to preempt to itself.
         *  - It is too early in the boot for context switches (cold is set).
         *  - The current thread has an inhibitor set or is in the process of
         *    exiting.  In this case, the current thread is about to switch
         *    out anyway, so there's no point in preempting.  If we did,
         *    the current thread would not be properly resumed as well, so
         *    just avoid that whole landmine.
         *  - The new thread's priority is not a realtime priority and
         *    the current thread's priority is not an idle priority and
         *    FULL_PREEMPTION is disabled.
         *
         * If all of these conditions are false, but the current thread is in
         * a nested critical section, then we have to defer the preemption
         * until we exit the critical section.  Otherwise, switch immediately
         * to the new thread.
         */
        ctd = curthread;
        KASSERT((ctd->td_kse != NULL && ctd->td_kse->ke_thread == ctd),
            ("thread has no (or wrong) sched-private part."));
        KASSERT((td->td_inhibitors == 0),
            ("maybe_preempt: trying to run inhibited thread"));
        pri = td->td_priority;
        cpri = ctd->td_priority;
        if (panicstr != NULL || pri >= cpri || cold /* || dumping */ ||
            TD_IS_INHIBITED(ctd) || td->td_kse->ke_state != KES_THREAD)
                return (0);
#ifndef FULL_PREEMPTION
        if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
                return (0);
#endif

        if (ctd->td_critnest > 1) {
                CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
                    ctd->td_critnest);
                ctd->td_owepreempt = 1;
                return (0);
        }

        /*
         * Thread is runnable but not yet put on system run queue.
         */
        MPASS(TD_ON_RUNQ(td));
        MPASS(td->td_sched->ke_state != KES_ONRUNQ);
#ifdef KSE
        if (td->td_proc->p_flag & P_HADTHREADS) {
                /*
                 * If this is a threaded process we actually ARE on the
                 * ksegrp run queue so take it off that first.
                 * Also undo any damage done to the last_assigned pointer.
                 * XXX Fix setrunqueue so this isn't needed.
                 */
                struct ksegrp *kg;

                kg = td->td_ksegrp;
                if (kg->kg_last_assigned == td)
                        kg->kg_last_assigned =
                            TAILQ_PREV(td, threadqueue, td_runq);
                TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
        }

#endif
        TD_SET_RUNNING(td);
        CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
            td->td_proc->p_pid, td->td_proc->p_comm);
        mi_switch(SW_INVOL|SW_PREEMPT, td);
        return (1);
#else
        return (0);
#endif
}

#if 0
#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
        printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests                             *
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
        int i;

        bzero(rq, sizeof *rq);
        for (i = 0; i < RQ_NQS; i++)
                TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}
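
/*
 * Illustrative sketch only (not part of the original file): how a queue
 * index maps onto the status-bit array used by runq_setbit()/runq_clrbit()
 * above.  Assuming the usual <sys/runq.h> definitions, RQB_WORD(pri) is
 * "pri >> RQB_L2BPW" and RQB_BIT(pri) is "1 << (pri & (RQB_BPW - 1))".
 * With 32-bit status words (RQB_BPW == 32) and RQ_PPQ == 4, a thread at
 * priority 148 hashes to queue index 148 / 4 == 37 in runq_add(), which
 * lands in word 1, bit 5.  The block is kept under "#if 0" so it is never
 * built.
 */
#if 0
static void
example_runq_bit_mapping(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        /* Mark queue 'pri' non-empty ... */
        rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
        /* ... and empty again. */
        rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}
#endif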

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
        struct rqbits *rqb;
        int pri;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
                        CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
                            rqb->rqb_bits[i], i, pri);
                        return (pri);
                }

        return (-1);
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke, int flags)
{
        struct rqhead *rqh;
        int pri;

        pri = ke->ke_thread->td_priority / RQ_PPQ;
        ke->ke_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
            ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
        } else {
                TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
        }
}

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
        struct rqbits *rqb;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
                            rqb->rqb_bits[i], i);
                        return (1);
                }
        CTR0(KTR_RUNQ, "runq_check: empty");

        return (0);
}

#if defined(SMP) && defined(SCHED_4BSD)
int runq_fuzz = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif

/*
 * Find the highest priority process on the run queue.
 */
struct kse *
runq_choose(struct runq *rq)
{
        struct rqhead *rqh;
        struct kse *ke;
        int pri;

        mtx_assert(&sched_lock, MA_OWNED);
        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
                /* fuzz == 1 is normal.. 0 or less are ignored */
                if (runq_fuzz > 1) {
                        /*
                         * In the first couple of entries, check if
                         * there is one for our CPU as a preference.
                         */
                        int count = runq_fuzz;
                        int cpu = PCPU_GET(cpuid);
                        struct kse *ke2;
                        ke2 = ke = TAILQ_FIRST(rqh);

                        while (count-- && ke2) {
                                if (ke2->ke_thread->td_lastcpu == cpu) {
                                        ke = ke2;
                                        break;
                                }
                                ke2 = TAILQ_NEXT(ke2, ke_procq);
                        }
                } else
#endif
                        ke = TAILQ_FIRST(rqh);
                KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
                return (ke);
        }
        CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

        return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
        struct rqhead *rqh;
        int pri;

#ifdef KSE
        KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
#else
        KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM,
#endif
            ("runq_remove: process swapped out"));
        pri = ke->ke_rqindex;
        rqh = &rq->rq_queues[pri];
        CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p",
            ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
        KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
        TAILQ_REMOVE(rqh, ke, ke_procq);
        if (TAILQ_EMPTY(rqh)) {
                CTR0(KTR_RUNQ, "runq_remove: empty");
                runq_clrbit(rq, pri);
        }
}

/****** functions that are temporarily here ***********/
#include <vm/uma.h>
extern struct mtx kse_zombie_lock;

#ifdef KSE
/*
 * Allocate scheduler specific per-process resources.
 * The thread and ksegrp have already been linked in.
 * In this case just set the default concurrency value.
 *
 * Called from:
 *  proc_init() (UMA init method)
 */
void
sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
{

        /* This can go in sched_fork */
        sched_init_concurrency(kg);
}
#endif

/*
 * A thread is being either created or recycled.
 * Fix up the per-scheduler resources associated with it.
 * Called from:
 *  sched_fork_thread()
 *  thread_dtor()  (*may go away)
 *  thread_init()  (*may go away)
 */
void
sched_newthread(struct thread *td)
{
        struct td_sched *ke;

        ke = (struct td_sched *)(td + 1);
        bzero(ke, sizeof(*ke));
        td->td_sched = ke;
        ke->ke_thread = td;
        ke->ke_state = KES_THREAD;
}

#ifdef KSE
/*
 * Set up an initial concurrency of 1
 * and set the given thread (if given) to be using that
 * concurrency slot.
 * May be used "offline"..before the ksegrp is attached to the world
 * and thus wouldn't need schedlock in that case.
 * Called from:
 *  thr_create()
 *  proc_init() (UMA) via sched_newproc()
 */
void
sched_init_concurrency(struct ksegrp *kg)
{

        CTR1(KTR_RUNQ, "kg %p init slots and concurrency to 1", kg);
        kg->kg_concurrency = 1;
        kg->kg_avail_opennings = 1;
}

/*
 * Change the concurrency of an existing ksegrp to N.
 * Called from:
 *  kse_create()
 *  kse_exit()
 *  thread_exit()
 *  thread_single()
 */
void
sched_set_concurrency(struct ksegrp *kg, int concurrency)
{

        CTR4(KTR_RUNQ, "kg %p set concurrency to %d, slots %d -> %d",
            kg,
            concurrency,
            kg->kg_avail_opennings,
            kg->kg_avail_opennings + (concurrency - kg->kg_concurrency));
        kg->kg_avail_opennings += (concurrency - kg->kg_concurrency);
        kg->kg_concurrency = concurrency;
}

/*
 * Called from thread_exit() for all exiting threads.
 *
 * Not to be confused with sched_exit_thread(), which is only called from
 * thread_exit() for threads exiting without the rest of the process exiting,
 * because sched_exit_thread() is also called from sched_exit() and we would
 * not want to call it twice.
 * XXX This can probably be fixed.
 */
void
sched_thread_exit(struct thread *td)
{

        SLOT_RELEASE(td->td_ksegrp);
        slot_fill(td->td_ksegrp);
}
#endif

#endif /* KERN_SWITCH_INCLUDE */