/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

/*
 * These data structures are allocated within their parent data structure
 * but are scheduler specific.
 */

struct ke_sched {
        int             ske_slice;
        struct runq     *ske_runq;
        /* The following variables are only used for pctcpu calculation */
        int             ske_ltick;      /* Last tick that we were running on */
        int             ske_ftick;      /* First tick that we were running on */
        int             ske_ticks;      /* Tick count */
        u_char          ske_cpu;
};
#define ke_slice        ke_sched->ske_slice
#define ke_runq         ke_sched->ske_runq
#define ke_ltick        ke_sched->ske_ltick
#define ke_ftick        ke_sched->ske_ftick
#define ke_ticks        ke_sched->ske_ticks
#define ke_cpu          ke_sched->ske_cpu

struct kg_sched {
        int     skg_slptime;
};
#define kg_slptime      kg_sched->skg_slptime

struct td_sched {
        int     std_slptime;
        int     std_schedflag;
};
#define td_slptime      td_sched->std_slptime
#define td_schedflag    td_sched->std_schedflag

#define TD_SCHED_BLOAD  0x0001  /*
                                 * thread was counted as being in short
                                 * term sleep.
                                 */
struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 */
#define SCHED_PRI_NRESV 40
#define SCHED_PRI_RANGE ((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) - \
    SCHED_PRI_NRESV)

/*
 * These determine how sleep time affects the priority of a process.
 *
 * SLP_MAX:     Maximum amount of accrued sleep time.
 * SLP_SCALE:   Scale the number of ticks slept across the dynamic priority
 *              range.
 * SLP_TOPRI:   Convert a number of ticks slept into a priority value.
 * SLP_DECAY:   Reduce the sleep time to 50% for every granted slice.
 */
#define SCHED_SLP_MAX   (hz * 2)
#define SCHED_SLP_SCALE(slp)    (((slp) * SCHED_PRI_RANGE) / SCHED_SLP_MAX)
#define SCHED_SLP_TOPRI(slp)    (SCHED_PRI_RANGE - SCHED_SLP_SCALE((slp)) + \
    SCHED_PRI_NRESV / 2)
#define SCHED_SLP_DECAY(slp)    ((slp) / 2)     /* XXX Multiple kses break */

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:   Minimum time slice granted, in units of ticks.
 * SLICE_MAX:   Maximum time slice granted.
 * SLICE_RANGE: Range of available time slices scaled by hz.
 * SLICE_SCALE: The number of slices granted per unit of pri or slp.
 * PRI_TOSLICE: Compute a slice size that is proportional to the priority.
 * SLP_TOSLICE: Compute a slice size that is inversely proportional to the
 *              amount of time slept. (smaller slices for interactive ksegs)
 * PRI_COMP:    This determines what fraction of the actual slice comes from
 *              the slice size computed from the priority.
 * SLP_COMP:    This determines what fraction of the actual slice comes from
 *              the slice size computed from the sleep time.
 */
#define SCHED_SLICE_MIN         (hz / 100)
#define SCHED_SLICE_MAX         (hz / 4)
#define SCHED_SLICE_RANGE       (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define SCHED_SLICE_SCALE(val, max)     (((val) * SCHED_SLICE_RANGE) / (max))
#define SCHED_PRI_TOSLICE(pri)                                          \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE))
#define SCHED_SLP_TOSLICE(slp)                                          \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_SLP_MAX))
#define SCHED_SLP_COMP(slice)   (((slice) / 5) * 3)     /* 60% */
#define SCHED_PRI_COMP(slice)   (((slice) / 5) * 2)     /* 40% */
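
/*
 * Worked example of the macros above (illustrative only; the concrete
 * numbers assume hz = 100 and a 64 entry timeshare priority range in
 * <sys/priority.h>, which may differ on a given configuration):
 * SCHED_PRI_RANGE is 24, SCHED_SLP_MAX is 200 ticks (2 seconds),
 * SCHED_SLICE_MIN is 1 tick and SCHED_SLICE_MAX is 25 ticks.  A kseg that
 * has accrued the full 200 ticks of sleep maps to
 * SCHED_SLP_TOPRI(200) = 24 - 24 + 20 = 20, the best priority reachable
 * without nice, while a kseg that never sleeps maps to 24 - 0 + 20 = 44.
 * The sleepy kseg also receives the smaller slice (SCHED_SLP_TOSLICE(200)
 * is 0), so interactive work runs often but briefly.
 */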

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 */
#define SCHED_CURR(kg)  ((kg)->kg_slptime > (hz / 4) || \
    (kg)->kg_pri_class != PRI_TIMESHARE)

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:      Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:     Number of hz ticks to average the cpu usage across.
 */

#define SCHED_CPU_TIME  60
#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
        struct runq     ksq_runqs[2];
        struct runq     *ksq_curr;
        struct runq     *ksq_next;
        int             ksq_load;       /* Total runnable */
#ifdef SMP
        unsigned int    ksq_rslices;    /* Slices on run queue */
        unsigned int    ksq_bload;      /* Threads waiting on IO */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq     kseq_cpu[MAXCPU];
#define KSEQ_SELF()     (&kseq_cpu[PCPU_GET(cpuid)])
#define KSEQ_CPU(x)     (&kseq_cpu[(x)])
#else
struct kseq     kseq_cpu;
#define KSEQ_SELF()     (&kseq_cpu)
#define KSEQ_CPU(x)     (&kseq_cpu)
#endif

static int sched_slice(struct ksegrp *kg);
static int sched_priority(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static __inline void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif

static __inline void
kseq_add(struct kseq *kseq, struct kse *ke)
{
        runq_add(ke->ke_runq, ke);
        kseq->ksq_load++;
#ifdef SMP
        kseq->ksq_rslices += ke->ke_slice;
#endif
}

static __inline void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_load--;
        runq_remove(ke->ke_runq, ke);
#ifdef SMP
        kseq->ksq_rslices -= ke->ke_slice;
#endif
}

#ifdef SMP
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_bload++;
}

static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_bload--;
}

struct kseq *
kseq_load_highest(void)
{
        struct kseq *kseq;
        int load;
        int cpu;
        int i;

        cpu = 0;
        load = 0;

        for (i = 0; i < mp_maxid; i++) {
                if (CPU_ABSENT(i))
                        continue;
                kseq = KSEQ_CPU(i);
                if (kseq->ksq_load > load) {
                        load = kseq->ksq_load;
                        cpu = i;
                }
        }
        if (load)
                return (KSEQ_CPU(cpu));

        return (NULL);
}
#endif

/*
 * Pick the next kse to run.  If the current queue is empty, swap the
 * current and next queues so that ksegs placed on the next queue get
 * their turn.
 */
struct kse *
kseq_choose(struct kseq *kseq)
{
        struct kse *ke;
        struct runq *swap;

        if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
                swap = kseq->ksq_curr;
                kseq->ksq_curr = kseq->ksq_next;
                kseq->ksq_next = swap;
                ke = runq_choose(kseq->ksq_curr);
        }

        return (ke);
}

static void
kseq_setup(struct kseq *kseq)
{
        kseq->ksq_curr = &kseq->ksq_runqs[0];
        kseq->ksq_next = &kseq->ksq_runqs[1];
        runq_init(kseq->ksq_curr);
        runq_init(kseq->ksq_next);
        kseq->ksq_load = 0;
#ifdef SMP
        kseq->ksq_rslices = 0;
        kseq->ksq_bload = 0;
#endif
}

static void
sched_setup(void *dummy)
{
        int i;

        mtx_lock_spin(&sched_lock);
        /* init kseqs */
        for (i = 0; i < MAXCPU; i++)
                kseq_setup(KSEQ_CPU(i));
        mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static int
sched_priority(struct ksegrp *kg)
{
        int pri;

        if (kg->kg_pri_class != PRI_TIMESHARE)
                return (kg->kg_user_pri);

        pri = SCHED_SLP_TOPRI(kg->kg_slptime);
        CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d",
            kg->kg_slptime, pri);

        pri += PRI_MIN_TIMESHARE;
        pri += kg->kg_nice;

        if (pri > PRI_MAX_TIMESHARE)
                pri = PRI_MAX_TIMESHARE;
        else if (pri < PRI_MIN_TIMESHARE)
                pri = PRI_MIN_TIMESHARE;

        kg->kg_user_pri = pri;

        return (kg->kg_user_pri);
}

/*
 * Calculate a time slice based on the process priority.
 */
static int
sched_slice(struct ksegrp *kg)
{
        int pslice;
        int sslice;
        int slice;
        int pri;

        pri = kg->kg_user_pri;
        pri -= PRI_MIN_TIMESHARE;
        pslice = SCHED_PRI_TOSLICE(pri);
        sslice = SCHED_SLP_TOSLICE(kg->kg_slptime);
        slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice);
        kg->kg_slptime = SCHED_SLP_DECAY(kg->kg_slptime);

        CTR4(KTR_RUNQ,
            "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d",
            pri, sslice, pslice, slice);

        if (slice < SCHED_SLICE_MIN)
                slice = SCHED_SLICE_MIN;
        else if (slice > SCHED_SLICE_MAX)
                slice = SCHED_SLICE_MAX;

        return (slice);
}

int
sched_rr_interval(void)
{
        return (SCHED_SLICE_MAX);
}

void
sched_pctcpu_update(struct kse *ke)
{
        /*
         * Adjust counters and watermark for pctcpu calc.
         */
        ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
            SCHED_CPU_TICKS;
        ke->ke_ltick = ticks;
        ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}
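
/*
 * Illustrative note on the pctcpu fixed point arithmetic (a sketch,
 * assuming sched_clock() below is driven at stathz and that stathz matches
 * the realstathz used in sched_pctcpu()): each stat tick adds 10000 to
 * ke_ticks, while ke_ltick/ke_ftick track the sampling window in hz ticks,
 * and the rescaling above keeps that window nominally SCHED_CPU_TICKS wide.
 * A kse that runs on every stat tick for the full 60 second window
 * accumulates 60 * stathz * 10000 in ke_ticks; sched_pctcpu() then computes
 * rtick = stathz and pctcpu = FSCALE, i.e. 100%.
 */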

#ifdef SMP
/* XXX Should be changed to kseq_load_lowest() */
int
sched_pickcpu(void)
{
        struct kseq *kseq;
        int load;
        int cpu;
        int i;

        if (!smp_started)
                return (0);

        load = 0;
        cpu = 0;

        for (i = 0; i < mp_maxid; i++) {
                if (CPU_ABSENT(i))
                        continue;
                kseq = KSEQ_CPU(i);
                if (kseq->ksq_load < load) {
                        cpu = i;
                        load = kseq->ksq_load;
                }
        }

        CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
        return (cpu);
}
#else
int
sched_pickcpu(void)
{
        return (0);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
        struct kse *ke;
        struct runq *rq;

        mtx_assert(&sched_lock, MA_OWNED);
        ke = td->td_kse;
        td->td_priority = prio;

        if (TD_ON_RUNQ(td)) {
                rq = ke->ke_runq;

                runq_remove(rq, ke);
                runq_add(rq, ke);
        }
}

void
sched_switchout(struct thread *td)
{
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);

        ke = td->td_kse;

        td->td_last_kse = ke;
        td->td_lastcpu = ke->ke_oncpu;
        ke->ke_oncpu = NOCPU;
        ke->ke_flags &= ~KEF_NEEDRESCHED;

        if (TD_IS_RUNNING(td)) {
                setrunqueue(td);
                return;
        } else
                td->td_kse->ke_runq = NULL;

        /*
         * We will not be on the run queue.  So we must be
         * sleeping or similar.
         */
        if (td->td_proc->p_flag & P_KSES)
                kse_reassign(ke);
}

void
sched_switchin(struct thread *td)
{
        /* struct kse *ke = td->td_kse; */
        mtx_assert(&sched_lock, MA_OWNED);

        td->td_kse->ke_oncpu = PCPU_GET(cpuid);
#if SCHED_STRICT_RESCHED
        if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
            td->td_priority != td->td_ksegrp->kg_user_pri)
                curthread->td_kse->ke_flags |= KEF_NEEDRESCHED;
#endif
}

void
sched_nice(struct ksegrp *kg, int nice)
{
        struct thread *td;

        kg->kg_nice = nice;
        sched_priority(kg);
        FOREACH_THREAD_IN_GROUP(kg, td) {
                td->td_kse->ke_flags |= KEF_NEEDRESCHED;
        }
}

void
sched_sleep(struct thread *td, u_char prio)
{
        mtx_assert(&sched_lock, MA_OWNED);

        td->td_slptime = ticks;
        td->td_priority = prio;

        /*
         * If this is an interactive task clear its queue so it moves back
         * on to curr when it wakes up.  Otherwise let it stay on the queue
         * that it was assigned to.
         */
        if (SCHED_CURR(td->td_kse->ke_ksegrp))
                td->td_kse->ke_runq = NULL;
#ifdef SMP
        if (td->td_priority < PZERO) {
                kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
                td->td_schedflag |= TD_SCHED_BLOAD;
        }
#endif
}

void
sched_wakeup(struct thread *td)
{
        struct ksegrp *kg;

        mtx_assert(&sched_lock, MA_OWNED);

        /*
         * Let the kseg know how long we slept for.  This is because process
         * interactivity behavior is modeled in the kseg.
         */
        kg = td->td_ksegrp;

        if (td->td_slptime) {
                kg->kg_slptime += ticks - td->td_slptime;
                if (kg->kg_slptime > SCHED_SLP_MAX)
                        kg->kg_slptime = SCHED_SLP_MAX;
                td->td_priority = sched_priority(kg);
        }
        td->td_slptime = 0;
#ifdef SMP
        if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
                kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
                td->td_schedflag &= ~TD_SCHED_BLOAD;
        }
#endif
        setrunqueue(td);
#if SCHED_STRICT_RESCHED
        if (td->td_priority < curthread->td_priority)
                curthread->td_kse->ke_flags |= KEF_NEEDRESCHED;
#endif
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
        struct kse *ckse;
        struct kse *pkse;

        mtx_assert(&sched_lock, MA_OWNED);
        ckse = FIRST_KSE_IN_KSEGRP(child);
        pkse = FIRST_KSE_IN_KSEGRP(kg);

        /* XXX Need something better here */
        child->kg_slptime = kg->kg_slptime;
        child->kg_user_pri = kg->kg_user_pri;

        if (pkse->ke_cpu != PCPU_GET(cpuid)) {
                printf("pkse->ke_cpu = %d\n", pkse->ke_cpu);
                printf("cpuid = %d", PCPU_GET(cpuid));
                Debugger("stop");
        }

        ckse->ke_slice = pkse->ke_slice;
        ckse->ke_cpu = pkse->ke_cpu;    /* sched_pickcpu(); */
        ckse->ke_runq = NULL;
        /*
         * Claim that we've been running for one second for statistical
         * purposes.
         */
        ckse->ke_ticks = 0;
        ckse->ke_ltick = ticks;
        ckse->ke_ftick = ticks - hz;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
        /* XXX Need something better here */
        mtx_assert(&sched_lock, MA_OWNED);
        kg->kg_slptime = child->kg_slptime;
        sched_priority(kg);
}

void
sched_clock(struct thread *td)
{
        struct kse *ke;
#if SCHED_STRICT_RESCHED
        struct kse *nke;
        struct kseq *kseq;
#endif
        struct ksegrp *kg;

        ke = td->td_kse;
        kg = td->td_ksegrp;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((td != NULL), ("schedclock: null thread pointer"));

        /* Adjust ticks for pctcpu */
        ke->ke_ticks += 10000;
        ke->ke_ltick = ticks;
        /* Go up to one second beyond our max and then trim back down */
        if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
                sched_pctcpu_update(ke);

        if (td->td_kse->ke_flags & KEF_IDLEKSE)
                return;

        /*
         * Check for a higher priority task on the run queue.  This can
         * happen on SMP if another processor woke up a process on our runq.
         */
#if SCHED_STRICT_RESCHED
        kseq = KSEQ_SELF();
        nke = runq_choose(kseq->ksq_curr);

        if (nke && nke->ke_thread &&
            nke->ke_thread->td_priority < td->td_priority)
                ke->ke_flags |= KEF_NEEDRESCHED;
#endif
        /*
         * We used a tick, decrease our total sleep time.  This decreases our
         * "interactivity".
         */
        if (kg->kg_slptime)
                kg->kg_slptime--;
        /*
         * We used up one time slice.
         */
        ke->ke_slice--;
        /*
         * We're out of time, recompute priorities and requeue.
         */
        if (ke->ke_slice == 0) {
                td->td_priority = sched_priority(kg);
                ke->ke_slice = sched_slice(kg);
                ke->ke_flags |= KEF_NEEDRESCHED;
                ke->ke_runq = NULL;
        }
}

int
sched_runnable(void)
{
        struct kseq *kseq;

        kseq = KSEQ_SELF();

        if (kseq->ksq_load)
                return (1);
#ifdef SMP
        /*
         * For SMP we may steal other processors' KSEs.  Just search until we
         * verify that at least one other cpu has a runnable task.
         */
        if (smp_started) {
                int i;

#if 0
                if (kseq->ksq_bload)
                        return (0);
#endif

                for (i = 0; i < mp_maxid; i++) {
                        if (CPU_ABSENT(i))
                                continue;
                        kseq = KSEQ_CPU(i);
                        if (kseq->ksq_load)
                                return (1);
                }
        }
#endif
        return (0);
}

void
sched_userret(struct thread *td)
{
        struct ksegrp *kg;

        kg = td->td_ksegrp;

        if (td->td_priority != kg->kg_user_pri) {
                mtx_lock_spin(&sched_lock);
                td->td_priority = kg->kg_user_pri;
                mtx_unlock_spin(&sched_lock);
        }
}

struct kse *
sched_choose(void)
{
        struct kseq *kseq;
        struct kse *ke;

        kseq = KSEQ_SELF();
        ke = kseq_choose(kseq);

        if (ke) {
                ke->ke_state = KES_THREAD;
                kseq_rem(kseq, ke);
        }

#ifdef SMP
        if (ke == NULL && smp_started) {
#if 0
                if (kseq->ksq_bload)
                        return (NULL);
#endif
                /*
                 * Find the cpu with the highest load and steal one proc.
                 */
                kseq = kseq_load_highest();
                if (kseq == NULL)
                        return (NULL);
                ke = kseq_choose(kseq);
                kseq_rem(kseq, ke);

                ke->ke_state = KES_THREAD;
                ke->ke_runq = NULL;
                ke->ke_cpu = PCPU_GET(cpuid);
        }
#endif
        return (ke);
}

void
sched_add(struct kse *ke)
{
        struct kseq *kseq;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
        KASSERT((ke->ke_thread->td_kse != NULL),
            ("sched_add: No KSE on thread"));
        KASSERT(ke->ke_state != KES_ONRUNQ,
            ("sched_add: kse %p (%s) already in run queue", ke,
            ke->ke_proc->p_comm));
        KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
            ("sched_add: process swapped out"));

        kseq = KSEQ_CPU(ke->ke_cpu);

        if (ke->ke_runq == NULL) {
                if (SCHED_CURR(ke->ke_ksegrp))
                        ke->ke_runq = kseq->ksq_curr;
                else
                        ke->ke_runq = kseq->ksq_next;
        }
        ke->ke_ksegrp->kg_runq_kses++;
        ke->ke_state = KES_ONRUNQ;

        kseq_add(kseq, ke);
}

void
sched_rem(struct kse *ke)
{
        mtx_assert(&sched_lock, MA_OWNED);
        /* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

        ke->ke_runq = NULL;
        ke->ke_state = KES_THREAD;
        ke->ke_ksegrp->kg_runq_kses--;

        kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

fixpt_t
sched_pctcpu(struct kse *ke)
{
        fixpt_t pctcpu;
        int realstathz;

        pctcpu = 0;
        realstathz = stathz ? stathz : hz;

        if (ke->ke_ticks) {
                int rtick;

                /* Update to account for time potentially spent sleeping */
                ke->ke_ltick = ticks;
                sched_pctcpu_update(ke);

                /* How many rticks per second? */
                rtick = ke->ke_ticks / (SCHED_CPU_TIME * 10000);
                pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
        }

        ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

        return (pctcpu);
}

int
sched_sizeof_kse(void)
{
        return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
        return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
        return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
        return (sizeof(struct thread) + sizeof(struct td_sched));
}