/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

#define	SCHED_STRICT_RESCHED	1

/*
 * These data structures are allocated within their parent data structure
 * but are scheduler specific.
 */

struct ke_sched {
	int		ske_slice;
	struct runq	*ske_runq;
	/* The following variables are only used for pctcpu calculation */
	int		ske_ltick;	/* Last tick that we were running on */
	int		ske_ftick;	/* First tick that we were running on */
	int		ske_ticks;	/* Tick count */
	u_char		ske_cpu;
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu

struct kg_sched {
	int	skg_slptime;	/* Number of ticks we vol. slept */
	int	skg_runtime;	/* Number of ticks we were running */
};
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime

struct td_sched {
	int	std_slptime;
	int	std_schedflag;
};
#define	td_slptime	td_sched->std_slptime
#define	td_schedflag	td_sched->std_schedflag

#define	TD_SCHED_BLOAD	0x0001	/*
				 * thread was counted as being in short
				 * term sleep.
				 */
struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		40
#define	SCHED_PRI_BASE		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_DYN		(SCHED_PRI_RANGE - SCHED_PRI_NRESV)
#define	SCHED_PRI_DYN_HALF	(SCHED_PRI_DYN / 2)

/*
 * These determine how sleep time affects the priority of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_THROTTLE:	Divisor for reducing slp/run time.
 * SLP_RATIO:	Compute a bounded ratio of slp time vs run time.
 * SLP_TOPRI:	Convert a number of ticks slept and ticks ran into a priority
 */
#define	SCHED_SLP_RUN_MAX	((hz * 30) * 1024)
#define	SCHED_SLP_RUN_THROTTLE	(10)
static __inline int
sched_slp_ratio(int b, int s)
{
	b /= SCHED_PRI_DYN_HALF;
	if (b == 0)
		return (0);
	s /= b;
	return (s);
}
#define	SCHED_SLP_TOPRI(slp, run)					\
	((((slp) > (run))?						\
	sched_slp_ratio((slp), (run)):					\
	SCHED_PRI_DYN_HALF + (SCHED_PRI_DYN_HALF - sched_slp_ratio((run), (slp))))+ \
	SCHED_PRI_NRESV / 2)
/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per unit of pri or slp.
 * PRI_TOSLICE:	Compute a slice size that is proportional to the priority.
 * SLP_TOSLICE:	Compute a slice size that is inversely proportional to the
 *		amount of time slept. (smaller slices for interactive ksegs)
 * PRI_COMP:	This determines what fraction of the actual slice comes from
 *		the slice size computed from the priority.
 * SLP_COMP:	This determines what fraction of the actual slice comes from
 *		the slice size computed from the sleep time.
 */
#define	SCHED_SLICE_MIN		(hz / 100)
#define	SCHED_SLICE_MAX		(hz / 4)
#define	SCHED_SLICE_RANGE	(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_PRI_TOSLICE(pri)						\
	(SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE))
#define	SCHED_SLP_TOSLICE(slp)						\
	(SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_PRI_DYN))
#define	SCHED_SLP_COMP(slice)	(((slice) / 5) * 3)	/* 60% */
#define	SCHED_PRI_COMP(slice)	(((slice) / 5) * 2)	/* 40% */

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 *
 * XXX nice value should affect how interactive a kg is.
 */
#define	SCHED_CURR(kg)	(((kg)->kg_slptime > (kg)->kg_runtime &&	\
	sched_slp_ratio((kg)->kg_slptime, (kg)->kg_runtime) > 4))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
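 *
 * Purely as an illustration (assuming the common setting of hz = 100), the
 * averaging window is 60 * 100 = 6000 ticks; sched_pctcpu_update() keeps
 * ke_ticks normalized to this window and sched_pctcpu() converts the result
 * into a fixed point percentage.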
 */

#define	SCHED_CPU_TIME	60
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
	struct runq	ksq_runqs[2];
	struct runq	*ksq_curr;
	struct runq	*ksq_next;
	int		ksq_load;	/* Total runnable */
#ifdef SMP
	unsigned int	ksq_rslices;	/* Slices on run queue */
	unsigned int	ksq_bload;	/* Threads waiting on IO */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq	kseq_cpu[MAXCPU];
#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#else
struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static int sched_slice(struct ksegrp *kg);
static int sched_priority(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static __inline void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif

static __inline void
kseq_add(struct kseq *kseq, struct kse *ke)
{
	runq_add(ke->ke_runq, ke);
	kseq->ksq_load++;
#ifdef SMP
	kseq->ksq_rslices += ke->ke_slice;
#endif
}
static __inline void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_load--;
	runq_remove(ke->ke_runq, ke);
#ifdef SMP
	kseq->ksq_rslices -= ke->ke_slice;
#endif
}

#ifdef SMP
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload++;
}

static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload--;
}

struct kseq *
kseq_load_highest(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	cpu = 0;
	load = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_load > load) {
			load = kseq->ksq_load;
			cpu = i;
		}
	}
	if (load)
		return (KSEQ_CPU(cpu));

	return (NULL);
}
#endif

struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
		swap = kseq->ksq_curr;
		kseq->ksq_curr = kseq->ksq_next;
		kseq->ksq_next = swap;
		ke = runq_choose(kseq->ksq_curr);
	}

	return (ke);
}


static void
kseq_setup(struct kseq *kseq)
{
	kseq->ksq_curr = &kseq->ksq_runqs[0];
	kseq->ksq_next = &kseq->ksq_runqs[1];
	runq_init(kseq->ksq_curr);
	runq_init(kseq->ksq_next);
	kseq->ksq_load = 0;
#ifdef SMP
	kseq->ksq_rslices = 0;
	kseq->ksq_bload = 0;
#endif
}

static void
sched_setup(void *dummy)
{
	int i;

	mtx_lock_spin(&sched_lock);
	/* init kseqs */
	for (i = 0; i < MAXCPU; i++)
		kseq_setup(KSEQ_CPU(i));
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
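 *
 * Rough sketch of the mapping done by SCHED_SLP_TOPRI(): a ksegrp that has
 * slept far more than it has run maps to a value near 0, one with roughly
 * equal sleep and run time maps near SCHED_PRI_DYN_HALF, and one that has
 * run far more than it has slept maps near SCHED_PRI_DYN.  SCHED_PRI_NRESV/2
 * is then added so the result lands above the priorities reserved for
 * negative nice values before PRI_MIN_TIMESHARE and the nice value are
 * applied below.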
 */
static int
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return (kg->kg_user_pri);

	pri = SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime);
	CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d",
	    kg->kg_slptime, pri);

	pri += PRI_MIN_TIMESHARE;
	pri += kg->kg_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return (kg->kg_user_pri);
}

/*
 * Calculate a time slice based on the process priority.
 */
static int
sched_slice(struct ksegrp *kg)
{
	int pslice;
	int sslice;
	int slice;
	int pri;

	pri = kg->kg_user_pri;
	pri -= PRI_MIN_TIMESHARE;
	pslice = SCHED_PRI_TOSLICE(pri);
	sslice = SCHED_PRI_TOSLICE(SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime));
/*
	SCHED_SLP_TOSLICE(SCHED_SLP_RATIO(
	    kg->kg_slptime, kg->kg_runtime));
*/
	slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice);

	CTR4(KTR_RUNQ,
	    "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d",
	    pri, sslice, pslice, slice);

	if (slice < SCHED_SLICE_MIN)
		slice = SCHED_SLICE_MIN;
	else if (slice > SCHED_SLICE_MAX)
		slice = SCHED_SLICE_MAX;

	/*
	 * Every time we grant a new slice check to see if we need to scale
	 * back the slp and run time in the kg.  This will cause us to forget
	 * old interactivity while maintaining the current ratio.
	 */
	if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) {
		kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE;
		kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE;
	}

	return (slice);
}

int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	/*
	 * Shift the tick count out so that the divide doesn't round away
	 * our results.
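	 *
	 * In other words: scale ke_ticks up by 2^10, renormalize it from the
	 * window actually sampled (ke_ltick - ke_ftick) to the nominal
	 * SCHED_CPU_TICKS window, scale it back down, and then slide the
	 * window forward so that it ends at the current tick.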
	 */
	ke->ke_ticks <<= 10;
	ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
	    SCHED_CPU_TICKS;
	ke->ke_ticks >>= 10;
	ke->ke_ltick = ticks;
	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

#ifdef SMP
/* XXX Should be changed to kseq_load_lowest() */
int
sched_pickcpu(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	if (!smp_started)
		return (0);

	load = 0;
	cpu = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_load < load) {
			cpu = i;
			load = kseq->ksq_load;
		}
	}

	CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
	return (cpu);
}
#else
int
sched_pickcpu(void)
{
	return (0);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
	struct kse *ke;
	struct runq *rq;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	td->td_priority = prio;

	if (TD_ON_RUNQ(td)) {
		rq = ke->ke_runq;

		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}

void
sched_switchout(struct thread *td)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;

	td->td_last_kse = ke;
	td->td_lastcpu = ke->ke_oncpu;
	ke->ke_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;

	if (TD_IS_RUNNING(td)) {
		setrunqueue(td);
		return;
	} else
		td->td_kse->ke_runq = NULL;

	/*
	 * We will not be on the run queue.  So we must be
	 * sleeping or similar.
	 */
	if (td->td_proc->p_flag & P_THREADED)
		kse_reassign(ke);
}

void
sched_switchin(struct thread *td)
{
	/* struct kse *ke = td->td_kse; */
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_kse->ke_oncpu = PCPU_GET(cpuid);
#if SCHED_STRICT_RESCHED
	if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
	    td->td_priority != td->td_ksegrp->kg_user_pri)
		curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

void
sched_nice(struct ksegrp *kg, int nice)
{
	struct thread *td;

	kg->kg_nice = nice;
	sched_priority(kg);
	FOREACH_THREAD_IN_GROUP(kg, td) {
		td->td_flags |= TDF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td, u_char prio)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_priority = prio;

	/*
	 * If this is an interactive task clear its queue so it moves back
	 * on to curr when it wakes up.  Otherwise let it stay on the queue
	 * that it was assigned to.
	 */
	if (SCHED_CURR(td->td_kse->ke_ksegrp))
		td->td_kse->ke_runq = NULL;
#ifdef SMP
	if (td->td_priority < PZERO) {
		kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag |= TD_SCHED_BLOAD;
	}
#endif
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
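	 *
	 * The elapsed sleep time is scaled by 1024 so that it is in the same
	 * fixed point units as kg_runtime, which sched_clock() increments by
	 * 1024 on every tick that the kseg spends running.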
	 */
	if (td->td_slptime) {
		struct ksegrp *kg;

		kg = td->td_ksegrp;
		kg->kg_slptime += (ticks - td->td_slptime) * 1024;
		sched_priority(kg);
		td->td_slptime = 0;
	}
#ifdef SMP
	if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
		kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag &= ~TD_SCHED_BLOAD;
	}
#endif
	setrunqueue(td);
#if SCHED_STRICT_RESCHED
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
	struct kse *ckse;
	struct kse *pkse;

	mtx_assert(&sched_lock, MA_OWNED);
	ckse = FIRST_KSE_IN_KSEGRP(child);
	pkse = FIRST_KSE_IN_KSEGRP(kg);

	/* XXX Need something better here */
	if (kg->kg_slptime > kg->kg_runtime) {
		child->kg_slptime = SCHED_PRI_DYN;
		child->kg_runtime = kg->kg_slptime / SCHED_PRI_DYN;
	} else {
		child->kg_runtime = SCHED_PRI_DYN;
		child->kg_slptime = kg->kg_runtime / SCHED_PRI_DYN;
	}
#if 0
	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
#endif
	child->kg_user_pri = kg->kg_user_pri;

#if 0
	if (pkse->ke_cpu != PCPU_GET(cpuid)) {
		printf("pkse->ke_cpu = %d\n", pkse->ke_cpu);
		printf("cpuid = %d", PCPU_GET(cpuid));
		Debugger("stop");
	}
#endif

	ckse->ke_slice = pkse->ke_slice;
	ckse->ke_cpu = pkse->ke_cpu;	/* sched_pickcpu(); */
	ckse->ke_runq = NULL;
	/*
	 * Claim that we've been running for one second for statistical
	 * purposes.
	 */
	ckse->ke_ticks = 0;
	ckse->ke_ltick = ticks;
	ckse->ke_ftick = ticks - hz;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
	/* XXX Need something better here */
	mtx_assert(&sched_lock, MA_OWNED);
	kg->kg_slptime = child->kg_slptime;
	kg->kg_runtime = child->kg_runtime;
	sched_priority(kg);
}

void
sched_clock(struct thread *td)
{
	struct kse *ke;
#if SCHED_STRICT_RESCHED
	struct kse *nke;
	struct kseq *kseq;
#endif
	struct ksegrp *kg;


	ke = td->td_kse;
	kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;
	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_kse->ke_flags & KEF_IDLEKSE)
		return;

	/*
	 * Check for a higher priority task on the run queue.  This can happen
	 * on SMP if another processor woke up a process on our runq.
	 */
#if SCHED_STRICT_RESCHED
	kseq = KSEQ_SELF();
	nke = runq_choose(kseq->ksq_curr);

	if (nke && nke->ke_thread &&
	    nke->ke_thread->td_priority < td->td_priority)
		td->td_flags |= TDF_NEEDRESCHED;
#endif
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute our
	 * "interactivity".
	 */
	kg->kg_runtime += 1024;

	/*
	 * We used up one time slice.
	 */
	ke->ke_slice--;
	/*
	 * We're out of time, recompute priorities and requeue
	 */
	if (ke->ke_slice == 0) {
		td->td_priority = sched_priority(kg);
		ke->ke_slice = sched_slice(kg);
		td->td_flags |= TDF_NEEDRESCHED;
		ke->ke_runq = NULL;
	}
}

int
sched_runnable(void)
{
	struct kseq *kseq;

	kseq = KSEQ_SELF();

	if (kseq->ksq_load)
		return (1);
#ifdef SMP
	/*
	 * For SMP we may steal other processors' KSEs.  Just search until we
	 * verify that at least one other cpu has a runnable task.
	 */
	if (smp_started) {
		int i;

#if 0
		if (kseq->ksq_bload)
			return (0);
#endif

		for (i = 0; i < mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			kseq = KSEQ_CPU(i);
			if (kseq->ksq_load)
				return (1);
		}
	}
#endif
	return (0);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	kseq = KSEQ_SELF();
	ke = kseq_choose(kseq);

	if (ke) {
		ke->ke_state = KES_THREAD;
		kseq_rem(kseq, ke);
	}

#ifdef SMP
	if (ke == NULL && smp_started) {
#if 0
		if (kseq->ksq_bload)
			return (NULL);
#endif
		/*
		 * Find the cpu with the highest load and steal one proc.
		 */
		kseq = kseq_load_highest();
		if (kseq == NULL)
			return (NULL);
		ke = kseq_choose(kseq);
		kseq_rem(kseq, ke);

		ke->ke_state = KES_THREAD;
		ke->ke_runq = NULL;
		ke->ke_cpu = PCPU_GET(cpuid);
	}
#endif
	return (ke);
}

void
sched_add(struct kse *ke)
{
	struct kseq *kseq;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));

	/*
	 * Timeshare threads get placed on the appropriate queue on their
	 * bound cpu.
	 */
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
		kseq = KSEQ_CPU(ke->ke_cpu);

		if (ke->ke_runq == NULL) {
			if (SCHED_CURR(ke->ke_ksegrp))
				ke->ke_runq = kseq->ksq_curr;
			else
				ke->ke_runq = kseq->ksq_next;
		}
	/*
	 * If we're a real-time or interrupt thread place us on the curr
	 * queue for the current processor.  Hopefully this will yield the
	 * lowest latency response.
	 */
	} else {
		kseq = KSEQ_SELF();
		ke->ke_runq = kseq->ksq_curr;
	}
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	kseq_add(kseq, ke);
}

void
sched_rem(struct kse *ke)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

	ke->ke_runq = NULL;
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;

	kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

fixpt_t
sched_pctcpu(struct kse *ke)
{
	fixpt_t pctcpu;
	int realstathz;

	pctcpu = 0;
	realstathz = stathz ? stathz : hz;

	if (ke->ke_ticks) {
		int rtick;

		/* Update to account for time potentially spent sleeping */
		ke->ke_ltick = ticks;
		sched_pctcpu_update(ke);

		/* How many rtick per second ? */
		rtick = ke->ke_ticks / SCHED_CPU_TIME;
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

	return (pctcpu);
}

int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}