1 /*- 2 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/kernel.h> 32 #include <sys/ktr.h> 33 #include <sys/lock.h> 34 #include <sys/mutex.h> 35 #include <sys/proc.h> 36 #include <sys/sched.h> 37 #include <sys/smp.h> 38 #include <sys/sx.h> 39 #include <sys/sysctl.h> 40 #include <sys/sysproto.h> 41 #include <sys/vmmeter.h> 42 #ifdef DDB 43 #include <ddb/ddb.h> 44 #endif 45 #ifdef KTRACE 46 #include <sys/uio.h> 47 #include <sys/ktrace.h> 48 #endif 49 50 #include <machine/cpu.h> 51 52 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 53 /* XXX This is bogus compatability crap for ps */ 54 static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 55 SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 56 57 static void sched_setup(void *dummy); 58 SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 59 60 int realstathz; 61 62 #define SCHED_STRICT_RESCHED 1 63 64 /* 65 * These datastructures are allocated within their parent datastructure but 66 * are scheduler specific. 67 */ 68 69 struct ke_sched { 70 int ske_slice; 71 struct runq *ske_runq; 72 /* The following variables are only used for pctcpu calculation */ 73 int ske_ltick; /* Last tick that we were running on */ 74 int ske_ftick; /* First tick that we were running on */ 75 int ske_ticks; /* Tick count */ 76 u_char ske_cpu; 77 }; 78 #define ke_slice ke_sched->ske_slice 79 #define ke_runq ke_sched->ske_runq 80 #define ke_ltick ke_sched->ske_ltick 81 #define ke_ftick ke_sched->ske_ftick 82 #define ke_ticks ke_sched->ske_ticks 83 #define ke_cpu ke_sched->ske_cpu 84 85 struct kg_sched { 86 int skg_slptime; /* Number of ticks we vol. slept */ 87 int skg_runtime; /* Number of ticks we were running */ 88 }; 89 #define kg_slptime kg_sched->skg_slptime 90 #define kg_runtime kg_sched->skg_runtime 91 92 struct td_sched { 93 int std_slptime; 94 int std_schedflag; 95 }; 96 #define td_slptime td_sched->std_slptime 97 #define td_schedflag td_sched->std_schedflag 98 99 #define TD_SCHED_BLOAD 0x0001 /* 100 * thread was counted as being in short 101 * term sleep. 102 */ 103 struct td_sched td_sched; 104 struct ke_sched ke_sched; 105 struct kg_sched kg_sched; 106 107 struct ke_sched *kse0_sched = &ke_sched; 108 struct kg_sched *ksegrp0_sched = &kg_sched; 109 struct p_sched *proc0_sched = NULL; 110 struct td_sched *thread0_sched = &td_sched; 111 112 /* 113 * This priority range has 20 priorities on either end that are reachable 114 * only through nice values. 115 * 116 * PRI_RANGE: Total priority range for timeshare threads. 117 * PRI_NRESV: Reserved priorities for nice. 118 * PRI_BASE: The start of the dynamic range. 119 * DYN_RANGE: Number of priorities that are available int the dynamic 120 * priority range. 121 * DYN_HALF: Half of DYN_RANGE for convenience elsewhere. 122 * PRI_DYN: The dynamic priority which is derived from the number of ticks 123 * running vs the total number of ticks. 124 */ 125 #define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 126 #define SCHED_PRI_NRESV 40 127 #define SCHED_PRI_BASE ((SCHED_PRI_NRESV / 2) + PRI_MIN_TIMESHARE) 128 #define SCHED_DYN_RANGE (SCHED_PRI_RANGE - SCHED_PRI_NRESV) 129 #define SCHED_DYN_HALF (SCHED_DYN_RANGE / 2) 130 #define SCHED_PRI_DYN(run, total) (((run) * SCHED_DYN_RANGE) / (total)) 131 132 133 /* 134 * These determine the interactivity of a process. 135 * 136 * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 137 * before throttling back. 138 * SLP_RUN_THROTTLE: Divisor for reducing slp/run time. 139 * INTERACT_RANGE: Range of interactivity values. Smaller is better. 140 * INTERACT_HALF: Convenience define, half of the interactivity range. 141 * INTERACT_THRESH: Threshhold for placement on the current runq. 142 */ 143 #define SCHED_SLP_RUN_MAX ((hz * 30) << 10) 144 #define SCHED_SLP_RUN_THROTTLE (10) 145 #define SCHED_INTERACT_RANGE (100) 146 #define SCHED_INTERACT_HALF (SCHED_INTERACT_RANGE / 2) 147 #define SCHED_INTERACT_THRESH (10) 148 149 /* 150 * These parameters and macros determine the size of the time slice that is 151 * granted to each thread. 152 * 153 * SLICE_MIN: Minimum time slice granted, in units of ticks. 154 * SLICE_MAX: Maximum time slice granted. 155 * SLICE_RANGE: Range of available time slices scaled by hz. 156 * SLICE_SCALE: The number slices granted per unit of pri or slp. 157 * PRI_TOSLICE: Compute a slice size that is proportional to the priority. 158 * INTERACT_TOSLICE: Compute a slice size that is inversely proportional to 159 * the amount of time slept. (smaller slices for interactive ksegs) 160 * PRI_COMP: This determines what fraction of the actual slice comes from 161 * the slice size computed from the priority. 162 * INTERACT_COMP:This determines what component of the actual slice comes from 163 * the slize size computed from the interactivity score. 164 */ 165 #define SCHED_SLICE_MIN (hz / 100) 166 #define SCHED_SLICE_MAX (hz / 10) 167 #define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 168 #define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 169 #define SCHED_INTERACT_COMP(slice) ((slice) / 2) /* 50% */ 170 #define SCHED_PRI_COMP(slice) ((slice) / 2) /* 50% */ 171 #define SCHED_PRI_TOSLICE(pri) \ 172 (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE)) 173 #define SCHED_INTERACT_TOSLICE(score) \ 174 (SCHED_SLICE_SCALE((score), SCHED_INTERACT_RANGE)) 175 176 /* 177 * This macro determines whether or not the kse belongs on the current or 178 * next run queue. 179 * 180 * XXX nice value should effect how interactive a kg is. 181 */ 182 #define SCHED_CURR(kg) (sched_interact_score(kg) < SCHED_INTERACT_THRESH) 183 184 /* 185 * Cpu percentage computation macros and defines. 186 * 187 * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 188 * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 189 */ 190 191 #define SCHED_CPU_TIME 60 192 #define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 193 194 /* 195 * kseq - pair of runqs per processor 196 */ 197 198 struct kseq { 199 struct runq ksq_runqs[2]; 200 struct runq *ksq_curr; 201 struct runq *ksq_next; 202 int ksq_load; /* Total runnable */ 203 #ifdef SMP 204 unsigned int ksq_rslices; /* Slices on run queue */ 205 unsigned int ksq_bload; /* Threads waiting on IO */ 206 #endif 207 }; 208 209 /* 210 * One kse queue per processor. 211 */ 212 #ifdef SMP 213 struct kseq kseq_cpu[MAXCPU]; 214 #define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 215 #define KSEQ_CPU(x) (&kseq_cpu[(x)]) 216 #else 217 struct kseq kseq_cpu; 218 #define KSEQ_SELF() (&kseq_cpu) 219 #define KSEQ_CPU(x) (&kseq_cpu) 220 #endif 221 222 static int sched_slice(struct ksegrp *kg); 223 static int sched_priority(struct ksegrp *kg); 224 static int sched_interact_score(struct ksegrp *kg); 225 void sched_pctcpu_update(struct kse *ke); 226 int sched_pickcpu(void); 227 228 /* Operations on per processor queues */ 229 static struct kse * kseq_choose(struct kseq *kseq); 230 static void kseq_setup(struct kseq *kseq); 231 static __inline void kseq_add(struct kseq *kseq, struct kse *ke); 232 static __inline void kseq_rem(struct kseq *kseq, struct kse *ke); 233 #ifdef SMP 234 static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke); 235 static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke); 236 struct kseq * kseq_load_highest(void); 237 #endif 238 239 static __inline void 240 kseq_add(struct kseq *kseq, struct kse *ke) 241 { 242 runq_add(ke->ke_runq, ke); 243 kseq->ksq_load++; 244 #ifdef SMP 245 kseq->ksq_rslices += ke->ke_slice; 246 #endif 247 } 248 static __inline void 249 kseq_rem(struct kseq *kseq, struct kse *ke) 250 { 251 kseq->ksq_load--; 252 runq_remove(ke->ke_runq, ke); 253 #ifdef SMP 254 kseq->ksq_rslices -= ke->ke_slice; 255 #endif 256 } 257 258 #ifdef SMP 259 static __inline void 260 kseq_sleep(struct kseq *kseq, struct kse *ke) 261 { 262 kseq->ksq_bload++; 263 } 264 265 static __inline void 266 kseq_wakeup(struct kseq *kseq, struct kse *ke) 267 { 268 kseq->ksq_bload--; 269 } 270 271 struct kseq * 272 kseq_load_highest(void) 273 { 274 struct kseq *kseq; 275 int load; 276 int cpu; 277 int i; 278 279 cpu = 0; 280 load = 0; 281 282 for (i = 0; i < mp_maxid; i++) { 283 if (CPU_ABSENT(i)) 284 continue; 285 kseq = KSEQ_CPU(i); 286 if (kseq->ksq_load > load) { 287 load = kseq->ksq_load; 288 cpu = i; 289 } 290 } 291 if (load) 292 return (KSEQ_CPU(cpu)); 293 294 return (NULL); 295 } 296 #endif 297 298 struct kse * 299 kseq_choose(struct kseq *kseq) 300 { 301 struct kse *ke; 302 struct runq *swap; 303 304 if ((ke = runq_choose(kseq->ksq_curr)) == NULL) { 305 swap = kseq->ksq_curr; 306 kseq->ksq_curr = kseq->ksq_next; 307 kseq->ksq_next = swap; 308 ke = runq_choose(kseq->ksq_curr); 309 } 310 311 return (ke); 312 } 313 314 315 static void 316 kseq_setup(struct kseq *kseq) 317 { 318 kseq->ksq_curr = &kseq->ksq_runqs[0]; 319 kseq->ksq_next = &kseq->ksq_runqs[1]; 320 runq_init(kseq->ksq_curr); 321 runq_init(kseq->ksq_next); 322 kseq->ksq_load = 0; 323 #ifdef SMP 324 kseq->ksq_rslices = 0; 325 kseq->ksq_bload = 0; 326 #endif 327 } 328 329 static void 330 sched_setup(void *dummy) 331 { 332 int i; 333 334 realstathz = stathz ? stathz : hz; 335 336 mtx_lock_spin(&sched_lock); 337 /* init kseqs */ 338 for (i = 0; i < MAXCPU; i++) 339 kseq_setup(KSEQ_CPU(i)); 340 mtx_unlock_spin(&sched_lock); 341 } 342 343 /* 344 * Scale the scheduling priority according to the "interactivity" of this 345 * process. 346 */ 347 static int 348 sched_priority(struct ksegrp *kg) 349 { 350 int pri; 351 352 if (kg->kg_pri_class != PRI_TIMESHARE) 353 return (kg->kg_user_pri); 354 355 pri = sched_interact_score(kg) * SCHED_DYN_RANGE / SCHED_INTERACT_RANGE; 356 pri += SCHED_PRI_BASE; 357 pri += kg->kg_nice; 358 359 if (pri > PRI_MAX_TIMESHARE) 360 pri = PRI_MAX_TIMESHARE; 361 else if (pri < PRI_MIN_TIMESHARE) 362 pri = PRI_MIN_TIMESHARE; 363 364 kg->kg_user_pri = pri; 365 366 return (kg->kg_user_pri); 367 } 368 369 /* 370 * Calculate a time slice based on the process priority. 371 */ 372 static int 373 sched_slice(struct ksegrp *kg) 374 { 375 int pslice; 376 int islice; 377 int slice; 378 int pri; 379 380 pri = kg->kg_user_pri; 381 pri -= PRI_MIN_TIMESHARE; 382 pslice = SCHED_PRI_TOSLICE(pri); 383 islice = SCHED_INTERACT_TOSLICE(sched_interact_score(kg)); 384 slice = SCHED_INTERACT_COMP(islice) + SCHED_PRI_COMP(pslice); 385 386 if (slice < SCHED_SLICE_MIN) 387 slice = SCHED_SLICE_MIN; 388 else if (slice > SCHED_SLICE_MAX) 389 slice = SCHED_SLICE_MAX; 390 391 /* 392 * Every time we grant a new slice check to see if we need to scale 393 * back the slp and run time in the kg. This will cause us to forget 394 * old interactivity while maintaining the current ratio. 395 */ 396 if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) { 397 kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE; 398 kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE; 399 } 400 401 return (slice); 402 } 403 404 static int 405 sched_interact_score(struct ksegrp *kg) 406 { 407 int big; 408 int small; 409 int base; 410 411 if (kg->kg_runtime > kg->kg_slptime) { 412 big = kg->kg_runtime; 413 small = kg->kg_slptime; 414 base = SCHED_INTERACT_HALF; 415 } else { 416 big = kg->kg_slptime; 417 small = kg->kg_runtime; 418 base = 0; 419 } 420 421 big /= SCHED_INTERACT_HALF; 422 if (big != 0) 423 small /= big; 424 else 425 small = 0; 426 427 small += base; 428 /* XXX Factor in nice */ 429 return (small); 430 } 431 432 int 433 sched_rr_interval(void) 434 { 435 return (SCHED_SLICE_MAX); 436 } 437 438 void 439 sched_pctcpu_update(struct kse *ke) 440 { 441 /* 442 * Adjust counters and watermark for pctcpu calc. 443 */ 444 /* 445 * Shift the tick count out so that the divide doesn't round away 446 * our results. 447 */ 448 ke->ke_ticks <<= 10; 449 ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) * 450 SCHED_CPU_TICKS; 451 ke->ke_ticks >>= 10; 452 ke->ke_ltick = ticks; 453 ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 454 } 455 456 #ifdef SMP 457 /* XXX Should be changed to kseq_load_lowest() */ 458 int 459 sched_pickcpu(void) 460 { 461 struct kseq *kseq; 462 int load; 463 int cpu; 464 int i; 465 466 if (!smp_started) 467 return (0); 468 469 load = 0; 470 cpu = 0; 471 472 for (i = 0; i < mp_maxid; i++) { 473 if (CPU_ABSENT(i)) 474 continue; 475 kseq = KSEQ_CPU(i); 476 if (kseq->ksq_load < load) { 477 cpu = i; 478 load = kseq->ksq_load; 479 } 480 } 481 482 CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu); 483 return (cpu); 484 } 485 #else 486 int 487 sched_pickcpu(void) 488 { 489 return (0); 490 } 491 #endif 492 493 void 494 sched_prio(struct thread *td, u_char prio) 495 { 496 struct kse *ke; 497 struct runq *rq; 498 499 mtx_assert(&sched_lock, MA_OWNED); 500 ke = td->td_kse; 501 td->td_priority = prio; 502 503 if (TD_ON_RUNQ(td)) { 504 rq = ke->ke_runq; 505 506 runq_remove(rq, ke); 507 runq_add(rq, ke); 508 } 509 } 510 511 void 512 sched_switchout(struct thread *td) 513 { 514 struct kse *ke; 515 516 mtx_assert(&sched_lock, MA_OWNED); 517 518 ke = td->td_kse; 519 520 td->td_last_kse = ke; 521 td->td_lastcpu = ke->ke_oncpu; 522 ke->ke_oncpu = NOCPU; 523 td->td_flags &= ~TDF_NEEDRESCHED; 524 525 if (TD_IS_RUNNING(td)) { 526 setrunqueue(td); 527 return; 528 } 529 td->td_kse->ke_runq = NULL; 530 531 /* 532 * We will not be on the run queue. So we must be 533 * sleeping or similar. 534 */ 535 if (td->td_proc->p_flag & P_THREADED) 536 kse_reassign(ke); 537 } 538 539 void 540 sched_switchin(struct thread *td) 541 { 542 /* struct kse *ke = td->td_kse; */ 543 mtx_assert(&sched_lock, MA_OWNED); 544 545 td->td_kse->ke_oncpu = PCPU_GET(cpuid); 546 #if SCHED_STRICT_RESCHED 547 if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE && 548 td->td_priority != td->td_ksegrp->kg_user_pri) 549 curthread->td_flags |= TDF_NEEDRESCHED; 550 #endif 551 } 552 553 void 554 sched_nice(struct ksegrp *kg, int nice) 555 { 556 struct thread *td; 557 558 kg->kg_nice = nice; 559 sched_priority(kg); 560 FOREACH_THREAD_IN_GROUP(kg, td) { 561 td->td_flags |= TDF_NEEDRESCHED; 562 } 563 } 564 565 void 566 sched_sleep(struct thread *td, u_char prio) 567 { 568 mtx_assert(&sched_lock, MA_OWNED); 569 570 td->td_slptime = ticks; 571 td->td_priority = prio; 572 573 #ifdef SMP 574 if (td->td_priority < PZERO) { 575 kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse); 576 td->td_schedflag |= TD_SCHED_BLOAD; 577 } 578 #endif 579 } 580 581 void 582 sched_wakeup(struct thread *td) 583 { 584 mtx_assert(&sched_lock, MA_OWNED); 585 586 /* 587 * Let the kseg know how long we slept for. This is because process 588 * interactivity behavior is modeled in the kseg. 589 */ 590 if (td->td_slptime) { 591 struct ksegrp *kg; 592 593 kg = td->td_ksegrp; 594 kg->kg_slptime += (ticks - td->td_slptime) << 10; 595 sched_priority(kg); 596 td->td_slptime = 0; 597 } 598 #ifdef SMP 599 if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) { 600 kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse); 601 td->td_schedflag &= ~TD_SCHED_BLOAD; 602 } 603 #endif 604 setrunqueue(td); 605 #if SCHED_STRICT_RESCHED 606 if (td->td_priority < curthread->td_priority) 607 curthread->td_flags |= TDF_NEEDRESCHED; 608 #endif 609 } 610 611 /* 612 * Penalize the parent for creating a new child and initialize the child's 613 * priority. 614 */ 615 void 616 sched_fork(struct ksegrp *kg, struct ksegrp *child) 617 { 618 struct kse *ckse; 619 struct kse *pkse; 620 621 mtx_assert(&sched_lock, MA_OWNED); 622 ckse = FIRST_KSE_IN_KSEGRP(child); 623 pkse = FIRST_KSE_IN_KSEGRP(kg); 624 625 /* XXX Need something better here */ 626 if (kg->kg_slptime > kg->kg_runtime) { 627 child->kg_slptime = SCHED_DYN_RANGE; 628 child->kg_runtime = kg->kg_slptime / SCHED_DYN_RANGE; 629 } else { 630 child->kg_runtime = SCHED_DYN_RANGE; 631 child->kg_slptime = kg->kg_runtime / SCHED_DYN_RANGE; 632 } 633 #if 0 634 child->kg_slptime = kg->kg_slptime; 635 child->kg_runtime = kg->kg_runtime; 636 #endif 637 child->kg_user_pri = kg->kg_user_pri; 638 639 #if 0 640 if (pkse->ke_cpu != PCPU_GET(cpuid)) { 641 printf("pkse->ke_cpu = %d\n", pkse->ke_cpu); 642 printf("cpuid = %d", PCPU_GET(cpuid)); 643 Debugger("stop"); 644 } 645 #endif 646 647 ckse->ke_slice = pkse->ke_slice; 648 ckse->ke_cpu = pkse->ke_cpu; /* sched_pickcpu(); */ 649 ckse->ke_runq = NULL; 650 /* 651 * Claim that we've been running for one second for statistical 652 * purposes. 653 */ 654 ckse->ke_ticks = 0; 655 ckse->ke_ltick = ticks; 656 ckse->ke_ftick = ticks - hz; 657 } 658 659 /* 660 * Return some of the child's priority and interactivity to the parent. 661 */ 662 void 663 sched_exit(struct ksegrp *kg, struct ksegrp *child) 664 { 665 /* XXX Need something better here */ 666 mtx_assert(&sched_lock, MA_OWNED); 667 kg->kg_slptime = child->kg_slptime; 668 kg->kg_runtime = child->kg_runtime; 669 sched_priority(kg); 670 } 671 672 void 673 sched_clock(struct thread *td) 674 { 675 struct kse *ke; 676 #if SCHED_STRICT_RESCHED 677 struct kse *nke; 678 struct kseq *kseq; 679 #endif 680 struct ksegrp *kg; 681 682 683 ke = td->td_kse; 684 kg = td->td_ksegrp; 685 686 mtx_assert(&sched_lock, MA_OWNED); 687 KASSERT((td != NULL), ("schedclock: null thread pointer")); 688 689 /* Adjust ticks for pctcpu */ 690 ke->ke_ticks++; 691 ke->ke_ltick = ticks; 692 /* Go up to one second beyond our max and then trim back down */ 693 if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 694 sched_pctcpu_update(ke); 695 696 if (td->td_kse->ke_flags & KEF_IDLEKSE) 697 return; 698 699 /* 700 * Check for a higher priority task on the run queue. This can happen 701 * on SMP if another processor woke up a process on our runq. 702 */ 703 #if SCHED_STRICT_RESCHED 704 kseq = KSEQ_SELF(); 705 nke = runq_choose(kseq->ksq_curr); 706 707 if (nke && nke->ke_thread && 708 nke->ke_thread->td_priority < td->td_priority) 709 td->td_flags |= TDF_NEEDRESCHED; 710 #endif 711 /* 712 * We used a tick charge it to the ksegrp so that we can compute our 713 * "interactivity". 714 */ 715 kg->kg_runtime += 1 << 10; 716 717 /* 718 * We used up one time slice. 719 */ 720 ke->ke_slice--; 721 /* 722 * We're out of time, recompute priorities and requeue 723 */ 724 if (ke->ke_slice <= 0) { 725 sched_priority(kg); 726 ke->ke_slice = sched_slice(kg); 727 td->td_flags |= TDF_NEEDRESCHED; 728 ke->ke_runq = NULL; 729 } 730 } 731 732 int 733 sched_runnable(void) 734 { 735 struct kseq *kseq; 736 737 kseq = KSEQ_SELF(); 738 739 if (kseq->ksq_load) 740 return (1); 741 #ifdef SMP 742 /* 743 * For SMP we may steal other processor's KSEs. Just search until we 744 * verify that at least on other cpu has a runnable task. 745 */ 746 if (smp_started) { 747 int i; 748 749 #if 0 750 if (kseq->ksq_bload) 751 return (0); 752 #endif 753 754 for (i = 0; i < mp_maxid; i++) { 755 if (CPU_ABSENT(i)) 756 continue; 757 kseq = KSEQ_CPU(i); 758 if (kseq->ksq_load) 759 return (1); 760 } 761 } 762 #endif 763 return (0); 764 } 765 766 void 767 sched_userret(struct thread *td) 768 { 769 struct ksegrp *kg; 770 771 kg = td->td_ksegrp; 772 773 if (td->td_priority != kg->kg_user_pri) { 774 mtx_lock_spin(&sched_lock); 775 td->td_priority = kg->kg_user_pri; 776 mtx_unlock_spin(&sched_lock); 777 } 778 } 779 780 struct kse * 781 sched_choose(void) 782 { 783 struct kseq *kseq; 784 struct kse *ke; 785 786 kseq = KSEQ_SELF(); 787 ke = kseq_choose(kseq); 788 789 if (ke) { 790 ke->ke_state = KES_THREAD; 791 kseq_rem(kseq, ke); 792 } 793 794 #ifdef SMP 795 if (ke == NULL && smp_started) { 796 #if 0 797 if (kseq->ksq_bload) 798 return (NULL); 799 #endif 800 /* 801 * Find the cpu with the highest load and steal one proc. 802 */ 803 kseq = kseq_load_highest(); 804 if (kseq == NULL) 805 return (NULL); 806 ke = kseq_choose(kseq); 807 kseq_rem(kseq, ke); 808 809 ke->ke_state = KES_THREAD; 810 ke->ke_runq = NULL; 811 ke->ke_cpu = PCPU_GET(cpuid); 812 } 813 #endif 814 return (ke); 815 } 816 817 void 818 sched_add(struct kse *ke) 819 { 820 struct kseq *kseq; 821 822 mtx_assert(&sched_lock, MA_OWNED); 823 KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE")); 824 KASSERT((ke->ke_thread->td_kse != NULL), 825 ("sched_add: No KSE on thread")); 826 KASSERT(ke->ke_state != KES_ONRUNQ, 827 ("sched_add: kse %p (%s) already in run queue", ke, 828 ke->ke_proc->p_comm)); 829 KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 830 ("sched_add: process swapped out")); 831 832 /* 833 * Timeshare threads get placed on the appropriate queue on their 834 * bound cpu. 835 */ 836 if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) { 837 kseq = KSEQ_CPU(ke->ke_cpu); 838 839 if (ke->ke_runq == NULL) { 840 if (SCHED_CURR(ke->ke_ksegrp)) 841 ke->ke_runq = kseq->ksq_curr; 842 else 843 ke->ke_runq = kseq->ksq_next; 844 } 845 /* 846 * If we're a real-time or interrupt thread place us on the curr 847 * queue for the current processor. Hopefully this will yield the 848 * lowest latency response. 849 */ 850 } else { 851 kseq = KSEQ_SELF(); 852 ke->ke_runq = kseq->ksq_curr; 853 } 854 ke->ke_ksegrp->kg_runq_kses++; 855 ke->ke_state = KES_ONRUNQ; 856 857 kseq_add(kseq, ke); 858 } 859 860 void 861 sched_rem(struct kse *ke) 862 { 863 mtx_assert(&sched_lock, MA_OWNED); 864 /* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */ 865 866 ke->ke_runq = NULL; 867 ke->ke_state = KES_THREAD; 868 ke->ke_ksegrp->kg_runq_kses--; 869 870 kseq_rem(KSEQ_CPU(ke->ke_cpu), ke); 871 } 872 873 fixpt_t 874 sched_pctcpu(struct kse *ke) 875 { 876 fixpt_t pctcpu; 877 int realstathz; 878 879 pctcpu = 0; 880 realstathz = stathz ? stathz : hz; 881 882 if (ke->ke_ticks) { 883 int rtick; 884 885 /* Update to account for time potentially spent sleeping */ 886 ke->ke_ltick = ticks; 887 sched_pctcpu_update(ke); 888 889 /* How many rtick per second ? */ 890 rtick = ke->ke_ticks / SCHED_CPU_TIME; 891 pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 892 } 893 894 ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 895 896 return (pctcpu); 897 } 898 899 int 900 sched_sizeof_kse(void) 901 { 902 return (sizeof(struct kse) + sizeof(struct ke_sched)); 903 } 904 905 int 906 sched_sizeof_ksegrp(void) 907 { 908 return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 909 } 910 911 int 912 sched_sizeof_proc(void) 913 { 914 return (sizeof(struct proc)); 915 } 916 917 int 918 sched_sizeof_thread(void) 919 { 920 return (sizeof(struct thread) + sizeof(struct td_sched)); 921 } 922