/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

int realstathz;

#define	SCHED_STRICT_RESCHED	1

/*
 * These data structures are allocated within their parent data structure
 * but are scheduler specific.
 */

struct ke_sched {
	int	ske_slice;
	struct	runq *ske_runq;
	/* The following variables are only used for pctcpu calculation */
	int	ske_ltick;	/* Last tick that we were running on */
	int	ske_ftick;	/* First tick that we were running on */
	int	ske_ticks;	/* Tick count */
	u_char	ske_cpu;
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu

struct kg_sched {
	int	skg_slptime;	/* Number of ticks we vol. slept */
	int	skg_runtime;	/* Number of ticks we were running */
};
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime

struct td_sched {
	int	std_slptime;
	int	std_schedflag;
};
#define	td_slptime	td_sched->std_slptime
#define	td_schedflag	td_sched->std_schedflag

#define	TD_SCHED_BLOAD	0x0001	/*
				 * thread was counted as being in short
				 * term sleep.
				 */
struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Reserved priorities for nice.
 * PRI_BASE:	The start of the dynamic range.
 * DYN_RANGE:	Number of priorities that are available in the dynamic
 *		priority range.
 * DYN_HALF:	Half of DYN_RANGE for convenience elsewhere.
 * PRI_DYN:	The dynamic priority which is derived from the number of
 *		ticks running vs the total number of ticks.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		PRIO_TOTAL
#define	SCHED_PRI_NHALF		(PRIO_TOTAL / 2)
#define	SCHED_PRI_BASE		((SCHED_PRI_NRESV / 2) + PRI_MIN_TIMESHARE)
#define	SCHED_DYN_RANGE		(SCHED_PRI_RANGE - SCHED_PRI_NRESV)
#define	SCHED_DYN_HALF		(SCHED_DYN_RANGE / 2)
#define	SCHED_PRI_DYN(run, total)	(((run) * SCHED_DYN_RANGE) / (total))
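
/*
 * Worked example, assuming the usual values of the era: PRI_MIN_TIMESHARE
 * is 160, PRI_MAX_TIMESHARE is 223 and PRIO_TOTAL is 40 (nice -20..20).
 * That gives:
 *
 *	SCHED_PRI_RANGE = 64	SCHED_PRI_NRESV = 40	SCHED_PRI_NHALF = 20
 *	SCHED_PRI_BASE  = 180	SCHED_DYN_RANGE = 24	SCHED_DYN_HALF  = 12
 *
 * SCHED_PRI_DYN(run, total) then maps the run/total ratio onto 0..24; a
 * kseg that ran for half of its observed ticks gets SCHED_PRI_DYN == 12.
 */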

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_THROTTLE:	Divisor for reducing slp/run time.
 * INTERACT_RANGE:	Range of interactivity values.  Smaller is better.
 * INTERACT_HALF:	Convenience define, half of the interactivity range.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 30) << 10)
#define	SCHED_SLP_RUN_THROTTLE	(10)
#define	SCHED_INTERACT_RANGE	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_RANGE / 2)
#define	SCHED_INTERACT_THRESH	(10)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determines the amount of slice granted to a scaled nice.
 */
#define	SCHED_SLICE_MIN		(hz / 100)
#define	SCHED_SLICE_MAX		(hz / 10)
#define	SCHED_SLICE_RANGE	(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_PRI_NHALF))
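
/*
 * Worked example, assuming hz == 100 and SCHED_PRI_NHALF == 20: SLICE_MIN
 * is 1 tick, SLICE_MAX is 10 ticks and SLICE_RANGE is 10.  A kseg whose
 * nice value matches the least nice kseg on the queue (scaled nice 0) gets
 * SLICE_NICE(0) == 10 ticks, one 10 points nicer gets 5 ticks, and one 20
 * or more points nicer falls outside the window and gets no slice at all.
 */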

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 *
 * XXX nice value should affect how interactive a kg is.
 */
#define	SCHED_CURR(kg)	(sched_interact_score(kg) < SCHED_INTERACT_THRESH)

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	60
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
	struct	runq ksq_runqs[2];
	struct	runq *ksq_curr;
	struct	runq *ksq_next;
	int	ksq_load;		/* Total runnable */
#ifdef SMP
	unsigned int	ksq_rslices;	/* Slices on run queue */
	unsigned int	ksq_bload;	/* Threads waiting on IO */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq	kseq_cpu[MAXCPU];
#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#else
struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void sched_slice(struct kse *ke);
static int sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static int kseq_nice_min(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static __inline void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif

static __inline void
kseq_add(struct kseq *kseq, struct kse *ke)
{
	runq_add(ke->ke_runq, ke);
	kseq->ksq_load++;
#ifdef SMP
	kseq->ksq_rslices += ke->ke_slice;
#endif
}

static __inline void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_load--;
	runq_remove(ke->ke_runq, ke);
#ifdef SMP
	kseq->ksq_rslices -= ke->ke_slice;
#endif
}

#ifdef SMP
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload++;
}

static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload--;
}

struct kseq *
kseq_load_highest(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	cpu = 0;
	load = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_load > load) {
			load = kseq->ksq_load;
			cpu = i;
		}
	}
	if (load)
		return (KSEQ_CPU(cpu));

	return (NULL);
}
#endif

struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
		swap = kseq->ksq_curr;
		kseq->ksq_curr = kseq->ksq_next;
		kseq->ksq_next = swap;
		ke = runq_choose(kseq->ksq_curr);
	}

	return (ke);
}
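
/*
 * Example of the queue rotation above: if ksq_curr holds A and B while
 * ksq_next holds an expired C, successive calls return A and B until
 * ksq_curr drains, at which point the two queues swap and C (along with
 * anything else that expired onto the old next queue in the meantime) is
 * returned.  Interactive KSEs keep being inserted onto the new current
 * queue, so they never wait behind a full round of expired timeshare KSEs.
 */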

static int
kseq_nice_min(struct kseq *kseq)
{
	struct kse *ke0;
	struct kse *ke1;

	if (kseq->ksq_load == 0)
		return (0);

	ke0 = runq_choose(kseq->ksq_curr);
	ke1 = runq_choose(kseq->ksq_next);

	if (ke0 == NULL)
		return (ke1->ke_ksegrp->kg_nice);

	if (ke1 == NULL)
		return (ke0->ke_ksegrp->kg_nice);

	return (min(ke0->ke_ksegrp->kg_nice, ke1->ke_ksegrp->kg_nice));
}

static void
kseq_setup(struct kseq *kseq)
{
	kseq->ksq_curr = &kseq->ksq_runqs[0];
	kseq->ksq_next = &kseq->ksq_runqs[1];
	runq_init(kseq->ksq_curr);
	runq_init(kseq->ksq_next);
	kseq->ksq_load = 0;
#ifdef SMP
	kseq->ksq_rslices = 0;
	kseq->ksq_bload = 0;
#endif
}

static void
sched_setup(void *dummy)
{
	int i;

	realstathz = stathz ? stathz : hz;

	mtx_lock_spin(&sched_lock);
	/* init kseqs */
	for (i = 0; i < MAXCPU; i++)
		kseq_setup(KSEQ_CPU(i));
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static int
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return (kg->kg_user_pri);

	pri = sched_interact_score(kg) * SCHED_DYN_RANGE / SCHED_INTERACT_RANGE;
	pri += SCHED_PRI_BASE;
	pri += kg->kg_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return (kg->kg_user_pri);
}
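
/*
 * Worked example, assuming the usual timeshare range of 160..223 so that
 * SCHED_PRI_BASE == 180 and SCHED_DYN_RANGE == 24: an interactive kseg
 * with a score of 5 and nice 0 ends up at 5 * 24 / 100 + 180 == 181, while
 * a cpu-bound kseg with a score of 100 and nice +20 computes
 * 24 + 180 + 20 == 224 and is clamped to PRI_MAX_TIMESHARE.  The nice
 * value shifts the result by up to 20 either way, which is what the
 * reserved priorities on both ends of the range are for.
 */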

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.
 */
static void
sched_slice(struct kse *ke)
{
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;

	/*
	 * Rationale:
	 * KSEs in interactive ksegs get the minimum slice so that we
	 * quickly notice if one of them abuses its advantage.
	 *
	 * KSEs in non-interactive ksegs are assigned a slice that is
	 * based on the kseg's nice value relative to the least nice kseg
	 * on the run queue for this cpu.
	 *
	 * If the KSE is less nice than all others it gets the maximum
	 * slice and other KSEs will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice kse on the run queue.  Slice size is determined by
	 * the kse's distance from the least nice ksegrp.
	 *
	 * If you are outside of the window you will get no slice and
	 * you will be reevaluated each time you are selected on the
	 * run queue.
	 */

	if (!SCHED_CURR(kg)) {
		struct kseq *kseq;
		int nice_base;
		int nice;

		kseq = KSEQ_CPU(ke->ke_cpu);
		nice_base = kseq_nice_min(kseq);
		nice = kg->kg_nice + (0 - nice_base);

		if (kseq->ksq_load == 0 || kg->kg_nice < nice_base)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_PRI_NHALF)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else
			ke->ke_slice = 0;
	} else
		ke->ke_slice = SCHED_SLICE_MIN;

	/*
	 * Every time we grant a new slice check to see if we need to scale
	 * back the slp and run time in the kg.  This will cause us to forget
	 * old interactivity while maintaining the current ratio.
	 */
	if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) {
		kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE;
		kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE;
	}

	return;
}

static int
sched_interact_score(struct ksegrp *kg)
{
	int big;
	int small;
	int base;

	if (kg->kg_runtime > kg->kg_slptime) {
		big = kg->kg_runtime;
		small = kg->kg_slptime;
		base = SCHED_INTERACT_HALF;
	} else {
		big = kg->kg_slptime;
		small = kg->kg_runtime;
		base = 0;
	}

	big /= SCHED_INTERACT_HALF;
	if (big != 0)
		small /= big;
	else
		small = 0;

	small += base;
	/* XXX Factor in nice */
	return (small);
}
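
/*
 * Worked example for sched_interact_score(): with kg_slptime == 5000 and
 * kg_runtime == 500 (only the ratio matters), big = 5000 / 50 == 100 and
 * small = 500 / 100 == 5, giving a score of 5, which is below
 * SCHED_INTERACT_THRESH and therefore "interactive".  With the ratio
 * reversed (kg_runtime == 5000, kg_slptime == 500) the same arithmetic
 * starts from a base of SCHED_INTERACT_HALF and yields 55, well into the
 * cpu-bound half of the 0..100 range.
 */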

int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	/*
	 * Shift the tick count out so that the divide doesn't round away
	 * our results.
	 */
	ke->ke_ticks <<= 10;
	ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
	    SCHED_CPU_TICKS;
	ke->ke_ticks >>= 10;
	ke->ke_ltick = ticks;
	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}
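
/*
 * Worked example, assuming hz == 100 so SCHED_CPU_TICKS == 6000: a kse
 * that accumulated 300 ticks over an 8000 tick window is rescaled to
 * 300 * 6000 / 8000 == 225 ticks, and the window is slid forward so that
 * ke_ltick - ke_ftick once again spans the last SCHED_CPU_TIME seconds.
 * The << 10 / >> 10 pair only preserves fractional precision across the
 * divide.
 */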

#ifdef SMP
/* XXX Should be changed to kseq_load_lowest() */
int
sched_pickcpu(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	if (!smp_started)
		return (0);

	load = -1;
	cpu = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		/* Remember the least loaded cpu seen so far. */
		if (load == -1 || kseq->ksq_load < load) {
			cpu = i;
			load = kseq->ksq_load;
		}
	}

	CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
	return (cpu);
}
#else
int
sched_pickcpu(void)
{
	return (0);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
	struct kse *ke;
	struct runq *rq;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	td->td_priority = prio;

	if (TD_ON_RUNQ(td)) {
		rq = ke->ke_runq;

		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}

void
sched_switchout(struct thread *td)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;

	td->td_last_kse = ke;
	td->td_lastcpu = ke->ke_oncpu;
	ke->ke_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;

	if (TD_IS_RUNNING(td)) {
		setrunqueue(td);
		return;
	}
	td->td_kse->ke_runq = NULL;

	/*
	 * We will not be on the run queue.  So we must be
	 * sleeping or similar.
	 */
	if (td->td_proc->p_flag & P_THREADED)
		kse_reassign(ke);
}

void
sched_switchin(struct thread *td)
{
	/* struct kse *ke = td->td_kse; */
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_kse->ke_oncpu = PCPU_GET(cpuid);
#if SCHED_STRICT_RESCHED
	if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
	    td->td_priority != td->td_ksegrp->kg_user_pri)
		curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

void
sched_nice(struct ksegrp *kg, int nice)
{
	struct thread *td;

	kg->kg_nice = nice;
	sched_priority(kg);
	FOREACH_THREAD_IN_GROUP(kg, td) {
		td->td_flags |= TDF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td, u_char prio)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_priority = prio;

#ifdef SMP
	if (td->td_priority < PZERO) {
		kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag |= TD_SCHED_BLOAD;
	}
#endif
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
	if (td->td_slptime) {
		struct ksegrp *kg;

		kg = td->td_ksegrp;
		kg->kg_slptime += (ticks - td->td_slptime) << 10;
		sched_priority(kg);
		td->td_slptime = 0;
	}
#ifdef SMP
	if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
		kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag &= ~TD_SCHED_BLOAD;
	}
#endif
	setrunqueue(td);
#if SCHED_STRICT_RESCHED
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
	struct kse *ckse;
	struct kse *pkse;

	mtx_assert(&sched_lock, MA_OWNED);
	ckse = FIRST_KSE_IN_KSEGRP(child);
	pkse = FIRST_KSE_IN_KSEGRP(kg);

	/* XXX Need something better here */
	if (kg->kg_slptime > kg->kg_runtime) {
		child->kg_slptime = SCHED_DYN_RANGE;
		child->kg_runtime = kg->kg_slptime / SCHED_DYN_RANGE;
	} else {
		child->kg_runtime = SCHED_DYN_RANGE;
		child->kg_slptime = kg->kg_runtime / SCHED_DYN_RANGE;
	}
#if 0
	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
#endif
	child->kg_user_pri = kg->kg_user_pri;

#if 0
	if (pkse->ke_cpu != PCPU_GET(cpuid)) {
		printf("pkse->ke_cpu = %d\n", pkse->ke_cpu);
		printf("cpuid = %d", PCPU_GET(cpuid));
		Debugger("stop");
	}
#endif

	ckse->ke_slice = pkse->ke_slice;
	ckse->ke_cpu = pkse->ke_cpu; /* sched_pickcpu(); */
	ckse->ke_runq = NULL;
	/*
	 * Claim that we've been running for one second for statistical
	 * purposes.
	 */
	ckse->ke_ticks = 0;
	ckse->ke_ltick = ticks;
	ckse->ke_ftick = ticks - hz;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
	/* XXX Need something better here */
	mtx_assert(&sched_lock, MA_OWNED);
	kg->kg_slptime = child->kg_slptime;
	kg->kg_runtime = child->kg_runtime;
	sched_priority(kg);
}

void
sched_clock(struct thread *td)
{
	struct kse *ke;
#if SCHED_STRICT_RESCHED
	struct kse *nke;
	struct kseq *kseq;
#endif
	struct ksegrp *kg;

	ke = td->td_kse;
	kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;
	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_kse->ke_flags & KEF_IDLEKSE)
		return;

	/*
	 * Check for a higher priority task on the run queue.  This can happen
	 * on SMP if another processor woke up a process on our runq.
	 */
#if SCHED_STRICT_RESCHED
	kseq = KSEQ_SELF();
	nke = runq_choose(kseq->ksq_curr);

	if (nke && nke->ke_thread &&
	    nke->ke_thread->td_priority < td->td_priority)
		td->td_flags |= TDF_NEEDRESCHED;
#endif
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute our
	 * "interactivity".
	 */
	kg->kg_runtime += 1 << 10;

	/*
	 * We used up one time slice.
	 */
	ke->ke_slice--;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	if (ke->ke_slice <= 0) {
		sched_priority(kg);
		sched_slice(ke);
		td->td_flags |= TDF_NEEDRESCHED;
		ke->ke_runq = NULL;
	}
}
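
/*
 * End-to-end sketch of the expiry path above: a cpu-bound kseg that rarely
 * sleeps accumulates kg_runtime much faster than kg_slptime, so its
 * interactivity score settles at 50 or above and SCHED_CURR() is false.
 * When its slice runs out, sched_slice() sizes the next slice from its
 * nice distance to the least nice kseg on this cpu and sched_add() places
 * it on ksq_next, so it will not run again until the current queue drains.
 * An interactive kseg expiring at the same time scores below
 * SCHED_INTERACT_THRESH, receives SCHED_SLICE_MIN and goes straight back
 * onto ksq_curr.
 */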

int
sched_runnable(void)
{
	struct kseq *kseq;

	kseq = KSEQ_SELF();

	if (kseq->ksq_load)
		return (1);
#ifdef SMP
	/*
	 * For SMP we may steal other processors' KSEs.  Just search until we
	 * verify that at least one other cpu has a runnable task.
	 */
	if (smp_started) {
		int i;

#if 0
		if (kseq->ksq_bload)
			return (0);
#endif

		for (i = 0; i < mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			kseq = KSEQ_CPU(i);
			if (kseq->ksq_load)
				return (1);
		}
	}
#endif
	return (0);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	kseq = KSEQ_SELF();
retry:
	ke = kseq_choose(kseq);

	if (ke) {
		ke->ke_state = KES_THREAD;
		kseq_rem(kseq, ke);

		/*
		 * If we dequeue a kse with a slice of zero it was below the
		 * nice threshold to acquire a slice.  Recalculate the slice
		 * to see if the situation has changed and then requeue.
		 */
		if (ke->ke_slice == 0) {
			sched_slice(ke);
			ke->ke_runq = kseq->ksq_next;
			kseq_add(kseq, ke);
			goto retry;
		}
	}

#ifdef SMP
	if (ke == NULL && smp_started) {
#if 0
		if (kseq->ksq_bload)
			return (NULL);
#endif
		/*
		 * Find the cpu with the highest load and steal one proc.
		 */
		kseq = kseq_load_highest();
		if (kseq == NULL)
			return (NULL);
		ke = kseq_choose(kseq);
		kseq_rem(kseq, ke);

		ke->ke_state = KES_THREAD;
		ke->ke_runq = NULL;
		ke->ke_cpu = PCPU_GET(cpuid);
	}
#endif
	return (ke);
}

void
sched_add(struct kse *ke)
{
	struct kseq *kseq;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));

	/*
	 * Timeshare threads get placed on the appropriate queue on their
	 * bound cpu.
	 */
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
		kseq = KSEQ_CPU(ke->ke_cpu);

		if (ke->ke_runq == NULL) {
			if (SCHED_CURR(ke->ke_ksegrp))
				ke->ke_runq = kseq->ksq_curr;
			else
				ke->ke_runq = kseq->ksq_next;
		}
	/*
	 * If we're a real-time or interrupt thread place us on the curr
	 * queue for the current processor.  Hopefully this will yield the
	 * lowest latency response.
	 */
	} else {
		kseq = KSEQ_SELF();
		ke->ke_runq = kseq->ksq_curr;
	}
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	kseq_add(kseq, ke);
}

void
sched_rem(struct kse *ke)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

	ke->ke_runq = NULL;
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;

	kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

fixpt_t
sched_pctcpu(struct kse *ke)
{
	fixpt_t pctcpu;
	int realstathz;

	pctcpu = 0;
	realstathz = stathz ? stathz : hz;

	if (ke->ke_ticks) {
		int rtick;

		/* Update to account for time potentially spent sleeping */
		ke->ke_ltick = ticks;
		sched_pctcpu_update(ke);

		/* How many rticks per second? */
		rtick = ke->ke_ticks / SCHED_CPU_TIME;
		pctcpu = (FSCALE * ((FSCALE * rtick) / realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

	return (pctcpu);
}

int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}
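
/*
 * The sched_sizeof_*() routines report the size of the core structure plus
 * its scheduler-private extension so that the allocator can reserve both
 * in one object; the ke_sched/kg_sched/td_sched pointers are then
 * presumably pointed at the trailing space by the generic kern_ code.  The
 * static td_sched/ke_sched/kg_sched instances near the top of this file
 * cover thread0, kse0 and ksegrp0, which are never allocated that way.
 */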