/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

int realstathz;

#define SCHED_STRICT_RESCHED    1

/*
 * These data structures are allocated within their parent data structure
 * but are scheduler specific.
 */

struct ke_sched {
        int             ske_slice;
        struct runq     *ske_runq;
        /* The following variables are only used for pctcpu calculation */
        int             ske_ltick;      /* Last tick that we were running on */
        int             ske_ftick;      /* First tick that we were running on */
        int             ske_ticks;      /* Tick count */
        u_char          ske_cpu;
};
#define ke_slice        ke_sched->ske_slice
#define ke_runq         ke_sched->ske_runq
#define ke_ltick        ke_sched->ske_ltick
#define ke_ftick        ke_sched->ske_ftick
#define ke_ticks        ke_sched->ske_ticks
#define ke_cpu          ke_sched->ske_cpu
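
/*
 * Note: judging from sched_wakeup() and sched_clock() below, skg_slptime and
 * skg_runtime appear to be kept in ticks shifted left by 10 (1/1024ths of a
 * tick) rather than whole ticks; SCHED_SLP_RUN_MAX is expressed in the same
 * shifted units.
 */
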
struct kg_sched {
        int     skg_slptime;            /* Number of ticks we vol. slept */
        int     skg_runtime;            /* Number of ticks we were running */
};
#define kg_slptime      kg_sched->skg_slptime
#define kg_runtime      kg_sched->skg_runtime

struct td_sched {
        int     std_slptime;
        int     std_schedflag;
};
#define td_slptime      td_sched->std_slptime
#define td_schedflag    td_sched->std_schedflag

#define TD_SCHED_BLOAD  0x0001          /*
                                         * thread was counted as being in
                                         * short term sleep.
                                         */
struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 *
 * PRI_RANGE:   Total priority range for timeshare threads.
 * PRI_NRESV:   Reserved priorities for nice.
 * PRI_BASE:    The start of the dynamic range.
 * DYN_RANGE:   Number of priorities that are available in the dynamic
 *              priority range.
 * DYN_HALF:    Half of DYN_RANGE for convenience elsewhere.
 * PRI_DYN:     The dynamic priority which is derived from the number of
 *              ticks running vs the total number of ticks.
 */
#define SCHED_PRI_RANGE         (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define SCHED_PRI_NRESV         PRIO_TOTAL
#define SCHED_PRI_NHALF         (PRIO_TOTAL / 2)
#define SCHED_PRI_BASE          ((SCHED_PRI_NRESV / 2) + PRI_MIN_TIMESHARE)
#define SCHED_DYN_RANGE         (SCHED_PRI_RANGE - SCHED_PRI_NRESV)
#define SCHED_DYN_HALF          (SCHED_DYN_RANGE / 2)
#define SCHED_PRI_DYN(run, total)       (((run) * SCHED_DYN_RANGE) / (total))

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:         Maximum amount of sleep time + run time we'll
 *                      accumulate before throttling back.
 * SLP_RUN_THROTTLE:    Divisor for reducing slp/run time.
 * INTERACT_RANGE:      Range of interactivity values.  Smaller is better.
 * INTERACT_HALF:       Convenience define, half of the interactivity range.
 * INTERACT_THRESH:     Threshold for placement on the current runq.
 */
#define SCHED_SLP_RUN_MAX       ((hz * 2) << 10)
#define SCHED_SLP_RUN_THROTTLE  (10)
#define SCHED_INTERACT_RANGE    (100)
#define SCHED_INTERACT_HALF     (SCHED_INTERACT_RANGE / 2)
#define SCHED_INTERACT_THRESH   (10)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:   Minimum time slice granted, in units of ticks.
 * SLICE_MAX:   Maximum time slice granted.
 * SLICE_RANGE: Range of available time slices scaled by hz.
 * SLICE_SCALE: The number of slices granted per val in the range of
 *              [0, max].
 * SLICE_NICE:  Determines the amount of slice granted to a scaled nice.
 */
#define SCHED_SLICE_MIN         (hz / 100)
#define SCHED_SLICE_MAX         (hz / 10)
#define SCHED_SLICE_RANGE       (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define SCHED_SLICE_SCALE(val, max)     (((val) * SCHED_SLICE_RANGE) / (max))
#define SCHED_SLICE_NICE(nice)  \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_PRI_NHALF))

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 *
 * XXX nice value should affect how interactive a kg is.
 */
#define SCHED_CURR(kg)  (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
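
/*
 * Worked example for the macros above (illustrative only; it assumes
 * hz == 1000, PRIO_TOTAL == 40 and the conventional PRI_MIN_TIMESHARE == 160,
 * PRI_MAX_TIMESHARE == 223 -- verify against the actual headers):
 *
 *      SCHED_PRI_RANGE  = 223 - 160 + 1 = 64
 *      SCHED_PRI_NRESV  = 40,  SCHED_PRI_NHALF = 20
 *      SCHED_PRI_BASE   = 20 + 160 = 180
 *      SCHED_DYN_RANGE  = 64 - 40 = 24 dynamic priorities
 *      SCHED_SLICE_MIN  = 1000 / 100 = 10 ticks (~10ms)
 *      SCHED_SLICE_MAX  = 1000 / 10 = 100 ticks (~100ms)
 *      SCHED_SLICE_NICE(0) = 100 ticks, SCHED_SLICE_NICE(20) = 9 ticks
 */
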
/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:      Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:     Number of hz ticks to average the cpu usage across.
 */

#define SCHED_CPU_TIME  10
#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
        struct runq     ksq_ithd;       /* Queue of ITHD and REALTIME tds. */
        struct runq     ksq_idle;       /* Queue of IDLE threads. */
        struct runq     ksq_runqs[2];   /* Run queues for TIMESHARE. */
        struct runq     *ksq_curr;
        struct runq     *ksq_next;
        int             ksq_itload;     /* Total runnable for ITHD. */
        int             ksq_tsload;     /* Total runnable for TIMESHARE. */
        int             ksq_idload;     /* Total runnable for IDLE. */
#ifdef SMP
        unsigned int    ksq_rslices;    /* Slices on run queue */
        unsigned int    ksq_bload;      /* Threads waiting on IO */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq     kseq_cpu[MAXCPU];
#define KSEQ_SELF()     (&kseq_cpu[PCPU_GET(cpuid)])
#define KSEQ_CPU(x)     (&kseq_cpu[(x)])
#else
struct kseq     kseq_cpu;
#define KSEQ_SELF()     (&kseq_cpu)
#define KSEQ_CPU(x)     (&kseq_cpu)
#endif

static void sched_slice(struct kse *ke);
static int sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static int kseq_nice_min(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif

static void
kseq_add(struct kseq *kseq, struct kse *ke)
{
        struct ksegrp *kg;

        kg = ke->ke_ksegrp;

        /*
         * Figure out what run queue we should go on and assign a slice.
         */
        switch (kg->kg_pri_class) {
        /*
         * If we're a real-time or interrupt thread place us on the curr
         * queue for the current processor.  Hopefully this will yield the
         * lowest latency response.
         */
        case PRI_ITHD:
        case PRI_REALTIME:
                ke->ke_runq = &kseq->ksq_ithd;
                ke->ke_slice = SCHED_SLICE_MAX;
                kseq->ksq_itload++;
                break;
        /*
         * Timeshare threads get placed on the appropriate queue on their
         * bound cpu.
         */
        case PRI_TIMESHARE:
                if (ke->ke_runq == NULL) {
                        if (SCHED_CURR(kg))
                                ke->ke_runq = kseq->ksq_curr;
                        else
                                ke->ke_runq = kseq->ksq_next;
                }
                if (ke->ke_slice == 0)
                        sched_slice(ke);
                kseq->ksq_tsload++;
                break;
        /*
         * Only grant PRI_IDLE processes a slice if there is nothing else
         * running.
         */
        case PRI_IDLE:
                ke->ke_runq = &kseq->ksq_idle;
                ke->ke_slice = SCHED_SLICE_MIN;
                kseq->ksq_idload++;
                break;
        default:
                panic("Unknown priority class.\n");
                break;
        }

        runq_add(ke->ke_runq, ke);
#ifdef SMP
        kseq->ksq_rslices += ke->ke_slice;
#endif
}

static void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
        struct ksegrp *kg;

        kg = ke->ke_ksegrp;

        /*
         * XXX Consider making the load an array.
         */
        switch (kg->kg_pri_class) {
        case PRI_ITHD:
        case PRI_REALTIME:
                kseq->ksq_itload--;
                break;
        case PRI_TIMESHARE:
                kseq->ksq_tsload--;
                break;
        case PRI_IDLE:
                kseq->ksq_idload--;
                break;
        }
        runq_remove(ke->ke_runq, ke);
#ifdef SMP
        kseq->ksq_rslices -= ke->ke_slice;
#endif
}

#ifdef SMP
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_bload++;
}

static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_bload--;
}

struct kseq *
kseq_load_highest(void)
{
        struct kseq *kseq;
        int load;
        int cpu;
        int i;

        cpu = 0;
        load = 0;

        for (i = 0; i < mp_maxid; i++) {
                if (CPU_ABSENT(i))
                        continue;
                kseq = KSEQ_CPU(i);
                if (kseq->ksq_tsload > load) {
                        load = kseq->ksq_tsload;
                        cpu = i;
                }
        }
        if (load)
                return (KSEQ_CPU(cpu));

        return (NULL);
}
#endif

struct kse *
kseq_choose(struct kseq *kseq)
{
        struct kse *ke;
        struct runq *swap;

        if (kseq->ksq_itload)
                return (runq_choose(&kseq->ksq_ithd));

        if (kseq->ksq_tsload) {
                if ((ke = runq_choose(kseq->ksq_curr)) != NULL)
                        return (ke);

                swap = kseq->ksq_curr;
                kseq->ksq_curr = kseq->ksq_next;
                kseq->ksq_next = swap;

                return (runq_choose(kseq->ksq_curr));
        }
        if (kseq->ksq_idload)
                return (runq_choose(&kseq->ksq_idle));

        return (NULL);
}
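
/*
 * kseq_choose() together with the ksq_curr/ksq_next pointers implements the
 * two-queue scheme for TIMESHARE KSEs: kseq_add() places KSEs that
 * SCHED_CURR() judges interactive on ksq_curr and everything else on
 * ksq_next, and once ksq_curr is empty the two pointers are swapped so that
 * the deferred queue gets its turn.  KSEs whose slice has gone to zero are
 * also pushed onto ksq_next by sched_choose() below.
 */
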
static int
kseq_nice_min(struct kseq *kseq)
{
        struct kse *ke0;
        struct kse *ke1;

        if (kseq->ksq_tsload == 0)
                return (0);

        ke0 = runq_choose(kseq->ksq_curr);
        ke1 = runq_choose(kseq->ksq_next);

        if (ke0 == NULL)
                return (ke1->ke_ksegrp->kg_nice);

        if (ke1 == NULL)
                return (ke0->ke_ksegrp->kg_nice);

        return (min(ke0->ke_ksegrp->kg_nice, ke1->ke_ksegrp->kg_nice));
}

static void
kseq_setup(struct kseq *kseq)
{
        kseq->ksq_curr = &kseq->ksq_runqs[0];
        kseq->ksq_next = &kseq->ksq_runqs[1];
        runq_init(&kseq->ksq_ithd);
        runq_init(kseq->ksq_curr);
        runq_init(kseq->ksq_next);
        runq_init(&kseq->ksq_idle);
        kseq->ksq_itload = 0;
        kseq->ksq_tsload = 0;
        kseq->ksq_idload = 0;
#ifdef SMP
        kseq->ksq_rslices = 0;
        kseq->ksq_bload = 0;
#endif
}

static void
sched_setup(void *dummy)
{
        int i;

        realstathz = stathz ? stathz : hz;

        mtx_lock_spin(&sched_lock);
        /* init kseqs */
        for (i = 0; i < MAXCPU; i++)
                kseq_setup(KSEQ_CPU(i));
        mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static int
sched_priority(struct ksegrp *kg)
{
        int pri;

        if (kg->kg_pri_class != PRI_TIMESHARE)
                return (kg->kg_user_pri);

        pri = sched_interact_score(kg) * SCHED_DYN_RANGE / SCHED_INTERACT_RANGE;
        pri += SCHED_PRI_BASE;
        pri += kg->kg_nice;

        if (pri > PRI_MAX_TIMESHARE)
                pri = PRI_MAX_TIMESHARE;
        else if (pri < PRI_MIN_TIMESHARE)
                pri = PRI_MIN_TIMESHARE;

        kg->kg_user_pri = pri;

        return (kg->kg_user_pri);
}
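
/*
 * Example of the calculation above (illustrative; uses the same assumed
 * constants as the macro example near the top of the file, i.e.
 * SCHED_DYN_RANGE == 24 and SCHED_PRI_BASE == 180): a ksegrp with an
 * interactivity score of 50 and nice 0 gets pri = 50 * 24 / 100 + 180 = 192;
 * the result is then clamped to the [PRI_MIN_TIMESHARE, PRI_MAX_TIMESHARE]
 * range.
 */
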
/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
        struct ksegrp *kg;

        kg = ke->ke_ksegrp;

        /*
         * Rationale:
         * KSEs in interactive ksegs get the minimum slice so that we
         * quickly notice if they abuse their advantage.
         *
         * KSEs in non-interactive ksegs are assigned a slice that is
         * based on the kseg's nice value relative to the least nice kseg
         * on the run queue for this cpu.
         *
         * If the KSE is less nice than all others it gets the maximum
         * slice and other KSEs will adjust their slice relative to
         * this when they first expire.
         *
         * There is a 20 point window that starts relative to the least
         * nice kse on the run queue.  Slice size is determined by
         * the kse's distance from the least nice ksegrp.
         *
         * If you are outside of the window you will get no slice and
         * you will be reevaluated each time you are selected on the
         * run queue.
         */

        if (!SCHED_CURR(kg)) {
                struct kseq *kseq;
                int nice_base;
                int nice;

                kseq = KSEQ_CPU(ke->ke_cpu);
                nice_base = kseq_nice_min(kseq);
                nice = kg->kg_nice + (0 - nice_base);

                if (kseq->ksq_tsload == 0 || kg->kg_nice < nice_base)
                        ke->ke_slice = SCHED_SLICE_MAX;
                else if (nice <= SCHED_PRI_NHALF)
                        ke->ke_slice = SCHED_SLICE_NICE(nice);
                else
                        ke->ke_slice = 0;
        } else
                ke->ke_slice = SCHED_SLICE_MIN;

        /*
         * Check to see if we need to scale back the slp and run time
         * in the kg.  This will cause us to forget old interactivity
         * while maintaining the current ratio.
         */
        if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) {
                kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE;
                kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE;
        }

        return;
}

static int
sched_interact_score(struct ksegrp *kg)
{
        int big;
        int small;
        int base;

        if (kg->kg_runtime > kg->kg_slptime) {
                big = kg->kg_runtime;
                small = kg->kg_slptime;
                base = SCHED_INTERACT_HALF;
        } else {
                big = kg->kg_slptime;
                small = kg->kg_runtime;
                base = 0;
        }

        big /= SCHED_INTERACT_HALF;
        if (big != 0)
                small /= big;
        else
                small = 0;

        small += base;
        /* XXX Factor in nice */
        return (small);
}
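
/*
 * A rough feel for the score above (values are in the shifted units kept in
 * kg_slptime/kg_runtime): a ksegrp that slept for 10000 units and ran for
 * 1000 scores 1000 / (10000 / 50) = 5, below SCHED_INTERACT_THRESH, so
 * SCHED_CURR() treats it as interactive.  The reverse ratio (runtime 10000,
 * slptime 1000) scores 50 + 5 = 55 and is treated as a CPU hog.
 */
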
int
sched_rr_interval(void)
{
        return (SCHED_SLICE_MAX);
}

void
sched_pctcpu_update(struct kse *ke)
{
        /*
         * Adjust counters and watermark for pctcpu calc.
         */
        /*
         * Shift the tick count out so that the divide doesn't round away
         * our results.
         */
        ke->ke_ticks <<= 10;
        ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
            SCHED_CPU_TICKS;
        ke->ke_ticks >>= 10;
        ke->ke_ltick = ticks;
        ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

#ifdef SMP
/* XXX Should be changed to kseq_load_lowest() */
int
sched_pickcpu(void)
{
        struct kseq *kseq;
        int load;
        int cpu;
        int i;

        if (!smp_started)
                return (0);

        load = 0;
        cpu = 0;

        for (i = 0; i < mp_maxid; i++) {
                if (CPU_ABSENT(i))
                        continue;
                kseq = KSEQ_CPU(i);
                if (kseq->ksq_tsload < load) {
                        cpu = i;
                        load = kseq->ksq_tsload;
                }
        }

        CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
        return (cpu);
}
#else
int
sched_pickcpu(void)
{
        return (0);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
        struct kse *ke;
        struct runq *rq;

        mtx_assert(&sched_lock, MA_OWNED);
        ke = td->td_kse;
        td->td_priority = prio;

        if (TD_ON_RUNQ(td)) {
                rq = ke->ke_runq;

                runq_remove(rq, ke);
                runq_add(rq, ke);
        }
}

void
sched_switchout(struct thread *td)
{
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);

        ke = td->td_kse;

        td->td_last_kse = ke;
        td->td_lastcpu = ke->ke_oncpu;
        ke->ke_oncpu = NOCPU;
        td->td_flags &= ~TDF_NEEDRESCHED;

        if (TD_IS_RUNNING(td)) {
                setrunqueue(td);
                return;
        }
        td->td_kse->ke_runq = NULL;

        /*
         * We will not be on the run queue.  So we must be
         * sleeping or similar.
         */
        if (td->td_proc->p_flag & P_THREADED)
                kse_reassign(ke);
}

void
sched_switchin(struct thread *td)
{
        /* struct kse *ke = td->td_kse; */
        mtx_assert(&sched_lock, MA_OWNED);

        td->td_kse->ke_oncpu = PCPU_GET(cpuid);
#if SCHED_STRICT_RESCHED
        if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
            td->td_priority != td->td_ksegrp->kg_user_pri)
                curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

void
sched_nice(struct ksegrp *kg, int nice)
{
        struct thread *td;

        kg->kg_nice = nice;
        sched_priority(kg);
        FOREACH_THREAD_IN_GROUP(kg, td) {
                td->td_flags |= TDF_NEEDRESCHED;
        }
}

void
sched_sleep(struct thread *td, u_char prio)
{
        mtx_assert(&sched_lock, MA_OWNED);

        td->td_slptime = ticks;
        td->td_priority = prio;

#ifdef SMP
        if (td->td_priority < PZERO) {
                kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
                td->td_schedflag |= TD_SCHED_BLOAD;
        }
#endif
}

void
sched_wakeup(struct thread *td)
{
        mtx_assert(&sched_lock, MA_OWNED);

        /*
         * Let the kseg know how long we slept for.  This is because process
         * interactivity behavior is modeled in the kseg.
         */
        if (td->td_slptime) {
                struct ksegrp *kg;

                kg = td->td_ksegrp;
                kg->kg_slptime += (ticks - td->td_slptime) << 10;
                sched_priority(kg);
                td->td_slptime = 0;
        }
#ifdef SMP
        if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
                kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
                td->td_schedflag &= ~TD_SCHED_BLOAD;
        }
#endif
        setrunqueue(td);
#if SCHED_STRICT_RESCHED
        if (td->td_priority < curthread->td_priority)
                curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
        struct kse *ckse;
        struct kse *pkse;

        mtx_assert(&sched_lock, MA_OWNED);
        ckse = FIRST_KSE_IN_KSEGRP(child);
        pkse = FIRST_KSE_IN_KSEGRP(kg);

        /* XXX Need something better here */
        if (kg->kg_slptime > kg->kg_runtime) {
                child->kg_slptime = SCHED_DYN_RANGE;
                child->kg_runtime = kg->kg_slptime / SCHED_DYN_RANGE;
        } else {
                child->kg_runtime = SCHED_DYN_RANGE;
                child->kg_slptime = kg->kg_runtime / SCHED_DYN_RANGE;
        }
#if 0
        child->kg_slptime = kg->kg_slptime;
        child->kg_runtime = kg->kg_runtime;
#endif
        child->kg_user_pri = kg->kg_user_pri;

#if 0
        if (pkse->ke_cpu != PCPU_GET(cpuid)) {
                printf("pkse->ke_cpu = %d\n", pkse->ke_cpu);
                printf("cpuid = %d", PCPU_GET(cpuid));
                Debugger("stop");
        }
#endif

        ckse->ke_slice = pkse->ke_slice;
        ckse->ke_cpu = pkse->ke_cpu; /* sched_pickcpu(); */
        ckse->ke_runq = NULL;
        /*
         * Claim that we've been running for one second for statistical
         * purposes.
         */
        ckse->ke_ticks = 0;
        ckse->ke_ltick = ticks;
        ckse->ke_ftick = ticks - hz;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
        /* XXX Need something better here */
        mtx_assert(&sched_lock, MA_OWNED);
#if 0
        kg->kg_slptime = child->kg_slptime;
        kg->kg_runtime = child->kg_runtime;
        sched_priority(kg);
#endif
}

void
sched_clock(struct thread *td)
{
        struct kse *ke;
#if SCHED_STRICT_RESCHED
        struct kse *nke;
        struct kseq *kseq;
#endif
        struct ksegrp *kg;

        ke = td->td_kse;
        kg = td->td_ksegrp;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((td != NULL), ("schedclock: null thread pointer"));

        /* Adjust ticks for pctcpu */
        ke->ke_ticks++;
        ke->ke_ltick = ticks;

        /* Go up to one second beyond our max and then trim back down */
        if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
                sched_pctcpu_update(ke);

        if (td->td_kse->ke_flags & KEF_IDLEKSE)
                return;

        /*
         * Check for a higher priority task on the run queue.  This can happen
         * on SMP if another processor woke up a process on our runq.
         */
#if SCHED_STRICT_RESCHED
        kseq = KSEQ_SELF();
        nke = runq_choose(kseq->ksq_curr);

        if (nke && nke->ke_thread &&
            nke->ke_thread->td_priority < td->td_priority)
                td->td_flags |= TDF_NEEDRESCHED;
#endif
        /*
         * We only do slicing code for TIMESHARE ksegrps.
         */
        if (kg->kg_pri_class != PRI_TIMESHARE)
                return;
        /*
         * We used a tick; charge it to the ksegrp so that we can compute our
         * "interactivity".
         */
        kg->kg_runtime += 1 << 10;

        /*
         * We used up one time slice.
         */
        ke->ke_slice--;
        /*
         * We're out of time, recompute priorities and requeue.  We'll get a
         * new slice when we're put back on the run queue.
         */
        if (ke->ke_slice <= 0) {
                sched_priority(kg);
                td->td_flags |= TDF_NEEDRESCHED;
                ke->ke_runq = NULL;
        }
}
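
/*
 * Note on the accounting in sched_clock() above: every call on a TIMESHARE
 * thread adds 1 << 10 to kg_runtime, and sched_wakeup() adds sleep time in
 * the same shifted units, so SCHED_SLP_RUN_MAX ((hz * 2) << 10) corresponds
 * to roughly two seconds' worth of combined history before sched_slice()
 * divides both values by SCHED_SLP_RUN_THROTTLE and older behavior begins
 * to be forgotten.
 */
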
int
sched_runnable(void)
{
        struct kseq *kseq;

        kseq = KSEQ_SELF();

        if (kseq->ksq_tsload || kseq->ksq_idload || kseq->ksq_itload)
                return (1);
#ifdef SMP
        /*
         * For SMP we may steal other processors' KSEs.  Just search until we
         * verify that at least one other cpu has a runnable task.
         */
        if (smp_started) {
                int i;

#if 0
                if (kseq->ksq_bload)
                        return (0);
#endif

                for (i = 0; i < mp_maxid; i++) {
                        if (CPU_ABSENT(i))
                                continue;
                        kseq = KSEQ_CPU(i);
                        if (kseq->ksq_tsload)
                                return (1);
                }
        }
#endif
        return (0);
}

void
sched_userret(struct thread *td)
{
        struct ksegrp *kg;

        kg = td->td_ksegrp;

        if (td->td_priority != kg->kg_user_pri) {
                mtx_lock_spin(&sched_lock);
                td->td_priority = kg->kg_user_pri;
                mtx_unlock_spin(&sched_lock);
        }
}

struct kse *
sched_choose(void)
{
        struct kseq *kseq;
        struct kse *ke;

        kseq = KSEQ_SELF();
retry:
        ke = kseq_choose(kseq);

        if (ke) {
                ke->ke_state = KES_THREAD;
                kseq_rem(kseq, ke);

                /*
                 * If we dequeue a kse with a slice of zero it was below the
                 * nice threshold to acquire a slice.  Force it on to the
                 * next run queue and let kseq_add() pick a new slice.
                 *
                 * XXX This code should live in a TIMESHARE specific section.
                 */
                if (ke->ke_slice == 0) {
                        ke->ke_runq = kseq->ksq_next;
                        kseq_add(kseq, ke);
                        goto retry;
                }
        }

#ifdef SMP
        if (ke == NULL && smp_started) {
#if 0
                if (kseq->ksq_bload)
                        return (NULL);
#endif
                /*
                 * Find the cpu with the highest load and steal one proc.
                 */
                kseq = kseq_load_highest();
                if (kseq == NULL)
                        return (NULL);
                /*
                 * XXX Do we want to migrate interrupt or realtime threads?
                 * Currently we'll only try to steal if there is a TIMESHARE
                 * thread available but we will steal a REALTIME or interrupt
                 * thread if that is what kseq_choose() hands back.
                 */
                ke = kseq_choose(kseq);
                kseq_rem(kseq, ke);

                ke->ke_state = KES_THREAD;
                ke->ke_runq = NULL;
                ke->ke_cpu = PCPU_GET(cpuid);
        }
#endif
        return (ke);
}

void
sched_add(struct kse *ke)
{
        struct kseq *kseq;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
        KASSERT((ke->ke_thread->td_kse != NULL),
            ("sched_add: No KSE on thread"));
        KASSERT(ke->ke_state != KES_ONRUNQ,
            ("sched_add: kse %p (%s) already in run queue", ke,
            ke->ke_proc->p_comm));
        KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
            ("sched_add: process swapped out"));

        switch (ke->ke_ksegrp->kg_pri_class) {
        case PRI_ITHD:
        case PRI_REALTIME:
                kseq = KSEQ_SELF();
                break;
        case PRI_TIMESHARE:
        case PRI_IDLE:
        default:
                kseq = KSEQ_CPU(ke->ke_cpu);
                break;
        }

        ke->ke_ksegrp->kg_runq_kses++;
        ke->ke_state = KES_ONRUNQ;

        kseq_add(kseq, ke);
}

void
sched_rem(struct kse *ke)
{
        mtx_assert(&sched_lock, MA_OWNED);
        /* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

        ke->ke_runq = NULL;
        ke->ke_state = KES_THREAD;
        ke->ke_ksegrp->kg_runq_kses--;

        kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}
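
/*
 * Rough example of the %cpu math in sched_pctcpu() below, assuming
 * sched_clock() is driven at stathz and stathz == 128: a KSE that ran for
 * the entire SCHED_CPU_TIME (10 second) window accumulates about 1280 ticks,
 * so rtick = 1280 / 10 = 128 and pctcpu comes out near FSCALE (100%); one
 * that ran half the time lands near FSCALE / 2.
 */
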
fixpt_t
sched_pctcpu(struct kse *ke)
{
        fixpt_t pctcpu;
        int realstathz;

        pctcpu = 0;
        realstathz = stathz ? stathz : hz;

        if (ke->ke_ticks) {
                int rtick;

                /* Update to account for time potentially spent sleeping */
                ke->ke_ltick = ticks;
                sched_pctcpu_update(ke);

                /* How many rticks per second? */
                rtick = ke->ke_ticks / SCHED_CPU_TIME;
                pctcpu = (FSCALE * ((FSCALE * rtick) / realstathz)) >> FSHIFT;
        }

        ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

        return (pctcpu);
}

int
sched_sizeof_kse(void)
{
        return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
        return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
        return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
        return (sizeof(struct thread) + sizeof(struct td_sched));
}