/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

#define SCHED_STRICT_RESCHED    1

/*
 * These data structures are allocated within their parent data structure
 * but are scheduler specific.
 */

struct ke_sched {
        int             ske_slice;
        struct runq     *ske_runq;
        /* The following variables are only used for pctcpu calculation */
        int             ske_ltick;      /* Last tick that we were running on */
        int             ske_ftick;      /* First tick that we were running on */
        int             ske_ticks;      /* Tick count */
        u_char          ske_cpu;
};
#define ke_slice        ke_sched->ske_slice
#define ke_runq         ke_sched->ske_runq
#define ke_ltick        ke_sched->ske_ltick
#define ke_ftick        ke_sched->ske_ftick
#define ke_ticks        ke_sched->ske_ticks
#define ke_cpu          ke_sched->ske_cpu

struct kg_sched {
        int     skg_slptime;    /* Number of ticks we voluntarily slept */
        int     skg_runtime;    /* Number of ticks we were running */
};
#define kg_slptime      kg_sched->skg_slptime
#define kg_runtime      kg_sched->skg_runtime

struct td_sched {
        int     std_slptime;
        int     std_schedflag;
};
#define td_slptime      td_sched->std_slptime
#define td_schedflag    td_sched->std_schedflag

#define TD_SCHED_BLOAD  0x0001          /*
                                         * thread was counted as being in short
                                         * term sleep.
                                         */
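
/*
 * The alias macros above let the rest of this file refer to the scheduler
 * private fields as if they lived directly in the parent structure; for
 * example, td->td_slptime expands to td->td_sched->std_slptime.  The space
 * for each private structure is accounted for by the sched_sizeof_*()
 * functions at the end of this file, which report the size of the parent
 * structure plus the size of its scheduler private part.
 */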

struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 */
#define SCHED_PRI_RANGE         (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define SCHED_PRI_NRESV         40
#define SCHED_PRI_BASE          (SCHED_PRI_NRESV / 2)
#define SCHED_PRI_DYN           (SCHED_PRI_RANGE - SCHED_PRI_NRESV)
#define SCHED_PRI_DYN_HALF      (SCHED_PRI_DYN / 2)

/*
 * These determine how sleep time affects the priority of a process.
 *
 * SLP_RUN_MAX:      Maximum amount of sleep time + run time we'll accumulate
 *                   before throttling back.
 * SLP_RUN_THROTTLE: Divisor for reducing slp/run time.
 * SLP_RATIO:        Compute a bounded ratio of slp time vs run time.
 * SLP_TOPRI:        Convert a number of ticks slept and ticks ran into a
 *                   priority.
 */
#define SCHED_SLP_RUN_MAX       ((hz * 30) * 1024)
#define SCHED_SLP_RUN_THROTTLE  (10)
static __inline int
sched_slp_ratio(int b, int s)
{
        b /= SCHED_PRI_DYN_HALF;
        if (b == 0)
                return (0);
        s /= b;
        return (s);
}
#define SCHED_SLP_TOPRI(slp, run) \
    ((((slp) > (run))? \
    sched_slp_ratio((slp), (run)): \
    SCHED_PRI_DYN_HALF + (SCHED_PRI_DYN_HALF - sched_slp_ratio((run), (slp))))+ \
    SCHED_PRI_NRESV / 2)
/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:   Minimum time slice granted, in units of ticks.
 * SLICE_MAX:   Maximum time slice granted.
 * SLICE_RANGE: Range of available time slices scaled by hz.
 * SLICE_SCALE: The number of slices granted per unit of pri or slp.
 * PRI_TOSLICE: Compute a slice size that is proportional to the priority.
 * SLP_TOSLICE: Compute a slice size that is inversely proportional to the
 *              amount of time slept. (smaller slices for interactive ksegs)
 * PRI_COMP:    This determines what fraction of the actual slice comes from
 *              the slice size computed from the priority.
 * SLP_COMP:    This determines what fraction of the actual slice comes from
 *              the slice size computed from the sleep time.
 */
#define SCHED_SLICE_MIN         (hz / 100)
#define SCHED_SLICE_MAX         (hz / 4)
#define SCHED_SLICE_RANGE       (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define SCHED_SLICE_SCALE(val, max)     (((val) * SCHED_SLICE_RANGE) / (max))
#define SCHED_PRI_TOSLICE(pri) \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE))
#define SCHED_SLP_TOSLICE(slp) \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_PRI_DYN))
#define SCHED_SLP_COMP(slice)   (((slice) / 5) * 3)     /* 60% */
#define SCHED_PRI_COMP(slice)   (((slice) / 5) * 2)     /* 40% */
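
/*
 * Worked example, assuming hz = 1000 and the usual timeshare range of
 * PRI_MIN_TIMESHARE = 160 to PRI_MAX_TIMESHARE = 223 (both are
 * configuration dependent):
 *
 *      SCHED_PRI_RANGE = 64    SCHED_PRI_NRESV = 40
 *      SCHED_PRI_DYN = 24      SCHED_PRI_DYN_HALF = 12
 *      SCHED_SLICE_MIN = 10    SCHED_SLICE_MAX = 250   (ticks)
 *
 * A kseg that slept three times as long as it ran (slp = 3072, run = 1024)
 * gets SCHED_SLP_TOPRI = sched_slp_ratio(3072, 1024) + 20
 * = 1024 / (3072 / 12) + 20 = 24, toward the favorable end of the dynamic
 * range.  With the ratio reversed (slp = 1024, run = 3072) it gets
 * 12 + (12 - 4) + 20 = 40, toward the unfavorable end.  sched_priority()
 * below adds PRI_MIN_TIMESHARE and the nice value and clamps the result to
 * the timeshare range.
 */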

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 *
 * XXX nice value should affect how interactive a kg is.
 */
#define SCHED_CURR(kg)  (((kg)->kg_slptime > (kg)->kg_runtime && \
    sched_slp_ratio((kg)->kg_slptime, (kg)->kg_runtime) > 4) || \
    (kg)->kg_pri_class != PRI_TIMESHARE)

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:  Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across.
 */

#define SCHED_CPU_TIME  60
#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
        struct runq     ksq_runqs[2];
        struct runq     *ksq_curr;
        struct runq     *ksq_next;
        int             ksq_load;       /* Total runnable */
#ifdef SMP
        unsigned int    ksq_rslices;    /* Slices on run queue */
        unsigned int    ksq_bload;      /* Threads waiting on IO */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq     kseq_cpu[MAXCPU];
#define KSEQ_SELF()     (&kseq_cpu[PCPU_GET(cpuid)])
#define KSEQ_CPU(x)     (&kseq_cpu[(x)])
#else
struct kseq     kseq_cpu;
#define KSEQ_SELF()     (&kseq_cpu)
#define KSEQ_CPU(x)     (&kseq_cpu)
#endif

static int sched_slice(struct ksegrp *kg);
static int sched_priority(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static __inline void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif

static __inline void
kseq_add(struct kseq *kseq, struct kse *ke)
{
        runq_add(ke->ke_runq, ke);
        kseq->ksq_load++;
#ifdef SMP
        kseq->ksq_rslices += ke->ke_slice;
#endif
}

static __inline void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_load--;
        runq_remove(ke->ke_runq, ke);
#ifdef SMP
        kseq->ksq_rslices -= ke->ke_slice;
#endif
}

#ifdef SMP
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_bload++;
}

static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_bload--;
}

struct kseq *
kseq_load_highest(void)
{
        struct kseq *kseq;
        int load;
        int cpu;
        int i;

        cpu = 0;
        load = 0;

        for (i = 0; i < mp_maxid; i++) {
                if (CPU_ABSENT(i))
                        continue;
                kseq = KSEQ_CPU(i);
                if (kseq->ksq_load > load) {
                        load = kseq->ksq_load;
                        cpu = i;
                }
        }
        if (load)
                return (KSEQ_CPU(cpu));

        return (NULL);
}
#endif
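
/*
 * Pick the next kse to run from a per-cpu kseq.  ksq_curr is drained first;
 * when it is empty the two queues are swapped and selection continues from
 * the old ksq_next.  Non-interactive timeshare ksegs are queued on ksq_next
 * (see SCHED_CURR() and sched_add()), so they do not run until the current
 * queue has been drained.
 */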
struct kse *
kseq_choose(struct kseq *kseq)
{
        struct kse *ke;
        struct runq *swap;

        if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
                swap = kseq->ksq_curr;
                kseq->ksq_curr = kseq->ksq_next;
                kseq->ksq_next = swap;
                ke = runq_choose(kseq->ksq_curr);
        }

        return (ke);
}

static void
kseq_setup(struct kseq *kseq)
{
        kseq->ksq_curr = &kseq->ksq_runqs[0];
        kseq->ksq_next = &kseq->ksq_runqs[1];
        runq_init(kseq->ksq_curr);
        runq_init(kseq->ksq_next);
        kseq->ksq_load = 0;
#ifdef SMP
        kseq->ksq_rslices = 0;
        kseq->ksq_bload = 0;
#endif
}

static void
sched_setup(void *dummy)
{
        int i;

        mtx_lock_spin(&sched_lock);
        /* init kseqs */
        for (i = 0; i < MAXCPU; i++)
                kseq_setup(KSEQ_CPU(i));
        mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static int
sched_priority(struct ksegrp *kg)
{
        int pri;

        if (kg->kg_pri_class != PRI_TIMESHARE)
                return (kg->kg_user_pri);

        pri = SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime);
        CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d",
            kg->kg_slptime, pri);

        pri += PRI_MIN_TIMESHARE;
        pri += kg->kg_nice;

        if (pri > PRI_MAX_TIMESHARE)
                pri = PRI_MAX_TIMESHARE;
        else if (pri < PRI_MIN_TIMESHARE)
                pri = PRI_MIN_TIMESHARE;

        kg->kg_user_pri = pri;

        return (kg->kg_user_pri);
}

/*
 * Calculate a time slice based on the process priority.
 */
static int
sched_slice(struct ksegrp *kg)
{
        int pslice;
        int sslice;
        int slice;
        int pri;

        pri = kg->kg_user_pri;
        pri -= PRI_MIN_TIMESHARE;
        pslice = SCHED_PRI_TOSLICE(pri);
        sslice = SCHED_PRI_TOSLICE(SCHED_SLP_TOPRI(kg->kg_slptime,
            kg->kg_runtime));
        /*
        SCHED_SLP_TOSLICE(SCHED_SLP_RATIO(
            kg->kg_slptime, kg->kg_runtime));
        */
        slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice);

        CTR4(KTR_RUNQ,
            "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d",
            pri, sslice, pslice, slice);

        if (slice < SCHED_SLICE_MIN)
                slice = SCHED_SLICE_MIN;
        else if (slice > SCHED_SLICE_MAX)
                slice = SCHED_SLICE_MAX;

        /*
         * Every time we grant a new slice check to see if we need to scale
         * back the slp and run time in the kg.  This will cause us to forget
         * old interactivity while maintaining the current ratio.
         */
        if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) {
                kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE;
                kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE;
        }

        return (slice);
}

int
sched_rr_interval(void)
{
        return (SCHED_SLICE_MAX);
}

void
sched_pctcpu_update(struct kse *ke)
{
        /*
         * Adjust counters and watermark for pctcpu calc.
         */
        ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
            SCHED_CPU_TICKS;
        ke->ke_ltick = ticks;
        ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}
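
/*
 * Example of the pctcpu arithmetic: sched_clock() adds 10000 to ke_ticks
 * each time it charges this kse, and the function above periodically trims
 * the counters back to a SCHED_CPU_TICKS (60 second) window.  sched_pctcpu()
 * later computes rtick = ke_ticks / (SCHED_CPU_TIME * 10000), the average
 * number of charges per second, so a kse that was charged realstathz times
 * per second for the whole window yields rtick == realstathz and a pctcpu
 * of FSCALE, i.e. 100%.
 */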

#ifdef SMP
/* XXX Should be changed to kseq_load_lowest() */
int
sched_pickcpu(void)
{
        struct kseq *kseq;
        int load;
        int cpu;
        int i;

        if (!smp_started)
                return (0);

        load = -1;      /* No candidate cpu examined yet. */
        cpu = 0;

        for (i = 0; i < mp_maxid; i++) {
                if (CPU_ABSENT(i))
                        continue;
                kseq = KSEQ_CPU(i);
                if (load == -1 || kseq->ksq_load < load) {
                        cpu = i;
                        load = kseq->ksq_load;
                }
        }

        CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
        return (cpu);
}
#else
int
sched_pickcpu(void)
{
        return (0);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
        struct kse *ke;
        struct runq *rq;

        mtx_assert(&sched_lock, MA_OWNED);
        ke = td->td_kse;
        td->td_priority = prio;

        if (TD_ON_RUNQ(td)) {
                rq = ke->ke_runq;

                runq_remove(rq, ke);
                runq_add(rq, ke);
        }
}

void
sched_switchout(struct thread *td)
{
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);

        ke = td->td_kse;

        td->td_last_kse = ke;
        td->td_lastcpu = ke->ke_oncpu;
        ke->ke_oncpu = NOCPU;
        td->td_flags &= ~TDF_NEEDRESCHED;

        if (TD_IS_RUNNING(td)) {
                setrunqueue(td);
                return;
        } else
                td->td_kse->ke_runq = NULL;

        /*
         * We will not be on the run queue.  So we must be
         * sleeping or similar.
         */
        if (td->td_proc->p_flag & P_THREADED)
                kse_reassign(ke);
}

void
sched_switchin(struct thread *td)
{
        /* struct kse *ke = td->td_kse; */
        mtx_assert(&sched_lock, MA_OWNED);

        td->td_kse->ke_oncpu = PCPU_GET(cpuid);
#if SCHED_STRICT_RESCHED
        if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
            td->td_priority != td->td_ksegrp->kg_user_pri)
                curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

void
sched_nice(struct ksegrp *kg, int nice)
{
        struct thread *td;

        kg->kg_nice = nice;
        sched_priority(kg);
        FOREACH_THREAD_IN_GROUP(kg, td) {
                td->td_flags |= TDF_NEEDRESCHED;
        }
}

void
sched_sleep(struct thread *td, u_char prio)
{
        mtx_assert(&sched_lock, MA_OWNED);

        td->td_slptime = ticks;
        td->td_priority = prio;

        /*
         * If this is an interactive task, clear its queue so it moves back
         * on to curr when it wakes up.  Otherwise let it stay on the queue
         * that it was assigned to.
         */
        if (SCHED_CURR(td->td_kse->ke_ksegrp))
                td->td_kse->ke_runq = NULL;
#ifdef SMP
        if (td->td_priority < PZERO) {
                kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
                td->td_schedflag |= TD_SCHED_BLOAD;
        }
#endif
}

void
sched_wakeup(struct thread *td)
{
        mtx_assert(&sched_lock, MA_OWNED);

        /*
         * Let the kseg know how long we slept for.  This is because process
         * interactivity behavior is modeled in the kseg.
         */
        if (td->td_slptime) {
                struct ksegrp *kg;

                kg = td->td_ksegrp;
                kg->kg_slptime += (ticks - td->td_slptime) * 1024;
                sched_priority(kg);
                td->td_slptime = 0;
        }
#ifdef SMP
        if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
                kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
                td->td_schedflag &= ~TD_SCHED_BLOAD;
        }
#endif
        setrunqueue(td);
#if SCHED_STRICT_RESCHED
        if (td->td_priority < curthread->td_priority)
                curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}
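
/*
 * Interactivity history is kept in units of 1024 per tick: sched_wakeup()
 * above adds (ticks - td_slptime) * 1024 to kg_slptime, and sched_clock()
 * adds 1024 to kg_runtime for every tick charged to the kseg.  Assuming
 * hz = 1000, a 500ms sleep adds 512000 to kg_slptime, and SCHED_SLP_RUN_MAX
 * corresponds to 30 seconds of combined history, beyond which sched_slice()
 * divides both values by SCHED_SLP_RUN_THROTTLE (10) so that old behavior
 * is forgotten while the current slp/run ratio is preserved.
 */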

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
        struct kse *ckse;
        struct kse *pkse;

        mtx_assert(&sched_lock, MA_OWNED);
        ckse = FIRST_KSE_IN_KSEGRP(child);
        pkse = FIRST_KSE_IN_KSEGRP(kg);

        /* XXX Need something better here */
        if (kg->kg_slptime > kg->kg_runtime) {
                child->kg_slptime = SCHED_PRI_DYN;
                child->kg_runtime = kg->kg_slptime / SCHED_PRI_DYN;
        } else {
                child->kg_runtime = SCHED_PRI_DYN;
                child->kg_slptime = kg->kg_runtime / SCHED_PRI_DYN;
        }
#if 0
        child->kg_slptime = kg->kg_slptime;
        child->kg_runtime = kg->kg_runtime;
#endif
        child->kg_user_pri = kg->kg_user_pri;

#if 0
        if (pkse->ke_cpu != PCPU_GET(cpuid)) {
                printf("pkse->ke_cpu = %d\n", pkse->ke_cpu);
                printf("cpuid = %d", PCPU_GET(cpuid));
                Debugger("stop");
        }
#endif

        ckse->ke_slice = pkse->ke_slice;
        ckse->ke_cpu = pkse->ke_cpu;    /* sched_pickcpu(); */
        ckse->ke_runq = NULL;
        /*
         * Claim that we've been running for one second for statistical
         * purposes.
         */
        ckse->ke_ticks = 0;
        ckse->ke_ltick = ticks;
        ckse->ke_ftick = ticks - hz;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
        /* XXX Need something better here */
        mtx_assert(&sched_lock, MA_OWNED);
        kg->kg_slptime = child->kg_slptime;
        kg->kg_runtime = child->kg_runtime;
        sched_priority(kg);
}

void
sched_clock(struct thread *td)
{
        struct kse *ke;
#if SCHED_STRICT_RESCHED
        struct kse *nke;
        struct kseq *kseq;
#endif
        struct ksegrp *kg;

        ke = td->td_kse;
        kg = td->td_ksegrp;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((td != NULL), ("schedclock: null thread pointer"));

        /* Adjust ticks for pctcpu */
        ke->ke_ticks += 10000;
        ke->ke_ltick = ticks;
        /* Go up to one second beyond our max and then trim back down */
        if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
                sched_pctcpu_update(ke);

        if (td->td_kse->ke_flags & KEF_IDLEKSE)
                return;

        /*
         * Check for a higher priority task on the run queue.  This can happen
         * on SMP if another processor woke up a process on our runq.
         */
#if SCHED_STRICT_RESCHED
        kseq = KSEQ_SELF();
        nke = runq_choose(kseq->ksq_curr);

        if (nke && nke->ke_thread &&
            nke->ke_thread->td_priority < td->td_priority)
                td->td_flags |= TDF_NEEDRESCHED;
#endif
        /*
         * We used a tick; charge it to the ksegrp so that we can compute our
         * "interactivity".
         */
        kg->kg_runtime += 1024;

        /*
         * We used up one time slice.
         */
        ke->ke_slice--;
        /*
         * We're out of time, recompute priorities and requeue.
         */
        if (ke->ke_slice == 0) {
                td->td_priority = sched_priority(kg);
                ke->ke_slice = sched_slice(kg);
                td->td_flags |= TDF_NEEDRESCHED;
                ke->ke_runq = NULL;
        }
}

int
sched_runnable(void)
{
        struct kseq *kseq;

        kseq = KSEQ_SELF();

        if (kseq->ksq_load)
                return (1);
#ifdef SMP
        /*
         * For SMP we may steal other processors' KSEs.  Just search until we
         * verify that at least one other cpu has a runnable task.
         */
        if (smp_started) {
                int i;

#if 0
                if (kseq->ksq_bload)
                        return (0);
#endif

                for (i = 0; i < mp_maxid; i++) {
                        if (CPU_ABSENT(i))
                                continue;
                        kseq = KSEQ_CPU(i);
                        if (kseq->ksq_load)
                                return (1);
                }
        }
#endif
        return (0);
}

void
sched_userret(struct thread *td)
{
        struct ksegrp *kg;

        kg = td->td_ksegrp;

        if (td->td_priority != kg->kg_user_pri) {
                mtx_lock_spin(&sched_lock);
                td->td_priority = kg->kg_user_pri;
                mtx_unlock_spin(&sched_lock);
        }
}
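
/*
 * Select the next kse for this cpu.  The local kseq is tried first; on SMP,
 * if it is empty, the most loaded remote kseq is raided via
 * kseq_load_highest() and the stolen kse is rebound to this cpu.
 */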
struct kse *
sched_choose(void)
{
        struct kseq *kseq;
        struct kse *ke;

        kseq = KSEQ_SELF();
        ke = kseq_choose(kseq);

        if (ke) {
                ke->ke_state = KES_THREAD;
                kseq_rem(kseq, ke);
        }

#ifdef SMP
        if (ke == NULL && smp_started) {
#if 0
                if (kseq->ksq_bload)
                        return (NULL);
#endif
                /*
                 * Find the cpu with the highest load and steal one proc.
                 */
                kseq = kseq_load_highest();
                if (kseq == NULL)
                        return (NULL);
                ke = kseq_choose(kseq);
                kseq_rem(kseq, ke);

                ke->ke_state = KES_THREAD;
                ke->ke_runq = NULL;
                ke->ke_cpu = PCPU_GET(cpuid);
        }
#endif
        return (ke);
}

void
sched_add(struct kse *ke)
{
        struct kseq *kseq;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
        KASSERT((ke->ke_thread->td_kse != NULL),
            ("sched_add: No KSE on thread"));
        KASSERT(ke->ke_state != KES_ONRUNQ,
            ("sched_add: kse %p (%s) already in run queue", ke,
            ke->ke_proc->p_comm));
        KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
            ("sched_add: process swapped out"));

        kseq = KSEQ_CPU(ke->ke_cpu);

        if (ke->ke_runq == NULL) {
                if (SCHED_CURR(ke->ke_ksegrp))
                        ke->ke_runq = kseq->ksq_curr;
                else
                        ke->ke_runq = kseq->ksq_next;
        }
        ke->ke_ksegrp->kg_runq_kses++;
        ke->ke_state = KES_ONRUNQ;

        kseq_add(kseq, ke);
}

void
sched_rem(struct kse *ke)
{
        mtx_assert(&sched_lock, MA_OWNED);
        /* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

        ke->ke_runq = NULL;
        ke->ke_state = KES_THREAD;
        ke->ke_ksegrp->kg_runq_kses--;

        kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

fixpt_t
sched_pctcpu(struct kse *ke)
{
        fixpt_t pctcpu;
        int realstathz;

        pctcpu = 0;
        realstathz = stathz ? stathz : hz;

        if (ke->ke_ticks) {
                int rtick;

                /* Update to account for time potentially spent sleeping */
                ke->ke_ltick = ticks;
                sched_pctcpu_update(ke);

                /* How many rticks per second? */
                rtick = ke->ke_ticks / (SCHED_CPU_TIME * 10000);
                pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
        }

        ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

        return (pctcpu);
}

int
sched_sizeof_kse(void)
{
        return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
        return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
        return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
        return (sizeof(struct thread) + sizeof(struct td_sched));
}