Lines Matching +full:over +full:- +full:current +full:- +full:scale +full:- +full:factor
1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org>
32 * performance under load even on uni-processor systems.
90 #define TDQ_LOADNAME_LEN (sizeof("CPU ") + sizeof(__XSTRING(MAXCPU)) - 1 + sizeof(" load"))
115 #define THREAD_CAN_MIGRATE(td) ((td)->td_pinned == 0)
117 CPU_ISSET((cpu), &(td)->td_cpuset->cs_mask)
124 * Priority ranges used for interactive and non-interactive timeshare
127 * (NHALF, x, and NHALF) handle non-interactive threads with the outer
130 #define PRI_TIMESHARE_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
131 #define PRI_INTERACT_RANGE ((PRI_TIMESHARE_RANGE - SCHED_PRI_NRESV) / 2)
132 #define PRI_BATCH_RANGE (PRI_TIMESHARE_RANGE - PRI_INTERACT_RANGE)
135 #define PRI_MAX_INTERACT (PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE - 1)
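A brief worked example of the split above, using purely illustrative numbers that are assumptions rather than values taken from the headers: if the timeshare band spanned 136 priority levels and SCHED_PRI_NRESV reserved 40 of them for nice handling, then PRI_INTERACT_RANGE = (136 - 40) / 2 = 48 and PRI_BATCH_RANGE = 136 - 48 = 88, with PRI_MAX_INTERACT = PRI_MIN_TIMESHARE + 47; the 48 numerically lowest (highest-priority) levels serve interactive threads and the remaining 88 serve batch threads.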
140 * These macros determine priorities for non-interactive threads. They are
151 #define SCHED_PRI_CPU_RANGE (PRI_BATCH_RANGE - SCHED_PRI_NRESV)
152 #define SCHED_PRI_NICE(nice) (((nice) - PRIO_MIN) * 5 / 4)
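A minimal userland sketch of the SCHED_PRI_NICE mapping just above, assuming the standard PRIO_MIN of -20 and PRIO_MAX of 20 from <sys/resource.h>; the EXAMPLE_PRI_NICE name is made up for illustration and is not part of the scheduler:

#include <stdio.h>
#include <sys/resource.h>	/* PRIO_MIN (-20) .. PRIO_MAX (20) */

/* Mirrors the macro above: maps the nice range onto a 0..50 priority offset. */
#define	EXAMPLE_PRI_NICE(nice)	(((nice) - PRIO_MIN) * 5 / 4)

int
main(void)
{
	/* Prints: nice -20 -> 0, nice 0 -> 25, nice 20 -> 50 */
	printf("nice -20 -> %d, nice 0 -> %d, nice 20 -> %d\n",
	    EXAMPLE_PRI_NICE(-20), EXAMPLE_PRI_NICE(0), EXAMPLE_PRI_NICE(20));
	return (0);
}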
178 #define RQ_TS_POL_MODULO (RQ_TS_POL_MAX - RQ_TS_POL_MIN + 1)
184 * Must be at most 20 to avoid overflow in sched_pctcpu()'s current formula.
186 * SCHED_TICK_SHIFT: Shift factor to avoid rounding away results.
189 * SCHED_CPU_DECAY_NUMER: Numerator of %CPU decay factor.
190 * SCHED_CPU_DECAY_DENOM: Denominator of %CPU decay factor.
195 #define SCHED_TICK_RUN_SHIFTED(ts) ((ts)->ts_ticks)
196 #define SCHED_TICK_LENGTH(ts) (max((ts)->ts_ltick - (ts)->ts_ftick, 1))
214 * INTERACT_THRESH: Threshold for placement on the current runq.
230 #define TDF_SLICEEND TDF_SCHED2 /* Thread time slice is over. */
234 * the shift factor. Without the shift the error rate
256 static int __read_mostly sched_idlespinthresh = -1;
259 * tdq - per processor runqs and statistics. A mutex synchronizes access to
265 * (l) all accesses are CPU-local
278 struct thread *tdq_curthread; /* (t) Current executing thread. */
307 #define TDQ_LOAD(tdq) atomic_load_int(&(tdq)->tdq_load)
308 #define TDQ_TRANSFERABLE(tdq) atomic_load_int(&(tdq)->tdq_transferable)
309 #define TDQ_SWITCHCNT(tdq) (atomic_load_short(&(tdq)->tdq_switchcnt) + \
310 atomic_load_short(&(tdq)->tdq_oldswitchcnt))
311 #define TDQ_SWITCHCNT_INC(tdq) (atomic_store_short(&(tdq)->tdq_switchcnt, \
312 atomic_load_short(&(tdq)->tdq_switchcnt) + 1))
322 #define SCHED_AFFINITY(ts, t) ((u_int)ticks - (ts)->ts_rltick < (t) * affinity)
325 * Run-time tunables.
345 #define TDQ_ID(x) ((x)->tdq_id)
360 #define TDQ_LOCKPTR(t) ((struct mtx *)(&(t)->tdq_lock))
440 * Print the threads waiting on a run-queue.
453 i, rq->rq_status.rq_sw[i]); in runq_print()
455 if (rq->rq_status.rq_sw[i] & (1ul << j)) { in runq_print()
457 rqq = &rq->rq_queues[pri]; in runq_print()
460 td, td->td_name, td->td_priority, in runq_print()
461 td->td_rqindex, pri); in runq_print()
468 * Print the status of a per-cpu thread queue. Should be a ddb show cmd.
479 printf("\tLock name: %s\n", tdq->tdq_name); in tdq_print()
480 printf("\tload: %d\n", tdq->tdq_load); in tdq_print()
481 printf("\tswitch cnt: %d\n", tdq->tdq_switchcnt); in tdq_print()
482 printf("\told switch cnt: %d\n", tdq->tdq_oldswitchcnt); in tdq_print()
483 printf("\tTS insert offset: %d\n", tdq->tdq_ts_off); in tdq_print()
484 printf("\tTS dequeue offset: %d\n", tdq->tdq_ts_deq_off); in tdq_print()
485 printf("\tload transferable: %d\n", tdq->tdq_transferable); in tdq_print()
486 printf("\tlowest priority: %d\n", tdq->tdq_lowpri); in tdq_print()
488 runq_print(&tdq->tdq_runq); in tdq_print()
495 * If the new priority is not better than the current priority there is in sched_shouldpreempt()
516 * If we're interactive or better and there is non-interactive in sched_shouldpreempt()
525 * Add a thread to the actual run-queue. Keeps transferable counts up to
526 * date with what is actually on the run-queue. Selects the correct
538 pri = td->td_priority; in tdq_runq_add()
542 tdq->tdq_transferable++; in tdq_runq_add()
543 ts->ts_flags |= TSF_XFERABLE; in tdq_runq_add()
554 /* Current queue from which processes are being run. */ in tdq_runq_add()
555 idx = tdq->tdq_ts_deq_off; in tdq_runq_add()
557 idx = (RQ_PRI_TO_QUEUE_IDX(pri) - RQ_TS_POL_MIN + in tdq_runq_add()
558 tdq->tdq_ts_off) % RQ_TS_POL_MODULO; in tdq_runq_add()
564 if (tdq->tdq_ts_deq_off != tdq->tdq_ts_off && in tdq_runq_add()
565 idx == tdq->tdq_ts_deq_off) in tdq_runq_add()
567 idx = (idx - 1 + RQ_TS_POL_MODULO) % in tdq_runq_add()
572 runq_add_idx(&tdq->tdq_runq, td, idx, flags); in tdq_runq_add()
574 runq_add(&tdq->tdq_runq, td, flags); in tdq_runq_add()
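A hedged sketch of the calendar-queue insertion index computed above, with the offsets and modulo passed explicitly; the function is illustrative only and not part of the scheduler:

/*
 * queue_in_band corresponds to RQ_PRI_TO_QUEUE_IDX(pri) - RQ_TS_POL_MIN,
 * ts_off/ts_deq_off to tdq_ts_off/tdq_ts_deq_off, and modulo to
 * RQ_TS_POL_MODULO.
 */
static unsigned
example_ts_insert_idx(unsigned queue_in_band, unsigned ts_off,
    unsigned ts_deq_off, unsigned modulo)
{
	unsigned idx;

	/* Rotate by the insert offset so the band behaves as a calendar queue. */
	idx = (queue_in_band + ts_off) % modulo;
	/*
	 * While the dequeue offset trails the insert offset, never insert
	 * directly at the dequeue slot; back up one slot instead, mirroring
	 * the adjustment above.
	 */
	if (ts_deq_off != ts_off && idx == ts_deq_off)
		idx = (idx - 1 + modulo) % modulo;
	return (idx);
}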
578 * Advance the timesharing dequeue offset to the next non-empty queue or the
592 while (tdq->tdq_ts_deq_off != tdq->tdq_ts_off) { in tdq_advance_ts_deq_off()
595 else if (!runq_is_queue_empty(&tdq->tdq_runq, in tdq_advance_ts_deq_off()
596 tdq->tdq_ts_deq_off + RQ_TS_POL_MIN)) in tdq_advance_ts_deq_off()
599 tdq->tdq_ts_deq_off = (tdq->tdq_ts_deq_off + 1) % in tdq_advance_ts_deq_off()
605 * Remove a thread from a run-queue. This typically happens when a thread
618 if (ts->ts_flags & TSF_XFERABLE) { in tdq_runq_rem()
619 tdq->tdq_transferable--; in tdq_runq_rem()
620 ts->ts_flags &= ~TSF_XFERABLE; in tdq_runq_rem()
622 queue_empty = runq_remove(&tdq->tdq_runq, td); in tdq_runq_rem()
629 if (PRI_MIN_BATCH <= td->td_priority && in tdq_runq_rem()
630 td->td_priority <= PRI_MAX_BATCH && queue_empty && in tdq_runq_rem()
631 tdq->tdq_ts_deq_off + RQ_TS_POL_MIN == td->td_rqindex) in tdq_runq_rem()
646 tdq->tdq_load++; in tdq_load_add()
647 if ((td->td_flags & TDF_NOLOAD) == 0) in tdq_load_add()
648 tdq->tdq_sysload++; in tdq_load_add()
649 KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load); in tdq_load_add()
650 SDT_PROBE2(sched, , , load__change, (int)TDQ_ID(tdq), tdq->tdq_load); in tdq_load_add()
663 KASSERT(tdq->tdq_load != 0, in tdq_load_rem()
666 tdq->tdq_load--; in tdq_load_rem()
667 if ((td->td_flags & TDF_NOLOAD) == 0) in tdq_load_rem()
668 tdq->tdq_sysload--; in tdq_load_rem()
669 KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load); in tdq_load_rem()
670 SDT_PROBE2(sched, , , load__change, (int)TDQ_ID(tdq), tdq->tdq_load); in tdq_load_rem()
689 load = tdq->tdq_sysload - 1; in tdq_slice()
698 * Set lowpri to its exact value by searching the run-queue and
708 ctd = tdq->tdq_curthread; in tdq_setlowpri()
710 if (td == NULL || td->td_priority > ctd->td_priority) in tdq_setlowpri()
711 tdq->tdq_lowpri = ctd->td_priority; in tdq_setlowpri()
713 tdq->tdq_lowpri = td->td_priority; in tdq_setlowpri()
766 r->csr_cpu = -1; in cpu_search_lowest()
769 if (cg->cg_children > 0) { in cpu_search_lowest()
770 for (c = cg->cg_children - 1; c >= 0; c--) { in cpu_search_lowest()
771 load = cpu_search_lowest(&cg->cg_child[c], s, &lr); in cpu_search_lowest()
776 * It allows round-robin between SMT groups with equal in cpu_search_lowest()
779 if (__predict_false(s->cs_running) && in cpu_search_lowest()
780 (cg->cg_child[c].cg_flags & CG_FLAG_THREAD) && in cpu_search_lowest()
785 (load == bload && lr.csr_load < r->csr_load))) { in cpu_search_lowest()
787 r->csr_cpu = lr.csr_cpu; in cpu_search_lowest()
788 r->csr_load = lr.csr_load; in cpu_search_lowest()
795 for (c = cg->cg_last; c >= cg->cg_first; c--) { in cpu_search_lowest()
796 if (!CPU_ISSET(c, &cg->cg_mask)) in cpu_search_lowest()
800 if (c == s->cs_prefer) { in cpu_search_lowest()
801 if (__predict_false(s->cs_running)) in cpu_search_lowest()
802 l--; in cpu_search_lowest()
807 total += load - p; in cpu_search_lowest()
814 if (l > s->cs_load || in cpu_search_lowest()
815 (atomic_load_char(&tdq->tdq_lowpri) <= s->cs_pri && in cpu_search_lowest()
816 (!s->cs_running || c != s->cs_prefer)) || in cpu_search_lowest()
817 !CPU_ISSET(c, s->cs_mask)) in cpu_search_lowest()
822 * It allows round-robin between CPUs with equal load in cpu_search_lowest()
825 if (__predict_false(s->cs_running) && l > 0) in cpu_search_lowest()
828 load -= sched_random() % 128; in cpu_search_lowest()
829 if (bload > load - p) { in cpu_search_lowest()
830 bload = load - p; in cpu_search_lowest()
831 r->csr_cpu = c; in cpu_search_lowest()
832 r->csr_load = load; in cpu_search_lowest()
848 r->csr_cpu = -1; in cpu_search_highest()
851 if (cg->cg_children > 0) { in cpu_search_highest()
852 for (c = cg->cg_children - 1; c >= 0; c--) { in cpu_search_highest()
853 load = cpu_search_highest(&cg->cg_child[c], s, &lr); in cpu_search_highest()
856 (load == bload && lr.csr_load > r->csr_load))) { in cpu_search_highest()
858 r->csr_cpu = lr.csr_cpu; in cpu_search_highest()
859 r->csr_load = lr.csr_load; in cpu_search_highest()
866 for (c = cg->cg_last; c >= cg->cg_first; c--) { in cpu_search_highest()
867 if (!CPU_ISSET(c, &cg->cg_mask)) in cpu_search_highest()
877 if (l < s->cs_load || TDQ_TRANSFERABLE(tdq) < s->cs_trans || in cpu_search_highest()
878 !CPU_ISSET(c, s->cs_mask)) in cpu_search_highest()
881 load -= sched_random() % 256; in cpu_search_highest()
884 r->csr_cpu = c; in cpu_search_highest()
887 r->csr_load = bload; in cpu_search_highest()
893 * lowpri greater than pri. A pri of -1 indicates any priority is
941 if (high == -1) in sched_balance_group()
955 td = tdq->tdq_curthread; in sched_balance_group()
956 if (td->td_lock == TDQ_LOCKPTR(tdq) && in sched_balance_group()
957 (td->td_flags & TDF_IDLETD) == 0 && in sched_balance_group()
959 td->td_flags |= TDF_PICKCPU; in sched_balance_group()
971 low = sched_lowest(cg, &lmask, -1, TDQ_LOAD(tdq) - 1, high, 1); in sched_balance_group()
973 if (anylow && low == -1) in sched_balance_group()
976 if (low == -1) in sched_balance_group()
1049 if (high->tdq_transferable != 0 && high->tdq_load > low->tdq_load) { in sched_balance_pair()
1051 if (lowpri != -1) { in sched_balance_pair()
1053 * In case the target isn't the current CPU, notify it of in sched_balance_pair()
1061 sched_setpreempt(low->tdq_lowpri); in sched_balance_pair()
1070 * Move a thread from one thread queue to another. Returns -1 if the source
1088 return (-1); in tdq_move()
1097 td->td_lock = TDQ_LOCKPTR(to); in tdq_move()
1098 td_get_sched(td)->ts_cpu = cpu; in tdq_move()
1114 if (smp_started == 0 || steal_idle == 0 || tdq->tdq_cg == NULL) in tdq_idled()
1120 for (cg = tdq->tdq_cg, goup = 0; ; ) { in tdq_idled()
1135 if (cpu == -1) { in tdq_idled()
1137 cg = cg->cg_parent; in tdq_idled()
1140 parent = cg->cg_parent; in tdq_idled()
1143 if (parent->cg_children == 2) { in tdq_idled()
1144 if (cg == &parent->cg_child[0]) in tdq_idled()
1145 cg = &parent->cg_child[1]; in tdq_idled()
1147 cg = &parent->cg_child[0]; in tdq_idled()
1172 if (tdq->tdq_load > 0) { in tdq_idled()
1197 if (tdq_move(steal, tdq) != -1) in tdq_idled()
1227 KASSERT(tdq->tdq_lowpri <= lowpri, in tdq_notify()
1228 ("tdq_notify: lowpri %d > tdq_lowpri %d", lowpri, tdq->tdq_lowpri)); in tdq_notify()
1230 if (tdq->tdq_owepreempt) in tdq_notify()
1237 if (!sched_shouldpreempt(tdq->tdq_lowpri, lowpri, 1)) in tdq_notify()
1253 if (TD_IS_IDLETHREAD(tdq->tdq_curthread) && in tdq_notify()
1254 (atomic_load_int(&tdq->tdq_cpu_idle) == 0 || cpu_idle_wakeup(cpu))) in tdq_notify()
1261 tdq->tdq_owepreempt = 1; in tdq_notify()
1277 if (THREAD_CAN_MIGRATE(td) && THREAD_CAN_SCHED(td, d->cpu)) { in runq_steal_pred()
1278 d->td = td; in runq_steal_pred()
1300 if (idx != -1) { in runq_steal_range()
1331 td = runq_steal_range(rq, RQ_TS_POL_MIN, RQ_TS_POL_MIN + off - 1, cpu); in runq_steal_timeshare()
1352 td = runq_steal_realtime(&tdq->tdq_runq, cpu); in tdq_steal()
1355 td = runq_steal_timeshare(&tdq->tdq_runq, cpu, tdq->tdq_ts_deq_off); in tdq_steal()
1358 return (runq_steal_idle(&tdq->tdq_runq, cpu)); in tdq_steal()
1363 * current lock and returns with the assigned queue locked.
1374 td_get_sched(td)->ts_cpu = cpu; in sched_setcpu()
1378 if (td->td_lock == TDQ_LOCKPTR(tdq)) { in sched_setcpu()
1402 SCHED_STAT_DEFINE(pickcpu_local, "Migrated to current cpu");
1416 KASSERT(!CPU_ABSENT(ts->ts_cpu), ("sched_pickcpu: Start scheduler on " in sched_pickcpu()
1417 "absent CPU %d for thread %s.", ts->ts_cpu, td->td_name)); in sched_pickcpu()
1424 return (ts->ts_cpu); in sched_pickcpu()
1429 if (td->td_priority <= PRI_MAX_ITHD && THREAD_CAN_SCHED(td, self) && in sched_pickcpu()
1430 curthread->td_intr_nesting_level) { in sched_pickcpu()
1432 if (tdq->tdq_lowpri >= PRI_MIN_IDLE) { in sched_pickcpu()
1436 ts->ts_cpu = self; in sched_pickcpu()
1438 cg = tdq->tdq_cg; in sched_pickcpu()
1442 tdq = TDQ_CPU(ts->ts_cpu); in sched_pickcpu()
1443 cg = tdq->tdq_cg; in sched_pickcpu()
1449 if (THREAD_CAN_SCHED(td, ts->ts_cpu) && in sched_pickcpu()
1450 atomic_load_char(&tdq->tdq_lowpri) >= PRI_MIN_IDLE && in sched_pickcpu()
1452 if (cg->cg_flags & CG_FLAG_THREAD) { in sched_pickcpu()
1454 for (cpu = cg->cg_first; cpu <= cg->cg_last; cpu++) { in sched_pickcpu()
1456 atomic_load_char(&TDQ_CPU(cpu)->tdq_lowpri); in sched_pickcpu()
1457 if (CPU_ISSET(cpu, &cg->cg_mask) && in sched_pickcpu()
1461 if (cpu > cg->cg_last) { in sched_pickcpu()
1463 return (ts->ts_cpu); in sched_pickcpu()
1467 return (ts->ts_cpu); in sched_pickcpu()
1476 for (ccg = NULL; cg != NULL; cg = cg->cg_parent) { in sched_pickcpu()
1477 if (cg->cg_flags & CG_FLAG_THREAD) in sched_pickcpu()
1479 if (cg->cg_children == 1 || cg->cg_count == 1) in sched_pickcpu()
1481 if (cg->cg_level == CG_SHARE_NONE || in sched_pickcpu()
1482 (!intr && !SCHED_AFFINITY(ts, cg->cg_level))) in sched_pickcpu()
1489 cpu = -1; in sched_pickcpu()
1490 mask = &td->td_cpuset->cs_mask; in sched_pickcpu()
1491 pri = td->td_priority; in sched_pickcpu()
1499 cpu = sched_lowest(ccg, mask, pri, INT_MAX, ts->ts_cpu, r); in sched_pickcpu()
1506 INT_MAX, ts->ts_cpu, r); in sched_pickcpu()
1512 cpu = sched_lowest(cpu_top, mask, pri, INT_MAX, ts->ts_cpu, r); in sched_pickcpu()
1518 cpu = sched_lowest(cpu_top, mask, -1, INT_MAX, ts->ts_cpu, r); in sched_pickcpu()
1525 * Compare the lowest loaded cpu to current cpu. in sched_pickcpu()
1528 if (THREAD_CAN_SCHED(td, self) && TDQ_SELF()->tdq_lowpri > pri && in sched_pickcpu()
1529 atomic_load_char(&tdq->tdq_lowpri) < PRI_MIN_IDLE && in sched_pickcpu()
1534 if (cpu != ts->ts_cpu) in sched_pickcpu()
1558 td = runq_first_thread_range(rq, RQ_TS_POL_MIN, RQ_TS_POL_MIN + off - 1); in runq_choose_timeshare()
1578 td = runq_choose_realtime(&tdq->tdq_runq); in tdq_choose()
1581 td = runq_choose_timeshare(&tdq->tdq_runq, tdq->tdq_ts_deq_off); in tdq_choose()
1583 KASSERT(td->td_priority >= PRI_MIN_BATCH, in tdq_choose()
1585 td->td_priority)); in tdq_choose()
1588 td = runq_choose_idle(&tdq->tdq_runq); in tdq_choose()
1590 KASSERT(td->td_priority >= PRI_MIN_IDLE, in tdq_choose()
1592 td->td_priority)); in tdq_choose()
1608 runq_init(&tdq->tdq_runq); in tdq_setup()
1609 tdq->tdq_id = id; in tdq_setup()
1610 snprintf(tdq->tdq_name, sizeof(tdq->tdq_name), in tdq_setup()
1612 mtx_init(&tdq->tdq_lock, tdq->tdq_name, "sched lock", MTX_SPIN); in tdq_setup()
1614 snprintf(tdq->tdq_loadname, sizeof(tdq->tdq_loadname), in tdq_setup()
1630 tdq->tdq_cg = smp_topo_find(cpu_top, i); in sched_setup_smp()
1631 if (tdq->tdq_cg == NULL) in sched_setup_smp()
1660 tdq->tdq_curthread = &thread0; in sched_setup()
1661 tdq->tdq_lowpri = thread0.td_priority; in sched_setup()
1710 * waiting on a run-queue. Would be prettier if we had floating point.
1715 *                            scaling factor
1716 * interactivity score =  ---------------------
1717 *                        sleep time / run time
1723 *                                                  scaling factor
1724 * interactivity score = 2 * scaling factor - ---------------------
1725 *                                             run time / sleep time
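A worked example, assuming the stock constants SCHED_INTERACT_MAX == 100 and SCHED_INTERACT_HALF == 50 (treat both values as assumptions): a thread that sleeps three times as long as it runs scores 50 / 3 = 16 and is considered interactive, while a thread that runs three times as long as it sleeps scores 100 - 50 / 3 = 84 and is treated as batch. The sched_interact_score() lines below compute this, with the division by SCHED_INTERACT_HALF performed first to avoid overflow.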
1740 ts->ts_runtime >= ts->ts_slptime) in sched_interact_score()
1743 if (ts->ts_runtime > ts->ts_slptime) { in sched_interact_score()
1744 div = max(1, ts->ts_runtime / SCHED_INTERACT_HALF); in sched_interact_score()
1746 (SCHED_INTERACT_HALF - (ts->ts_slptime / div))); in sched_interact_score()
1748 if (ts->ts_slptime > ts->ts_runtime) { in sched_interact_score()
1749 div = max(1, ts->ts_slptime / SCHED_INTERACT_HALF); in sched_interact_score()
1750 return (ts->ts_runtime / div); in sched_interact_score()
1753 if (ts->ts_runtime) in sched_interact_score()
1764 * Scale the scheduling priority according to the "interactivity" of this
1773 if (PRI_BASE(td->td_pri_class) != PRI_TIMESHARE) in sched_priority()
1776 nice = td->td_proc->p_nice; in sched_priority()
1793 pri += (PRI_MAX_INTERACT - PRI_MIN_INTERACT + 1) * score / in sched_priority()
1805 const u_int cpu_pri_off = (((SCHED_PRI_CPU_RANGE - 1) * in sched_priority()
1837 sum = ts->ts_runtime + ts->ts_slptime; in sched_interact_update()
1846 if (ts->ts_runtime > ts->ts_slptime) { in sched_interact_update()
1847 ts->ts_runtime = SCHED_SLP_RUN_MAX; in sched_interact_update()
1848 ts->ts_slptime = 1; in sched_interact_update()
1850 ts->ts_slptime = SCHED_SLP_RUN_MAX; in sched_interact_update()
1851 ts->ts_runtime = 1; in sched_interact_update()
1861 ts->ts_runtime /= 2; in sched_interact_update()
1862 ts->ts_slptime /= 2; in sched_interact_update()
1865 ts->ts_runtime = (ts->ts_runtime / 5) * 4; in sched_interact_update()
1866 ts->ts_slptime = (ts->ts_slptime / 5) * 4; in sched_interact_update()
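For example (numbers purely illustrative): if ts_runtime is 60 and ts_slptime is 45 units when the sum crosses the limit, the scaling above leaves 48 and 36; the 4:3 ratio, and hence the interactivity score (up to integer rounding), is preserved while one fifth of the accumulated history is forgotten.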
1870 * Scale back the interactivity history when a child thread is created. The
1883 sum = ts->ts_runtime + ts->ts_slptime; in sched_interact_fork()
1886 ts->ts_runtime /= ratio; in sched_interact_fork()
1887 ts->ts_slptime /= ratio; in sched_interact_fork()
1903 ts0->ts_ftick = (u_int)ticks; in schedinit()
1904 ts0->ts_ltick = ts0->ts_ftick; in schedinit()
1905 ts0->ts_slice = 0; in schedinit()
1906 ts0->ts_cpu = curcpu; /* set valid CPU number */ in schedinit()
1926 PCPU_GET(idlethread)->td_lock = TDQ_LOCKPTR(TDQ_SELF()); in schedinit_ap()
1956 const u_int lu_span = t - ts->ts_ltick; in sched_pctcpu_update()
1964 ts->ts_ticks = run ? (t_tgt << SCHED_TICK_SHIFT) : 0; in sched_pctcpu_update()
1965 ts->ts_ftick = t - t_tgt; in sched_pctcpu_update()
1966 ts->ts_ltick = t; in sched_pctcpu_update()
1970 if (t - ts->ts_ftick >= t_max) { in sched_pctcpu_update()
1986 ts->ts_ticks = SCHED_TICK_RUN_SHIFTED(ts) / in sched_pctcpu_update()
1987 SCHED_TICK_LENGTH(ts) * (t_tgt - lu_span); in sched_pctcpu_update()
1988 ts->ts_ftick = t - t_tgt; in sched_pctcpu_update()
1992 ts->ts_ticks += lu_span << SCHED_TICK_SHIFT; in sched_pctcpu_update()
1993 ts->ts_ltick = t; in sched_pctcpu_update()
1997 * Adjust the priority of a thread. Move it to the appropriate run-queue
1998 * if necessary. This is the back-end for several priority related
2008 "prio:%d", td->td_priority, "new prio:%d", prio, in sched_thread_priority()
2010 SDT_PROBE3(sched, , , change__pri, td, td->td_proc, prio); in sched_thread_priority()
2011 if (td != curthread && prio < td->td_priority) { in sched_thread_priority()
2013 "lend prio", "prio:%d", td->td_priority, "new prio:%d", in sched_thread_priority()
2015 SDT_PROBE4(sched, , , lend__pri, td, td->td_proc, prio, in sched_thread_priority()
2019 if (td->td_priority == prio) in sched_thread_priority()
2024 * queue. This could be optimized to not re-add in some in sched_thread_priority()
2027 if (TD_ON_RUNQ(td) && prio < td->td_priority) { in sched_thread_priority()
2029 td->td_priority = prio; in sched_thread_priority()
2035 * information so other cpus are aware of our current priority. in sched_thread_priority()
2038 tdq = TDQ_CPU(td_get_sched(td)->ts_cpu); in sched_thread_priority()
2039 oldpri = td->td_priority; in sched_thread_priority()
2040 td->td_priority = prio; in sched_thread_priority()
2041 if (prio < tdq->tdq_lowpri) in sched_thread_priority()
2042 tdq->tdq_lowpri = prio; in sched_thread_priority()
2043 else if (tdq->tdq_lowpri == oldpri) in sched_thread_priority()
2047 td->td_priority = prio; in sched_thread_priority()
2058 td->td_flags |= TDF_BORROWING; in sched_lend_prio()
2064 * over. The prio argument is the minimum priority the thread
2075 if (td->td_base_pri >= PRI_MIN_TIMESHARE && in sched_unlend_prio()
2076 td->td_base_pri <= PRI_MAX_TIMESHARE) in sched_unlend_prio()
2077 base_pri = td->td_user_pri; in sched_unlend_prio()
2079 base_pri = td->td_base_pri; in sched_unlend_prio()
2081 td->td_flags &= ~TDF_BORROWING; in sched_unlend_prio()
2096 td->td_base_pri = prio; in sched_prio()
2102 if (td->td_flags & TDF_BORROWING && td->td_priority < prio) in sched_prio()
2106 oldprio = td->td_priority; in sched_prio()
2124 MPASS(td->td_pri_class == PRI_ITHD); in sched_ithread_prio()
2125 td->td_base_ithread_pri = prio; in sched_ithread_prio()
2130 * Set the base user priority; this does not affect the current running priority.
2136 td->td_base_user_pri = prio; in sched_user_prio()
2137 if (td->td_lend_user_pri <= prio) in sched_user_prio()
2139 td->td_user_pri = prio; in sched_user_prio()
2147 td->td_lend_user_pri = prio; in sched_lend_user_prio()
2148 td->td_user_pri = min(prio, td->td_base_user_pri); in sched_lend_user_prio()
2149 if (td->td_priority > td->td_user_pri) in sched_lend_user_prio()
2150 sched_prio(td, td->td_user_pri); in sched_lend_user_prio()
2151 else if (td->td_priority != td->td_user_pri) in sched_lend_user_prio()
2162 if (td->td_lend_user_pri == prio) in sched_lend_user_prio_cond()
2184 tdq->tdq_cg == NULL) in tdq_trysteal()
2191 for (i = 1, cg = tdq->tdq_cg, goup = 0; ; ) { in tdq_trysteal()
2208 if (cpu == -1) { in tdq_trysteal()
2210 cg = cg->cg_parent; in tdq_trysteal()
2217 parent = cg->cg_parent; in tdq_trysteal()
2222 if (parent->cg_children == 2) { in tdq_trysteal()
2223 if (cg == &parent->cg_child[0]) in tdq_trysteal()
2224 cg = &parent->cg_child[1]; in tdq_trysteal()
2226 cg = &parent->cg_child[0]; in tdq_trysteal()
2249 if (tdq->tdq_load > 0) in tdq_trysteal()
2267 if (tdq_move(steal, tdq) == -1) { in tdq_trysteal()
2291 (td_get_sched(td)->ts_flags & TSF_BOUND) != 0, in sched_switch_migrate()
2293 KASSERT(!CPU_ABSENT(td_get_sched(td)->ts_cpu), ("sched_switch_migrate: " in sched_switch_migrate()
2294 "thread %s queued on absent CPU %d.", td->td_name, in sched_switch_migrate()
2295 td_get_sched(td)->ts_cpu)); in sched_switch_migrate()
2296 tdn = TDQ_CPU(td_get_sched(td)->ts_cpu); in sched_switch_migrate()
2302 * prevent preemption while we're holding neither run-queue lock. in sched_switch_migrate()
2320 atomic_store_rel_ptr((volatile uintptr_t *)&td->td_lock, in thread_unblock_switch()
2350 pickcpu = (td->td_flags & TDF_PICKCPU) != 0; in sched_switch()
2352 ts->ts_rltick = (u_int)ticks - affinity * MAX_CACHE_LEVELS; in sched_switch()
2354 ts->ts_rltick = (u_int)ticks; in sched_switch()
2356 td->td_lastcpu = td->td_oncpu; in sched_switch()
2357 preempted = (td->td_flags & TDF_SLICEEND) == 0 && in sched_switch()
2359 td->td_flags &= ~(TDF_PICKCPU | TDF_SLICEEND); in sched_switch()
2361 td->td_owepreempt = 0; in sched_switch()
2362 atomic_store_char(&tdq->tdq_owepreempt, 0); in sched_switch()
2379 if (THREAD_CAN_MIGRATE(td) && (!THREAD_CAN_SCHED(td, ts->ts_cpu) in sched_switch()
2381 ts->ts_cpu = sched_pickcpu(td, 0); in sched_switch()
2383 if (ts->ts_cpu == cpuid) in sched_switch()
2395 if (tdq->tdq_load == 0) in sched_switch()
2403 "prio:%d", td->td_priority); in sched_switch()
2406 "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg, in sched_switch()
2407 "lockname:\"%s\"", td->td_lockname); in sched_switch()
2412 * appropriate cpu run-queue or sleep-queue and with the current in sched_switch()
2413 * thread-queue locked. in sched_switch()
2416 MPASS(td == tdq->tdq_curthread); in sched_switch()
2426 if (PMC_PROC_IS_USING_PMCS(td->td_proc)) in sched_switch()
2429 SDT_PROBE2(sched, , , off__cpu, newtd, newtd->td_proc); in sched_switch()
2446 td->td_oncpu = NOCPU; in sched_switch()
2448 cpuid = td->td_oncpu = PCPU_GET(cpuid); in sched_switch()
2452 if (PMC_PROC_IS_USING_PMCS(td->td_proc)) in sched_switch()
2459 KASSERT(curthread->td_md.md_spinlock_count == 1, in sched_switch()
2460 ("invalid count %d", curthread->td_md.md_spinlock_count)); in sched_switch()
2463 "prio:%d", td->td_priority); in sched_switch()
2476 p->p_nice = nice; in sched_nice()
2480 sched_prio(td, td->td_base_user_pri); in sched_nice()
2494 td->td_slptick = ticks; in sched_sleep()
2495 if (PRI_BASE(td->td_pri_class) != PRI_TIMESHARE) in sched_sleep()
2499 else if (static_boost && td->td_priority > static_boost) in sched_sleep()
2522 slptick = td->td_slptick; in sched_wakeup()
2523 td->td_slptick = 0; in sched_wakeup()
2525 ts->ts_slptime += (ticks - slptick) << SCHED_TICK_SHIFT; in sched_wakeup()
2534 if (PRI_BASE(td->td_pri_class) == PRI_ITHD && in sched_wakeup()
2535 td->td_priority != td->td_base_ithread_pri) in sched_wakeup()
2536 sched_prio(td, td->td_base_ithread_pri); in sched_wakeup()
2539 * Reset the slice value since we slept and advanced the round-robin. in sched_wakeup()
2541 ts->ts_slice = 0; in sched_wakeup()
2560 td_get_sched(td)->ts_runtime += tickincr; in sched_fork()
2582 child->td_oncpu = NOCPU; in sched_fork_thread()
2583 child->td_lastcpu = NOCPU; in sched_fork_thread()
2584 child->td_lock = TDQ_LOCKPTR(tdq); in sched_fork_thread()
2585 child->td_cpuset = cpuset_ref(td->td_cpuset); in sched_fork_thread()
2586 child->td_domain.dr_policy = td->td_cpuset->cs_domain; in sched_fork_thread()
2587 ts2->ts_cpu = ts->ts_cpu; in sched_fork_thread()
2588 ts2->ts_flags = 0; in sched_fork_thread()
2592 ts2->ts_ticks = ts->ts_ticks; in sched_fork_thread()
2593 ts2->ts_ltick = ts->ts_ltick; in sched_fork_thread()
2594 ts2->ts_ftick = ts->ts_ftick; in sched_fork_thread()
2598 child->td_priority = child->td_base_pri; in sched_fork_thread()
2602 ts2->ts_slptime = ts->ts_slptime; in sched_fork_thread()
2603 ts2->ts_runtime = ts->ts_runtime; in sched_fork_thread()
2605 ts2->ts_slice = tdq_slice(tdq) - sched_slice_min; in sched_fork_thread()
2607 bzero(ts2->ts_name, sizeof(ts2->ts_name)); in sched_fork_thread()
2619 if (td->td_pri_class == class) in sched_class()
2621 td->td_pri_class = class; in sched_class()
2633 "prio:%d", child->td_priority); in sched_exit()
2650 "prio:%d", child->td_priority); in sched_exit_thread()
2657 td_get_sched(td)->ts_runtime += td_get_sched(child)->ts_runtime; in sched_exit_thread()
2669 SDT_PROBE2(sched, , , surrender, td, td->td_proc); in sched_preempt()
2674 if (td->td_priority > tdq->tdq_lowpri) { in sched_preempt()
2675 if (td->td_critnest == 1) { in sched_preempt()
2683 td->td_owepreempt = 1; in sched_preempt()
2685 tdq->tdq_owepreempt = 0; in sched_preempt()
2691 * Fix priorities on return to user-space. Priorities may be elevated due
2699 td->td_priority = td->td_user_pri; in sched_userret_slowpath()
2700 td->td_base_pri = td->td_user_pri; in sched_userret_slowpath()
2707 "Interrupt thread preemptions due to time-sharing");
2716 if (PRI_BASE(td->td_pri_class) == PRI_ITHD) in td_slice()
2739 balance_ticks -= cnt; in sched_clock()
2749 tdq->tdq_oldswitchcnt = tdq->tdq_switchcnt; in sched_clock()
2750 tdq->tdq_switchcnt = tdq->tdq_load; in sched_clock()
2755 * anti-starvation and "nice" behaviors after the switch to a single in sched_clock()
2756 * 256-queue runqueue, since the queue insert offset is incremented by in sched_clock()
2759 * of 64 before (separate runqueue), we apply a factor 7/4 when in sched_clock()
2763 if (tdq->tdq_ts_off == tdq->tdq_ts_deq_off) { in sched_clock()
2764 tdq->tdq_ts_ticks += cnt; in sched_clock()
2765 tdq->tdq_ts_off = (tdq->tdq_ts_off + 2 * cnt - in sched_clock()
2766 tdq->tdq_ts_ticks / 4) % RQ_TS_POL_MODULO; in sched_clock()
2767 tdq->tdq_ts_ticks %= 4; in sched_clock()
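To see the 7/4 factor mentioned above at work, assume one tick per call (cnt == 1) and tdq_ts_ticks starting at 0: the first three ticks each advance tdq_ts_off by 2 * 1 - 0 = 2, the fourth advances it by 2 * 1 - 4 / 4 = 1 and wraps tdq_ts_ticks back to 0, so the insert offset moves forward 7 queues every 4 ticks.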
2772 if ((td->td_pri_class & PRI_FIFO_BIT) || TD_IS_IDLETHREAD(td)) in sched_clock()
2775 if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) { in sched_clock()
2780 td_get_sched(td)->ts_runtime += tickincr * cnt; in sched_clock()
2786 * Force a context switch if the current thread has used up a full in sched_clock()
2789 ts->ts_slice += cnt; in sched_clock()
2790 if (ts->ts_slice >= td_slice(td, tdq)) { in sched_clock()
2791 ts->ts_slice = 0; in sched_clock()
2797 if (PRI_BASE(td->td_pri_class) == PRI_ITHD) { in sched_clock()
2799 td->td_owepreempt = 1; in sched_clock()
2800 if (td->td_base_pri + RQ_PPQ < PRI_MAX_ITHD) { in sched_clock()
2802 sched_prio(td, td->td_base_pri + RQ_PPQ); in sched_clock()
2806 td->td_flags |= TDF_SLICEEND; in sched_clock()
2819 * Return whether the current CPU has runnable tasks. Used for in-kernel
2833 * the run-queue while running; however, the load remains.
2846 tdq->tdq_lowpri = td->td_priority; in sched_choose()
2848 tdq->tdq_lowpri = PRI_MAX_IDLE; in sched_choose()
2851 tdq->tdq_curthread = td; in sched_choose()
2869 cpri = ctd->td_priority; in sched_setpreempt()
2876 ctd->td_owepreempt = 1; in sched_setpreempt()
2891 KASSERT((td->td_inhibitors == 0), in tdq_add()
2895 KASSERT(td->td_flags & TDF_INMEM, in tdq_add()
2898 lowpri = tdq->tdq_lowpri; in tdq_add()
2899 if (td->td_priority < lowpri) in tdq_add()
2900 tdq->tdq_lowpri = td->td_priority; in tdq_add()
2921 "prio:%d", td->td_priority, KTR_ATTR_LINKED, in sched_add()
2925 SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL, in sched_add()
2930 * run-queue. in sched_add()
2932 if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) in sched_add()
2945 sched_setpreempt(td->td_priority); in sched_add()
2949 * Now that the thread is moving to the run-queue, set the lock in sched_add()
2952 if (td->td_lock != TDQ_LOCKPTR(tdq)) { in sched_add()
2955 td->td_lock = TDQ_LOCKPTR(tdq); in sched_add()
2961 sched_setpreempt(td->td_priority); in sched_add()
2968 * Remove a thread from a run-queue without running it. This is used
2978 "prio:%d", td->td_priority); in sched_rem()
2979 SDT_PROBE3(sched, , , dequeue, td, td->td_proc, NULL); in sched_rem()
2980 tdq = TDQ_CPU(td_get_sched(td)->ts_cpu); in sched_rem()
2982 MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); in sched_rem()
2988 if (td->td_priority == tdq->tdq_lowpri) in sched_rem()
3006 pctcpu = ((FSHIFT >= SCHED_TICK_SHIFT ? /* Resolved at compile-time. */ in sched_pctcpu()
3007 (SCHED_TICK_RUN_SHIFTED(ts) << (FSHIFT - SCHED_TICK_SHIFT)) : in sched_pctcpu()
3008 (SCHED_TICK_RUN_SHIFTED(ts) >> (SCHED_TICK_SHIFT - FSHIFT))) + in sched_pctcpu()
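A self-contained sketch of the fixed-point conversion above, assuming FSHIFT is 11 (so FSCALE == 2048, as in sys/param.h), SCHED_TICK_SHIFT is 10, and a rounding division by SCHED_TICK_LENGTH(ts) completing the expression; all three are assumptions for illustration, and the helper below is not part of the scheduler:

#include <stdio.h>

#define	EX_FSHIFT	11			/* assumed FSHIFT */
#define	EX_FSCALE	(1 << EX_FSHIFT)	/* 2048 == 100% CPU */
#define	EX_TICK_SHIFT	10			/* assumed SCHED_TICK_SHIFT */

/*
 * run_shifted plays the role of ts_ticks (run ticks << EX_TICK_SHIFT) and
 * len the role of ts_ltick - ts_ftick, the length of the sampling window.
 */
static unsigned
example_pctcpu(unsigned run_shifted, unsigned len)
{
	return (((run_shifted << (EX_FSHIFT - EX_TICK_SHIFT)) + len / 2) / len);
}

int
main(void)
{
	/* A thread that ran 5 of the last 10 ticks: prints 1024 / 2048 (50%). */
	printf("%u / %u\n", example_pctcpu(5U << EX_TICK_SHIFT, 10), EX_FSCALE);
	return (0);
}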
3025 if (THREAD_CAN_SCHED(td, ts->ts_cpu)) in sched_affinity()
3041 ipi_cpu(ts->ts_cpu, IPI_PREEMPT); in sched_affinity()
3056 if (ts->ts_flags & TSF_BOUND) in sched_bind()
3059 ts->ts_flags |= TSF_BOUND; in sched_bind()
3063 ts->ts_cpu = cpu; in sched_bind()
3080 if ((ts->ts_flags & TSF_BOUND) == 0) in sched_unbind()
3082 ts->ts_flags &= ~TSF_BOUND; in sched_unbind()
3090 return (td_get_sched(td)->ts_flags & TSF_BOUND); in sched_is_bound()
3115 total += atomic_load_int(&TDQ_CPU(i)->tdq_sysload); in sched_load()
3118 return (atomic_load_int(&TDQ_SELF()->tdq_sysload)); in sched_load()
3136 ((tdq)->tdq_cg != NULL && ((tdq)->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0)
3156 oldswitchcnt = -1; in sched_idletd()
3194 atomic_store_int(&tdq->tdq_cpu_idle, 1); in sched_idletd()
3207 atomic_store_int(&tdq->tdq_cpu_idle, 0); in sched_idletd()
3211 atomic_store_int(&tdq->tdq_cpu_idle, 0); in sched_idletd()
3214 * Account thread-less hardware interrupts and in sched_idletd()
3239 KASSERT(curthread->td_md.md_spinlock_count == 1, in sched_throw_grab()
3240 ("invalid count %d", curthread->td_md.md_spinlock_count)); in sched_throw_grab()
3290 td->td_lastcpu = td->td_oncpu; in sched_throw()
3291 td->td_oncpu = NOCPU; in sched_throw()
3316 * non-nested critical section with the scheduler lock held. in sched_fork_exit()
3318 KASSERT(curthread->td_md.md_spinlock_count == 1, in sched_fork_exit()
3319 ("invalid count %d", curthread->td_md.md_spinlock_count)); in sched_fork_exit()
3324 MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); in sched_fork_exit()
3325 td->td_oncpu = cpuid; in sched_fork_exit()
3327 "prio:%d", td->td_priority); in sched_fork_exit()
3341 if (ts->ts_name[0] == '\0') in sched_tdname()
3342 snprintf(ts->ts_name, sizeof(ts->ts_name), in sched_tdname()
3343 "%s tid %d", td->td_name, td->td_tid); in sched_tdname()
3344 return (ts->ts_name); in sched_tdname()
3346 return (td->td_name); in sched_tdname()
3357 ts->ts_name[0] = '\0'; in sched_clear_tdname()
3374 sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent, in sysctl_kern_sched_topology_spec_internal()
3375 "", 1 + indent / 2, cg->cg_level); in sysctl_kern_sched_topology_spec_internal()
3377 cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask)); in sysctl_kern_sched_topology_spec_internal()
3379 for (i = cg->cg_first; i <= cg->cg_last; i++) { in sysctl_kern_sched_topology_spec_internal()
3380 if (CPU_ISSET(i, &cg->cg_mask)) { in sysctl_kern_sched_topology_spec_internal()
3390 if (cg->cg_flags != 0) { in sysctl_kern_sched_topology_spec_internal()
3392 if ((cg->cg_flags & CG_FLAG_HTT) != 0) in sysctl_kern_sched_topology_spec_internal()
3394 if ((cg->cg_flags & CG_FLAG_THREAD) != 0) in sysctl_kern_sched_topology_spec_internal()
3396 if ((cg->cg_flags & CG_FLAG_SMT) != 0) in sysctl_kern_sched_topology_spec_internal()
3398 if ((cg->cg_flags & CG_FLAG_NODE) != 0) in sysctl_kern_sched_topology_spec_internal()
3403 if (cg->cg_children > 0) { in sysctl_kern_sched_topology_spec_internal()
3405 for (i = 0; i < cg->cg_children; i++) in sysctl_kern_sched_topology_spec_internal()
3407 &cg->cg_child[i], indent+2); in sysctl_kern_sched_topology_spec_internal()
3451 if (error != 0 || req->newptr == NULL) in sysctl_kern_quantum()
3488 "Enables the long-term load balancer");
3491 "Average period in stathz ticks to run the long-term balancer");
3509 "Decay factor used for updating %CPU in 4BSD scheduler");