Lines Matching +full:over +full:- +full:current +full:- +full:scale +full:- +full:factor
1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org>
32 * performance under load even on uni-processor systems.
90 #define TDQ_LOADNAME_LEN (sizeof("CPU ") + sizeof(__XSTRING(MAXCPU)) - 1 + sizeof(" load"))
115 #define THREAD_CAN_MIGRATE(td) ((td)->td_pinned == 0)
117 CPU_ISSET((cpu), &(td)->td_cpuset->cs_mask)
124 * Priority ranges used for interactive and non-interactive timeshare
127 * (NHALF, x, and NHALF) handle non-interactive threads with the outer
130 #define PRI_TIMESHARE_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
131 #define PRI_INTERACT_RANGE ((PRI_TIMESHARE_RANGE - SCHED_PRI_NRESV) / 2)
132 #define PRI_BATCH_RANGE (PRI_TIMESHARE_RANGE - PRI_INTERACT_RANGE)
135 #define PRI_MAX_INTERACT (PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE - 1)
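A brief worked example of the split above, using purely illustrative numbers that are assumptions rather than values taken from the headers: if the timeshare band spanned 136 priority levels and SCHED_PRI_NRESV reserved 40 of them for nice handling, then PRI_INTERACT_RANGE = (136 - 40) / 2 = 48 and PRI_BATCH_RANGE = 136 - 48 = 88, with PRI_MAX_INTERACT = PRI_MIN_TIMESHARE + 47; the 48 numerically lowest (highest-priority) levels serve interactive threads and the remaining 88 serve batch threads.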
140 * These macros determine priorities for non-interactive threads. They are
151 #define SCHED_PRI_CPU_RANGE (PRI_BATCH_RANGE - SCHED_PRI_NRESV)
152 #define SCHED_PRI_NICE(nice) (((nice) - PRIO_MIN) * 5 / 4)
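A minimal userland sketch of the SCHED_PRI_NICE mapping just above, assuming the standard PRIO_MIN of -20 and PRIO_MAX of 20 from <sys/resource.h>; the EXAMPLE_PRI_NICE name is made up for illustration and is not part of the scheduler:

#include <stdio.h>
#include <sys/resource.h>	/* PRIO_MIN (-20) .. PRIO_MAX (20) */

/* Mirrors the macro above: maps the nice range onto a 0..50 priority offset. */
#define	EXAMPLE_PRI_NICE(nice)	(((nice) - PRIO_MIN) * 5 / 4)

int
main(void)
{
	/* Prints: nice -20 -> 0, nice 0 -> 25, nice 20 -> 50 */
	printf("nice -20 -> %d, nice 0 -> %d, nice 20 -> %d\n",
	    EXAMPLE_PRI_NICE(-20), EXAMPLE_PRI_NICE(0), EXAMPLE_PRI_NICE(20));
	return (0);
}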
178 #define RQ_TS_POL_MODULO (RQ_TS_POL_MAX - RQ_TS_POL_MIN + 1)
184 * Must be at most 20 to avoid overflow in sched_pctcpu()'s current formula.
186 * SCHED_TICK_SHIFT: Shift factor to avoid rounding away results.
189 * SCHED_CPU_DECAY_NUMER: Numerator of %CPU decay factor.
190 * SCHED_CPU_DECAY_DENOM: Denominator of %CPU decay factor.
195 #define SCHED_TICK_RUN_SHIFTED(ts) ((ts)->ts_ticks)
196 #define SCHED_TICK_LENGTH(ts) (max((ts)->ts_ltick - (ts)->ts_ftick, 1))
214 * INTERACT_THRESH: Threshold for placement on the current runq.
230 #define TDF_SLICEEND TDF_SCHED2 /* Thread time slice is over. */
234 * the shift factor. Without the shift the error rate
256 static int __read_mostly sched_idlespinthresh = -1;
259 * tdq - per processor runqs and statistics. A mutex synchronizes access to
265 * (l) all accesses are CPU-local
278 struct thread *tdq_curthread; /* (t) Current executing thread. */
307 #define TDQ_LOAD(tdq) atomic_load_int(&(tdq)->tdq_load)
308 #define TDQ_TRANSFERABLE(tdq) atomic_load_int(&(tdq)->tdq_transferable)
309 #define TDQ_SWITCHCNT(tdq) (atomic_load_short(&(tdq)->tdq_switchcnt) + \
310 atomic_load_short(&(tdq)->tdq_oldswitchcnt))
311 #define TDQ_SWITCHCNT_INC(tdq) (atomic_store_short(&(tdq)->tdq_switchcnt, \
312 atomic_load_short(&(tdq)->tdq_switchcnt) + 1))
322 #define SCHED_AFFINITY(ts, t) ((u_int)ticks - (ts)->ts_rltick < (t) * affinity)
325 * Run-time tunables.
345 #define TDQ_ID(x) ((x)->tdq_id)
360 #define TDQ_LOCKPTR(t) ((struct mtx *)(&(t)->tdq_lock))
440 * Print the threads waiting on a run-queue.
453 i, rq->rq_status.rq_sw[i]); in runq_print()
455 if (rq->rq_status.rq_sw[i] & (1ul << j)) { in runq_print()
457 rqq = &rq->rq_queues[pri]; in runq_print()
460 td, td->td_name, td->td_priority, in runq_print()
461 td->td_rqindex, pri); in runq_print()
468 * Print the status of a per-cpu thread queue. Should be a ddb show cmd.
479 printf("\tLock name: %s\n", tdq->tdq_name); in tdq_print()
480 printf("\tload: %d\n", tdq->tdq_load); in tdq_print()
481 printf("\tswitch cnt: %d\n", tdq->tdq_switchcnt); in tdq_print()
482 printf("\told switch cnt: %d\n", tdq->tdq_oldswitchcnt); in tdq_print()
483 printf("\tTS insert offset: %d\n", tdq->tdq_ts_off); in tdq_print()
484 printf("\tTS dequeue offset: %d\n", tdq->tdq_ts_deq_off); in tdq_print()
485 printf("\tload transferable: %d\n", tdq->tdq_transferable); in tdq_print()
486 printf("\tlowest priority: %d\n", tdq->tdq_lowpri); in tdq_print()
488 runq_print(&tdq->tdq_runq); in tdq_print()
495 * If the new priority is not better than the current priority there is in sched_shouldpreempt()
516 * If we're interactive or better and there is non-interactive in sched_shouldpreempt()
525 * Add a thread to the actual run-queue. Keeps transferable counts up to
526 * date with what is actually on the run-queue. Selects the correct
538 pri = td->td_priority; in tdq_runq_add()
542 tdq->tdq_transferable++; in tdq_runq_add()
543 ts->ts_flags |= TSF_XFERABLE; in tdq_runq_add()
554 /* Current queue from which processes are being run. */ in tdq_runq_add()
555 idx = tdq->tdq_ts_deq_off; in tdq_runq_add()
557 idx = (RQ_PRI_TO_QUEUE_IDX(pri) - RQ_TS_POL_MIN + in tdq_runq_add()
558 tdq->tdq_ts_off) % RQ_TS_POL_MODULO; in tdq_runq_add()
564 if (tdq->tdq_ts_deq_off != tdq->tdq_ts_off && in tdq_runq_add()
565 idx == tdq->tdq_ts_deq_off) in tdq_runq_add()
567 idx = (idx - 1 + RQ_TS_POL_MODULO) % in tdq_runq_add()
572 runq_add_idx(&tdq->tdq_runq, td, idx, flags); in tdq_runq_add()
574 runq_add(&tdq->tdq_runq, td, flags); in tdq_runq_add()
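A hedged sketch of the calendar-queue insertion index computed above, with the offsets and modulo passed explicitly; the function is illustrative only and not part of the scheduler:

/*
 * queue_in_band corresponds to RQ_PRI_TO_QUEUE_IDX(pri) - RQ_TS_POL_MIN,
 * ts_off/ts_deq_off to tdq_ts_off/tdq_ts_deq_off, and modulo to
 * RQ_TS_POL_MODULO.
 */
static unsigned
example_ts_insert_idx(unsigned queue_in_band, unsigned ts_off,
    unsigned ts_deq_off, unsigned modulo)
{
	unsigned idx;

	/* Rotate by the insert offset so the band behaves as a calendar queue. */
	idx = (queue_in_band + ts_off) % modulo;
	/*
	 * While the dequeue offset trails the insert offset, never insert
	 * directly at the dequeue slot; back up one slot instead, mirroring
	 * the adjustment above.
	 */
	if (ts_deq_off != ts_off && idx == ts_deq_off)
		idx = (idx - 1 + modulo) % modulo;
	return (idx);
}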
578 * Advance the timesharing dequeue offset to the next non-empty queue or the
592 while (tdq->tdq_ts_deq_off != tdq->tdq_ts_off) { in tdq_advance_ts_deq_off()
595 else if (!runq_is_queue_empty(&tdq->tdq_runq, in tdq_advance_ts_deq_off()
596 tdq->tdq_ts_deq_off + RQ_TS_POL_MIN)) in tdq_advance_ts_deq_off()
599 tdq->tdq_ts_deq_off = (tdq->tdq_ts_deq_off + 1) % in tdq_advance_ts_deq_off()
605 * Remove a thread from a run-queue. This typically happens when a thread
618 if (ts->ts_flags & TSF_XFERABLE) { in tdq_runq_rem()
619 tdq->tdq_transferable--; in tdq_runq_rem()
620 ts->ts_flags &= ~TSF_XFERABLE; in tdq_runq_rem()
622 queue_empty = runq_remove(&tdq->tdq_runq, td); in tdq_runq_rem()
629 if (PRI_MIN_BATCH <= td->td_priority && in tdq_runq_rem()
630 td->td_priority <= PRI_MAX_BATCH && queue_empty && in tdq_runq_rem()
631 tdq->tdq_ts_deq_off + RQ_TS_POL_MIN == td->td_rqindex) in tdq_runq_rem()
646 tdq->tdq_load++; in tdq_load_add()
647 if ((td->td_flags & TDF_NOLOAD) == 0) in tdq_load_add()
648 tdq->tdq_sysload++; in tdq_load_add()
649 KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load); in tdq_load_add()
650 SDT_PROBE2(sched, , , load__change, (int)TDQ_ID(tdq), tdq->tdq_load); in tdq_load_add()
663 KASSERT(tdq->tdq_load != 0, in tdq_load_rem()
666 tdq->tdq_load--; in tdq_load_rem()
667 if ((td->td_flags & TDF_NOLOAD) == 0) in tdq_load_rem()
668 tdq->tdq_sysload--; in tdq_load_rem()
669 KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load); in tdq_load_rem()
670 SDT_PROBE2(sched, , , load__change, (int)TDQ_ID(tdq), tdq->tdq_load); in tdq_load_rem()
689 load = tdq->tdq_sysload - 1; in tdq_slice()
698 * Set lowpri to its exact value by searching the run-queue and
708 ctd = tdq->tdq_curthread; in tdq_setlowpri()
710 if (td == NULL || td->td_priority > ctd->td_priority) in tdq_setlowpri()
711 tdq->tdq_lowpri = ctd->td_priority; in tdq_setlowpri()
713 tdq->tdq_lowpri = td->td_priority; in tdq_setlowpri()
766 r->csr_cpu = -1; in cpu_search_lowest()
769 if (cg->cg_children > 0) { in cpu_search_lowest()
770 for (c = cg->cg_children - 1; c >= 0; c--) { in cpu_search_lowest()
771 load = cpu_search_lowest(&cg->cg_child[c], s, &lr); in cpu_search_lowest()
776 * It allows round-robin between SMT groups with equal in cpu_search_lowest()
779 if (__predict_false(s->cs_running) && in cpu_search_lowest()
780 (cg->cg_child[c].cg_flags & CG_FLAG_THREAD) && in cpu_search_lowest()
785 (load == bload && lr.csr_load < r->csr_load))) { in cpu_search_lowest()
787 r->csr_cpu = lr.csr_cpu; in cpu_search_lowest()
788 r->csr_load = lr.csr_load; in cpu_search_lowest()
795 for (c = cg->cg_last; c >= cg->cg_first; c--) { in cpu_search_lowest()
796 if (!CPU_ISSET(c, &cg->cg_mask)) in cpu_search_lowest()
800 if (c == s->cs_prefer) { in cpu_search_lowest()
801 if (__predict_false(s->cs_running)) in cpu_search_lowest()
802 l--; in cpu_search_lowest()
807 total += load - p; in cpu_search_lowest()
814 if (l > s->cs_load || in cpu_search_lowest()
815 (atomic_load_char(&tdq->tdq_lowpri) <= s->cs_pri && in cpu_search_lowest()
816 (!s->cs_running || c != s->cs_prefer)) || in cpu_search_lowest()
817 !CPU_ISSET(c, s->cs_mask)) in cpu_search_lowest()
822 * It allows round-robin between CPUs with equal load in cpu_search_lowest()
825 if (__predict_false(s->cs_running) && l > 0) in cpu_search_lowest()
828 load -= sched_random() % 128; in cpu_search_lowest()
829 if (bload > load - p) { in cpu_search_lowest()
830 bload = load - p; in cpu_search_lowest()
831 r->csr_cpu = c; in cpu_search_lowest()
832 r->csr_load = load; in cpu_search_lowest()
848 r->csr_cpu = -1; in cpu_search_highest()
851 if (cg->cg_children > 0) { in cpu_search_highest()
852 for (c = cg->cg_children - 1; c >= 0; c--) { in cpu_search_highest()
853 load = cpu_search_highest(&cg->cg_child[c], s, &lr); in cpu_search_highest()
856 (load == bload && lr.csr_load > r->csr_load))) { in cpu_search_highest()
858 r->csr_cpu = lr.csr_cpu; in cpu_search_highest()
859 r->csr_load = lr.csr_load; in cpu_search_highest()
866 for (c = cg->cg_last; c >= cg->cg_first; c--) { in cpu_search_highest()
867 if (!CPU_ISSET(c, &cg->cg_mask)) in cpu_search_highest()
877 if (l < s->cs_load || TDQ_TRANSFERABLE(tdq) < s->cs_trans || in cpu_search_highest()
878 !CPU_ISSET(c, s->cs_mask)) in cpu_search_highest()
881 load -= sched_random() % 256; in cpu_search_highest()
884 r->csr_cpu = c; in cpu_search_highest()
887 r->csr_load = bload; in cpu_search_highest()
893 * lowpri greater than pri. A pri of -1 indicates any priority is
941 if (high == -1) in sched_balance_group()
955 td = tdq->tdq_curthread; in sched_balance_group()
956 if (td->td_lock == TDQ_LOCKPTR(tdq) && in sched_balance_group()
957 (td->td_flags & TDF_IDLETD) == 0 && in sched_balance_group()
959 td->td_flags |= TDF_PICKCPU; in sched_balance_group()
971 low = sched_lowest(cg, &lmask, -1, TDQ_LOAD(tdq) - 1, high, 1); in sched_balance_group()
973 if (anylow && low == -1) in sched_balance_group()
976 if (low == -1) in sched_balance_group()
1049 if (high->tdq_transferable != 0 && high->tdq_load > low->tdq_load) { in sched_balance_pair()
1051 if (lowpri != -1) { in sched_balance_pair()
1053 * In case the target isn't the current CPU, notify it of in sched_balance_pair()
1061 sched_setpreempt(low->tdq_lowpri); in sched_balance_pair()
1070 * Move a thread from one thread queue to another. Returns -1 if the source
1088 return (-1); in tdq_move()
1097 td->td_lock = TDQ_LOCKPTR(to); in tdq_move()
1098 td_get_sched(td)->ts_cpu = cpu; in tdq_move()
1114 if (smp_started == 0 || steal_idle == 0 || tdq->tdq_cg == NULL) in tdq_idled()
1120 for (cg = tdq->tdq_cg, goup = 0; ; ) { in tdq_idled()
1135 if (cpu == -1) { in tdq_idled()
1137 cg = cg->cg_parent; in tdq_idled()
1140 parent = cg->cg_parent; in tdq_idled()
1143 if (parent->cg_children == 2) { in tdq_idled()
1144 if (cg == &parent->cg_child[0]) in tdq_idled()
1145 cg = &parent->cg_child[1]; in tdq_idled()
1147 cg = &parent->cg_child[0]; in tdq_idled()
1172 if (tdq->tdq_load > 0) { in tdq_idled()
1197 if (tdq_move(steal, tdq) != -1) in tdq_idled()
1227 KASSERT(tdq->tdq_lowpri <= lowpri, in tdq_notify()
1228 ("tdq_notify: lowpri %d > tdq_lowpri %d", lowpri, tdq->tdq_lowpri)); in tdq_notify()
1230 if (tdq->tdq_owepreempt) in tdq_notify()
1237 if (!sched_shouldpreempt(tdq->tdq_lowpri, lowpri, 1)) in tdq_notify()
1253 if (TD_IS_IDLETHREAD(tdq->tdq_curthread) && in tdq_notify()
1254 (atomic_load_int(&tdq->tdq_cpu_idle) == 0 || cpu_idle_wakeup(cpu))) in tdq_notify()
1261 tdq->tdq_owepreempt = 1; in tdq_notify()
1277 if (THREAD_CAN_MIGRATE(td) && THREAD_CAN_SCHED(td, d->cpu)) { in runq_steal_pred()
1278 d->td = td; in runq_steal_pred()
1300 if (idx != -1) { in runq_steal_range()
1331 td = runq_steal_range(rq, RQ_TS_POL_MIN, RQ_TS_POL_MIN + off - 1, cpu); in runq_steal_timeshare()
1352 td = runq_steal_realtime(&tdq->tdq_runq, cpu); in tdq_steal()
1355 td = runq_steal_timeshare(&tdq->tdq_runq, cpu, tdq->tdq_ts_deq_off); in tdq_steal()
1358 return (runq_steal_idle(&tdq->tdq_runq, cpu)); in tdq_steal()
1363 * current lock and returns with the assigned queue locked.
1374 td_get_sched(td)->ts_cpu = cpu; in sched_setcpu()
1378 if (td->td_lock == TDQ_LOCKPTR(tdq)) { in sched_setcpu()
1402 SCHED_STAT_DEFINE(pickcpu_local, "Migrated to current cpu");
1416 KASSERT(!CPU_ABSENT(ts->ts_cpu), ("sched_pickcpu: Start scheduler on " in sched_pickcpu()
1417 "absent CPU %d for thread %s.", ts->ts_cpu, td->td_name)); in sched_pickcpu()
1424 return (ts->ts_cpu); in sched_pickcpu()
1429 if (td->td_priority <= PRI_MAX_ITHD && THREAD_CAN_SCHED(td, self) && in sched_pickcpu()
1430 curthread->td_intr_nesting_level) { in sched_pickcpu()
1432 if (tdq->tdq_lowpri >= PRI_MIN_IDLE) { in sched_pickcpu()
1436 ts->ts_cpu = self; in sched_pickcpu()
1438 cg = tdq->tdq_cg; in sched_pickcpu()
1442 tdq = TDQ_CPU(ts->ts_cpu); in sched_pickcpu()
1443 cg = tdq->tdq_cg; in sched_pickcpu()
1449 if (THREAD_CAN_SCHED(td, ts->ts_cpu) && in sched_pickcpu()
1450 atomic_load_char(&tdq->tdq_lowpri) >= PRI_MIN_IDLE && in sched_pickcpu()
1452 if (cg->cg_flags & CG_FLAG_THREAD) { in sched_pickcpu()
1454 for (cpu = cg->cg_first; cpu <= cg->cg_last; cpu++) { in sched_pickcpu()
1456 atomic_load_char(&TDQ_CPU(cpu)->tdq_lowpri); in sched_pickcpu()
1457 if (CPU_ISSET(cpu, &cg->cg_mask) && in sched_pickcpu()
1461 if (cpu > cg->cg_last) { in sched_pickcpu()
1463 return (ts->ts_cpu); in sched_pickcpu()
1467 return (ts->ts_cpu); in sched_pickcpu()
1476 for (ccg = NULL; cg != NULL; cg = cg->cg_parent) { in sched_pickcpu()
1477 if (cg->cg_flags & CG_FLAG_THREAD) in sched_pickcpu()
1479 if (cg->cg_children == 1 || cg->cg_count == 1) in sched_pickcpu()
1481 if (cg->cg_level == CG_SHARE_NONE || in sched_pickcpu()
1482 (!intr && !SCHED_AFFINITY(ts, cg->cg_level))) in sched_pickcpu()
1489 cpu = -1; in sched_pickcpu()
1490 mask = &td->td_cpuset->cs_mask; in sched_pickcpu()
1491 pri = td->td_priority; in sched_pickcpu()
1499 cpu = sched_lowest(ccg, mask, pri, INT_MAX, ts->ts_cpu, r); in sched_pickcpu()
1506 INT_MAX, ts->ts_cpu, r); in sched_pickcpu()
1512 cpu = sched_lowest(cpu_top, mask, pri, INT_MAX, ts->ts_cpu, r); in sched_pickcpu()
1518 cpu = sched_lowest(cpu_top, mask, -1, INT_MAX, ts->ts_cpu, r); in sched_pickcpu()
1525 * Compare the lowest loaded cpu to current cpu. in sched_pickcpu()
1528 if (THREAD_CAN_SCHED(td, self) && TDQ_SELF()->tdq_lowpri > pri && in sched_pickcpu()
1529 atomic_load_char(&tdq->tdq_lowpri) < PRI_MIN_IDLE && in sched_pickcpu()
1534 if (cpu != ts->ts_cpu) in sched_pickcpu()
1558 td = runq_first_thread_range(rq, RQ_TS_POL_MIN, RQ_TS_POL_MIN + off - 1); in runq_choose_timeshare()
1578 td = runq_choose_realtime(&tdq->tdq_runq); in tdq_choose()
1581 td = runq_choose_timeshare(&tdq->tdq_runq, tdq->tdq_ts_deq_off); in tdq_choose()
1583 KASSERT(td->td_priority >= PRI_MIN_BATCH, in tdq_choose()
1585 td->td_priority)); in tdq_choose()
1588 td = runq_choose_idle(&tdq->tdq_runq); in tdq_choose()
1590 KASSERT(td->td_priority >= PRI_MIN_IDLE, in tdq_choose()
1592 td->td_priority)); in tdq_choose()
1608 runq_init(&tdq->tdq_runq); in tdq_setup()
1609 tdq->tdq_id = id; in tdq_setup()
1610 snprintf(tdq->tdq_name, sizeof(tdq->tdq_name), in tdq_setup()
1612 mtx_init(&tdq->tdq_lock, tdq->tdq_name, "sched lock", MTX_SPIN); in tdq_setup()
1614 snprintf(tdq->tdq_loadname, sizeof(tdq->tdq_loadname), in tdq_setup()
1630 tdq->tdq_cg = smp_topo_find(cpu_top, i); in sched_setup_smp()
1631 if (tdq->tdq_cg == NULL) in sched_setup_smp()
1660 tdq->tdq_curthread = &thread0; in sched_setup()
1661 tdq->tdq_lowpri = thread0.td_priority; in sched_setup()
1710 * waiting on a run-queue. Would be prettier if we had floating point.
1715 *                            scaling factor
1716 * interactivity score =  ---------------------
1717 *                        sleep time / run time
1723 *                                                  scaling factor
1724 * interactivity score = 2 * scaling factor - ---------------------
1725 *                                             run time / sleep time
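A worked example, assuming the stock constants SCHED_INTERACT_MAX == 100 and SCHED_INTERACT_HALF == 50 (treat both values as assumptions): a thread that sleeps three times as long as it runs scores 50 / 3 = 16 and is considered interactive, while a thread that runs three times as long as it sleeps scores 100 - 50 / 3 = 84 and is treated as batch. The sched_interact_score() lines below compute this, with the division by SCHED_INTERACT_HALF performed first to avoid overflow.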
1740 ts->ts_runtime >= ts->ts_slptime) in sched_interact_score()
1743 if (ts->ts_runtime > ts->ts_slptime) { in sched_interact_score()
1744 div = max(1, ts->ts_runtime / SCHED_INTERACT_HALF); in sched_interact_score()
1746 (SCHED_INTERACT_HALF - (ts->ts_slptime / div))); in sched_interact_score()
1748 if (ts->ts_slptime > ts->ts_runtime) { in sched_interact_score()
1749 div = max(1, ts->ts_slptime / SCHED_INTERACT_HALF); in sched_interact_score()
1750 return (ts->ts_runtime / div); in sched_interact_score()
1753 if (ts->ts_runtime) in sched_interact_score()
1764 * Scale the scheduling priority according to the "interactivity" of this
1773 if (PRI_BASE(td->td_pri_class) != PRI_TIMESHARE) in sched_priority()
1776 nice = td->td_proc->p_nice; in sched_priority()
1793 pri += (PRI_MAX_INTERACT - PRI_MIN_INTERACT + 1) * score / in sched_priority()
1805 const u_int cpu_pri_off = (((SCHED_PRI_CPU_RANGE - 1) * in sched_priority()
1837 sum = ts->ts_runtime + ts->ts_slptime; in sched_interact_update()
1846 if (ts->ts_runtime > ts->ts_slptime) { in sched_interact_update()
1847 ts->ts_runtime = SCHED_SLP_RUN_MAX; in sched_interact_update()
1848 ts->ts_slptime = 1; in sched_interact_update()
1850 ts->ts_slptime = SCHED_SLP_RUN_MAX; in sched_interact_update()
1851 ts->ts_runtime = 1; in sched_interact_update()
1861 ts->ts_runtime /= 2; in sched_interact_update()
1862 ts->ts_slptime /= 2; in sched_interact_update()
1865 ts->ts_runtime = (ts->ts_runtime / 5) * 4; in sched_interact_update()
1866 ts->ts_slptime = (ts->ts_slptime / 5) * 4; in sched_interact_update()
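For example (numbers purely illustrative): if ts_runtime is 60 and ts_slptime is 45 units when the sum crosses the limit, the scaling above leaves 48 and 36; the 4:3 ratio, and hence the interactivity score (up to integer rounding), is preserved while one fifth of the accumulated history is forgotten.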
1870 * Scale back the interactivity history when a child thread is created. The
1883 sum = ts->ts_runtime + ts->ts_slptime; in sched_interact_fork()
1886 ts->ts_runtime /= ratio; in sched_interact_fork()
1887 ts->ts_slptime /= ratio; in sched_interact_fork()
1903 ts0->ts_ftick = (u_int)ticks; in schedinit()
1904 ts0->ts_ltick = ts0->ts_ftick; in schedinit()
1905 ts0->ts_slice = 0; in schedinit()
1906 ts0->ts_cpu = curcpu; /* set valid CPU number */ in schedinit()
1926 PCPU_GET(idlethread)->td_lock = TDQ_LOCKPTR(TDQ_SELF()); in schedinit_ap()
1956 const u_int lu_span = t - ts->ts_ltick; in sched_pctcpu_update()
1964 ts->ts_ticks = run ? (t_tgt << SCHED_TICK_SHIFT) : 0; in sched_pctcpu_update()
1965 ts->ts_ftick = t - t_tgt; in sched_pctcpu_update()
1966 ts->ts_ltick = t; in sched_pctcpu_update()
1970 if (t - ts->ts_ftick >= t_max) { in sched_pctcpu_update()
1986 ts->ts_ticks = SCHED_TICK_RUN_SHIFTED(ts) / in sched_pctcpu_update()
1987 SCHED_TICK_LENGTH(ts) * (t_tgt - lu_span); in sched_pctcpu_update()
1988 ts->ts_ftick = t - t_tgt; in sched_pctcpu_update()
1992 ts->ts_ticks += lu_span << SCHED_TICK_SHIFT; in sched_pctcpu_update()
1993 ts->ts_ltick = t; in sched_pctcpu_update()
1997 * Adjust the priority of a thread. Move it to the appropriate run-queue
1998 * if necessary. This is the back-end for several priority related
2008 "prio:%d", td->td_priority, "new prio:%d", prio, in sched_thread_priority()
2010 SDT_PROBE3(sched, , , change__pri, td, td->td_proc, prio); in sched_thread_priority()
2011 if (td != curthread && prio < td->td_priority) { in sched_thread_priority()
2013 "lend prio", "prio:%d", td->td_priority, "new prio:%d", in sched_thread_priority()
2015 SDT_PROBE4(sched, , , lend__pri, td, td->td_proc, prio, in sched_thread_priority()
2019 if (td->td_priority == prio) in sched_thread_priority()
2024 * queue. This could be optimized to not re-add in some in sched_thread_priority()
2027 if (TD_ON_RUNQ(td) && prio < td->td_priority) { in sched_thread_priority()
2029 td->td_priority = prio; in sched_thread_priority()
2035 * information so other cpus are aware of our current priority. in sched_thread_priority()
2038 tdq = TDQ_CPU(td_get_sched(td)->ts_cpu); in sched_thread_priority()
2039 oldpri = td->td_priority; in sched_thread_priority()
2040 td->td_priority = prio; in sched_thread_priority()
2041 if (prio < tdq->tdq_lowpri) in sched_thread_priority()
2042 tdq->tdq_lowpri = prio; in sched_thread_priority()
2043 else if (tdq->tdq_lowpri == oldpri) in sched_thread_priority()
2047 td->td_priority = prio; in sched_thread_priority()
2058 td->td_flags |= TDF_BORROWING; in sched_lend_prio()
2064 * over. The prio argument is the minimum priority the thread
2075 if (td->td_base_pri >= PRI_MIN_TIMESHARE && in sched_unlend_prio()
2076 td->td_base_pri <= PRI_MAX_TIMESHARE) in sched_unlend_prio()
2077 base_pri = td->td_user_pri; in sched_unlend_prio()
2079 base_pri = td->td_base_pri; in sched_unlend_prio()
2081 td->td_flags &= ~TDF_BORROWING; in sched_unlend_prio()
2096 td->td_base_pri = prio; in sched_prio()
2102 if (td->td_flags & TDF_BORROWING && td->td_priority < prio) in sched_prio()
2106 oldprio = td->td_priority; in sched_prio()
2124 MPASS(td->td_pri_class == PRI_ITHD); in sched_ithread_prio()
2125 td->td_base_ithread_pri = prio; in sched_ithread_prio()
2130 * Set the base user priority; this does not affect the current running priority.
2136 td->td_base_user_pri = prio; in sched_user_prio()
2137 if (td->td_lend_user_pri <= prio) in sched_user_prio()
2139 td->td_user_pri = prio; in sched_user_prio()
2147 td->td_lend_user_pri = prio; in sched_lend_user_prio()
2148 td->td_user_pri = min(prio, td->td_base_user_pri); in sched_lend_user_prio()
2149 if (td->td_priority > td->td_user_pri) in sched_lend_user_prio()
2150 sched_prio(td, td->td_user_pri); in sched_lend_user_prio()
2151 else if (td->td_priority != td->td_user_pri) in sched_lend_user_prio()
2162 if (td->td_lend_user_pri == prio) in sched_lend_user_prio_cond()
2184 tdq->tdq_cg == NULL) in tdq_trysteal()
2191 for (i = 1, cg = tdq->tdq_cg, goup = 0; ; ) { in tdq_trysteal()
2208 if (cpu == -1) { in tdq_trysteal()
2210 cg = cg->cg_parent; in tdq_trysteal()
2217 parent = cg->cg_parent; in tdq_trysteal()
2222 if (parent->cg_children == 2) { in tdq_trysteal()
2223 if (cg == &parent->cg_child[0]) in tdq_trysteal()
2224 cg = &parent->cg_child[1]; in tdq_trysteal()
2226 cg = &parent->cg_child[0]; in tdq_trysteal()
2249 if (tdq->tdq_load > 0) in tdq_trysteal()
2267 if (tdq_move(steal, tdq) == -1) { in tdq_trysteal()
2291 (td_get_sched(td)->ts_flags & TSF_BOUND) != 0, in sched_switch_migrate()
2293 KASSERT(!CPU_ABSENT(td_get_sched(td)->ts_cpu), ("sched_switch_migrate: " in sched_switch_migrate()
2294 "thread %s queued on absent CPU %d.", td->td_name, in sched_switch_migrate()
2295 td_get_sched(td)->ts_cpu)); in sched_switch_migrate()
2296 tdn = TDQ_CPU(td_get_sched(td)->ts_cpu); in sched_switch_migrate()
2302 * prevent preemption while we're holding neither run-queue lock. in sched_switch_migrate()
2320 atomic_store_rel_ptr((volatile uintptr_t *)&td->td_lock, in thread_unblock_switch()
2350 pickcpu = (td->td_flags & TDF_PICKCPU) != 0; in sched_switch()
2352 ts->ts_rltick = (u_int)ticks - affinity * MAX_CACHE_LEVELS; in sched_switch()
2354 ts->ts_rltick = (u_int)ticks; in sched_switch()
2356 td->td_lastcpu = td->td_oncpu; in sched_switch()
2357 preempted = (td->td_flags & TDF_SLICEEND) == 0 && in sched_switch()
2359 td->td_flags &= ~(TDF_PICKCPU | TDF_SLICEEND); in sched_switch()
2361 td->td_owepreempt = 0; in sched_switch()
2362 atomic_store_char(&tdq->tdq_owepreempt, 0); in sched_switch()
2379 if (THREAD_CAN_MIGRATE(td) && (!THREAD_CAN_SCHED(td, ts->ts_cpu) in sched_switch()
2381 ts->ts_cpu = sched_pickcpu(td, 0); in sched_switch()
2383 if (ts->ts_cpu == cpuid) in sched_switch()
2395 if (tdq->tdq_load == 0) in sched_switch()
2403 "prio:%d", td->td_priority); in sched_switch()
2406 "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg, in sched_switch()
2407 "lockname:\"%s\"", td->td_lockname); in sched_switch()
2412 * appropriate cpu run-queue or sleep-queue and with the current in sched_switch()
2413 * thread-queue locked. in sched_switch()
2416 MPASS(td == tdq->tdq_curthread); in sched_switch()
2426 if (PMC_PROC_IS_USING_PMCS(td->td_proc)) in sched_switch()
2429 SDT_PROBE2(sched, , , off__cpu, newtd, newtd->td_proc); in sched_switch()
2446 td->td_oncpu = NOCPU; in sched_switch()
2448 cpuid = td->td_oncpu = PCPU_GET(cpuid); in sched_switch()
2452 if (PMC_PROC_IS_USING_PMCS(td->td_proc)) in sched_switch()
2459 KASSERT(curthread->td_md.md_spinlock_count == 1, in sched_switch()
2460 ("invalid count %d", curthread->td_md.md_spinlock_count)); in sched_switch()
2463 "prio:%d", td->td_priority); in sched_switch()
2476 p->p_nice = nice; in sched_nice()
2480 sched_prio(td, td->td_base_user_pri); in sched_nice()
2494 td->td_slptick = ticks; in sched_sleep()
2495 if (PRI_BASE(td->td_pri_class) != PRI_TIMESHARE) in sched_sleep()
2499 else if (static_boost && td->td_priority > static_boost) in sched_sleep()
2522 slptick = td->td_slptick; in sched_wakeup()
2523 td->td_slptick = 0; in sched_wakeup()
2525 ts->ts_slptime += (ticks - slptick) << SCHED_TICK_SHIFT; in sched_wakeup()
2534 if (PRI_BASE(td->td_pri_class) == PRI_ITHD && in sched_wakeup()
2535 td->td_priority != td->td_base_ithread_pri) in sched_wakeup()
2536 sched_prio(td, td->td_base_ithread_pri); in sched_wakeup()
2539 * Reset the slice value since we slept and advanced the round-robin. in sched_wakeup()
2541 ts->ts_slice = 0; in sched_wakeup()
2560 td_get_sched(td)->ts_runtime += tickincr; in sched_fork()
2582 child->td_oncpu = NOCPU; in sched_fork_thread()
2583 child->td_lastcpu = NOCPU; in sched_fork_thread()
2584 child->td_lock = TDQ_LOCKPTR(tdq); in sched_fork_thread()
2585 child->td_cpuset = cpuset_ref(td->td_cpuset); in sched_fork_thread()
2586 child->td_domain.dr_policy = td->td_cpuset->cs_domain; in sched_fork_thread()
2587 ts2->ts_cpu = ts->ts_cpu; in sched_fork_thread()
2588 ts2->ts_flags = 0; in sched_fork_thread()
2592 ts2->ts_ticks = ts->ts_ticks; in sched_fork_thread()
2593 ts2->ts_ltick = ts->ts_ltick; in sched_fork_thread()
2594 ts2->ts_ftick = ts->ts_ftick; in sched_fork_thread()
2598 child->td_priority = child->td_base_pri; in sched_fork_thread()
2602 ts2->ts_slptime = ts->ts_slptime; in sched_fork_thread()
2603 ts2->ts_runtime = ts->ts_runtime; in sched_fork_thread()
2605 ts2->ts_slice = tdq_slice(tdq) - sched_slice_min; in sched_fork_thread()
2607 bzero(ts2->ts_name, sizeof(ts2->ts_name)); in sched_fork_thread()
2619 if (td->td_pri_class == class) in sched_class()
2621 td->td_pri_class = class; in sched_class()
2633 "prio:%d", child->td_priority); in sched_exit()
2650 "prio:%d", child->td_priority); in sched_exit_thread()
2657 td_get_sched(td)->ts_runtime += td_get_sched(child)->ts_runtime; in sched_exit_thread()
2669 SDT_PROBE2(sched, , , surrender, td, td->td_proc); in sched_preempt()
2674 if (td->td_priority > tdq->tdq_lowpri) { in sched_preempt()
2675 if (td->td_critnest == 1) { in sched_preempt()
2683 td->td_owepreempt = 1; in sched_preempt()
2685 tdq->tdq_owepreempt = 0; in sched_preempt()
2691 * Fix priorities on return to user-space. Priorities may be elevated due
2699 td->td_priority = td->td_user_pri; in sched_userret_slowpath()
2700 td->td_base_pri = td->td_user_pri; in sched_userret_slowpath()
2707 "Interrupt thread preemptions due to time-sharing");
2716 if (PRI_BASE(td->td_pri_class) == PRI_ITHD) in td_slice()
2739 balance_ticks -= cnt; in sched_clock()
2749 tdq->tdq_oldswitchcnt = tdq->tdq_switchcnt; in sched_clock()
2750 tdq->tdq_switchcnt = tdq->tdq_load; in sched_clock()
2755 * anti-starvation and "nice" behaviors after the switch to a single in sched_clock()
2756 * 256-queue runqueue, since the queue insert offset is incremented by in sched_clock()
2759 * of 64 before (separate runqueue), we apply a factor 7/4 when in sched_clock()
2763 if (tdq->tdq_ts_off == tdq->tdq_ts_deq_off) { in sched_clock()
2764 tdq->tdq_ts_ticks += cnt; in sched_clock()
2765 tdq->tdq_ts_off = (tdq->tdq_ts_off + 2 * cnt - in sched_clock()
2766 tdq->tdq_ts_ticks / 4) % RQ_TS_POL_MODULO; in sched_clock()
2767 tdq->tdq_ts_ticks %= 4; in sched_clock()
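To see the 7/4 factor mentioned above at work, assume one tick per call (cnt == 1) and tdq_ts_ticks starting at 0: the first three ticks each advance tdq_ts_off by 2 * 1 - 0 = 2, the fourth advances it by 2 * 1 - 4 / 4 = 1 and wraps tdq_ts_ticks back to 0, so the insert offset moves forward 7 queues every 4 ticks.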
2772 if ((td->td_pri_class & PRI_FIFO_BIT) || TD_IS_IDLETHREAD(td)) in sched_clock()
2775 if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) { in sched_clock()
2780 td_get_sched(td)->ts_runtime += tickincr * cnt; in sched_clock()
2786 * Force a context switch if the current thread has used up a full in sched_clock()
2789 ts->ts_slice += cnt; in sched_clock()
2790 if (ts->ts_slice >= td_slice(td, tdq)) { in sched_clock()
2791 ts->ts_slice = 0; in sched_clock()
2797 if (PRI_BASE(td->td_pri_class) == PRI_ITHD) { in sched_clock()
2799 td->td_owepreempt = 1; in sched_clock()
2800 if (td->td_base_pri + RQ_PPQ < PRI_MAX_ITHD) { in sched_clock()
2802 sched_prio(td, td->td_base_pri + RQ_PPQ); in sched_clock()
2806 td->td_flags |= TDF_SLICEEND; in sched_clock()
2819 * Return whether the current CPU has runnable tasks. Used for in-kernel
2833 * the run-queue while running; however, the load remains.
2846 tdq->tdq_lowpri = td->td_priority; in sched_choose()
2848 tdq->tdq_lowpri = PRI_MAX_IDLE; in sched_choose()
2851 tdq->tdq_curthread = td; in sched_choose()
2869 cpri = ctd->td_priority; in sched_setpreempt()
2876 ctd->td_owepreempt = 1; in sched_setpreempt()
2891 KASSERT((td->td_inhibitors == 0), in tdq_add()
2895 KASSERT(td->td_flags & TDF_INMEM, in tdq_add()
2898 lowpri = tdq->tdq_lowpri; in tdq_add()
2899 if (td->td_priority < lowpri) in tdq_add()
2900 tdq->tdq_lowpri = td->td_priority; in tdq_add()
2921 "prio:%d", td->td_priority, KTR_ATTR_LINKED, in sched_add()
2925 SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL, in sched_add()
2930 * run-queue. in sched_add()
2932 if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) in sched_add()
2945 sched_setpreempt(td->td_priority); in sched_add()
2949 * Now that the thread is moving to the run-queue, set the lock in sched_add()
2952 if (td->td_lock != TDQ_LOCKPTR(tdq)) { in sched_add()
2955 td->td_lock = TDQ_LOCKPTR(tdq); in sched_add()
2961 sched_setpreempt(td->td_priority); in sched_add()
2968 * Remove a thread from a run-queue without running it. This is used
2978 "prio:%d", td->td_priority); in sched_rem()
2979 SDT_PROBE3(sched, , , dequeue, td, td->td_proc, NULL); in sched_rem()
2980 tdq = TDQ_CPU(td_get_sched(td)->ts_cpu); in sched_rem()
2982 MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); in sched_rem()
2988 if (td->td_priority == tdq->tdq_lowpri) in sched_rem()
3006 pctcpu = ((FSHIFT >= SCHED_TICK_SHIFT ? /* Resolved at compile-time. */ in sched_pctcpu()
3007 (SCHED_TICK_RUN_SHIFTED(ts) << (FSHIFT - SCHED_TICK_SHIFT)) : in sched_pctcpu()
3008 (SCHED_TICK_RUN_SHIFTED(ts) >> (SCHED_TICK_SHIFT - FSHIFT))) + in sched_pctcpu()
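A self-contained sketch of the fixed-point conversion above, assuming FSHIFT is 11 (so FSCALE == 2048, as in sys/param.h), SCHED_TICK_SHIFT is 10, and a rounding division by SCHED_TICK_LENGTH(ts) completing the expression; all three are assumptions for illustration, and the helper below is not part of the scheduler:

#include <stdio.h>

#define	EX_FSHIFT	11			/* assumed FSHIFT */
#define	EX_FSCALE	(1 << EX_FSHIFT)	/* 2048 == 100% CPU */
#define	EX_TICK_SHIFT	10			/* assumed SCHED_TICK_SHIFT */

/*
 * run_shifted plays the role of ts_ticks (run ticks << EX_TICK_SHIFT) and
 * len the role of ts_ltick - ts_ftick, the length of the sampling window.
 */
static unsigned
example_pctcpu(unsigned run_shifted, unsigned len)
{
	return (((run_shifted << (EX_FSHIFT - EX_TICK_SHIFT)) + len / 2) / len);
}

int
main(void)
{
	/* A thread that ran 5 of the last 10 ticks: prints 1024 / 2048 (50%). */
	printf("%u / %u\n", example_pctcpu(5U << EX_TICK_SHIFT, 10), EX_FSCALE);
	return (0);
}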
3025 if (THREAD_CAN_SCHED(td, ts->ts_cpu)) in sched_affinity()
3041 ipi_cpu(ts->ts_cpu, IPI_PREEMPT); in sched_affinity()
3056 if (ts->ts_flags & TSF_BOUND) in sched_bind()
3059 ts->ts_flags |= TSF_BOUND; in sched_bind()
3063 ts->ts_cpu = cpu; in sched_bind()
3080 if ((ts->ts_flags & TSF_BOUND) == 0) in sched_unbind()
3082 ts->ts_flags &= ~TSF_BOUND; in sched_unbind()
3090 return (td_get_sched(td)->ts_flags & TSF_BOUND); in sched_is_bound()
3115 total += atomic_load_int(&TDQ_CPU(i)->tdq_sysload); in sched_load()
3118 return (atomic_load_int(&TDQ_SELF()->tdq_sysload)); in sched_load()
3136 ((tdq)->tdq_cg != NULL && ((tdq)->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0)
3156 oldswitchcnt = -1; in sched_idletd()
3194 atomic_store_int(&tdq->tdq_cpu_idle, 1); in sched_idletd()
3207 atomic_store_int(&tdq->tdq_cpu_idle, 0); in sched_idletd()
3211 atomic_store_int(&tdq->tdq_cpu_idle, 0); in sched_idletd()
3214 * Account thread-less hardware interrupts and in sched_idletd()
3239 KASSERT(curthread->td_md.md_spinlock_count == 1, in sched_throw_grab()
3240 ("invalid count %d", curthread->td_md.md_spinlock_count)); in sched_throw_grab()
3290 td->td_lastcpu = td->td_oncpu; in sched_throw()
3291 td->td_oncpu = NOCPU; in sched_throw()
3316 * non-nested critical section with the scheduler lock held. in sched_fork_exit()
3318 KASSERT(curthread->td_md.md_spinlock_count == 1, in sched_fork_exit()
3319 ("invalid count %d", curthread->td_md.md_spinlock_count)); in sched_fork_exit()
3324 MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); in sched_fork_exit()
3325 td->td_oncpu = cpuid; in sched_fork_exit()
3327 "prio:%d", td->td_priority); in sched_fork_exit()
3341 if (ts->ts_name[0] == '\0') in sched_tdname()
3342 snprintf(ts->ts_name, sizeof(ts->ts_name), in sched_tdname()
3343 "%s tid %d", td->td_name, td->td_tid); in sched_tdname()
3344 return (ts->ts_name); in sched_tdname()
3346 return (td->td_name); in sched_tdname()
3357 ts->ts_name[0] = '\0'; in sched_clear_tdname()
3374 sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent, in sysctl_kern_sched_topology_spec_internal()
3375 "", 1 + indent / 2, cg->cg_level); in sysctl_kern_sched_topology_spec_internal()
3377 cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask)); in sysctl_kern_sched_topology_spec_internal()
3379 for (i = cg->cg_first; i <= cg->cg_last; i++) { in sysctl_kern_sched_topology_spec_internal()
3380 if (CPU_ISSET(i, &cg->cg_mask)) { in sysctl_kern_sched_topology_spec_internal()
3390 if (cg->cg_flags != 0) { in sysctl_kern_sched_topology_spec_internal()
3392 if ((cg->cg_flags & CG_FLAG_HTT) != 0) in sysctl_kern_sched_topology_spec_internal()
3394 if ((cg->cg_flags & CG_FLAG_THREAD) != 0) in sysctl_kern_sched_topology_spec_internal()
3396 if ((cg->cg_flags & CG_FLAG_SMT) != 0) in sysctl_kern_sched_topology_spec_internal()
3398 if ((cg->cg_flags & CG_FLAG_NODE) != 0) in sysctl_kern_sched_topology_spec_internal()
3403 if (cg->cg_children > 0) { in sysctl_kern_sched_topology_spec_internal()
3405 for (i = 0; i < cg->cg_children; i++) in sysctl_kern_sched_topology_spec_internal()
3407 &cg->cg_child[i], indent+2); in sysctl_kern_sched_topology_spec_internal()
3451 if (error != 0 || req->newptr == NULL) in sysctl_kern_quantum()
3488 "Enables the long-term load balancer");
3491 "Average period in stathz ticks to run the long-term balancer");
3509 "Decay factor used for updating %CPU in 4BSD scheduler");