xref: /freebsd/sys/kern/sched_shim.c (revision ef1218add1d3d2baca9d3b8bcfb57e05ef6fc503)
1 /*
2  * Copyright 2026 The FreeBSD Foundation
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
7  * under sponsorship from the FreeBSD Foundation.
8  */
9 
10 #include "opt_sched.h"
11 
12 #include <sys/systm.h>
13 #include <sys/kernel.h>
14 #include <sys/lock.h>
15 #include <sys/proc.h>
16 #include <sys/runq.h>
17 #include <sys/sbuf.h>
18 #include <sys/sched.h>
19 #include <sys/smp.h>
20 #include <sys/sysctl.h>
21 #include <machine/ifunc.h>
22 
/*
 * The scheduler implementation in use.  It is assigned by
 * sched_instance_select(); schedinit() panics if it is still NULL, and
 * every shim and SYSINIT hook in this file dispatches through it.
 */
23 const struct sched_instance *active_sched;
24 
/*
 * Shim generators.
 *
 * __DEFINE_SHIM() expands to DEFINE_IFUNC() for a function named __n with
 * return type __r and parameter list __p; the body that follows returns
 * the __m member of active_sched, i.e. the implementation the ifunc is
 * bound to.  The __a argument (the call-argument list) is accepted but
 * not used by the expansion.
 *
 * DEFINE_SHIM0/1/2 are the front ends for entry points taking zero, one
 * or two parameters; they assemble the parameter list from (type, name)
 * pairs.
 */
25 #define	__DEFINE_SHIM(__m, __r, __n, __p, __a)	\
26 	DEFINE_IFUNC(, __r, __n, __p)		\
27 	{					\
28 		return (active_sched->__m);	\
29 	}
30 #define	DEFINE_SHIM0(__m, __r, __n)	\
31     __DEFINE_SHIM(__m, __r, __n, (void), ())
32 #define	DEFINE_SHIM1(__m, __r, __n, __t1, __a1)	\
33     __DEFINE_SHIM(__m, __r, __n, (__t1 __a1), (__a1))
34 #define	DEFINE_SHIM2(__m, __r, __n, __t1, __a1, __t2, __a2)	\
35     __DEFINE_SHIM(__m, __r, __n, (__t1 __a1, __t2 __a2), (__a1, __a2))
36 
/*
 * Scheduler entry points.  Each invocation below creates the function
 * named by its third argument and binds it to the active_sched member
 * named by its first argument; the second argument is the return type and
 * the remaining ones are (type, name) pairs for the parameters.
 *
 * Note that the member name and the function name do not always match
 * textually: sched_switch() is bound to the "sswitch" member and
 * schedinit_ap() to "init_ap".
 */
37 DEFINE_SHIM0(load, int, sched_load)
38 DEFINE_SHIM0(rr_interval, int, sched_rr_interval)
39 DEFINE_SHIM0(runnable, bool, sched_runnable)
40 DEFINE_SHIM2(exit, void, sched_exit, struct proc *, p,
41     struct thread *, childtd)
42 DEFINE_SHIM2(fork, void, sched_fork, struct thread *, td,
43     struct thread *, childtd)
44 DEFINE_SHIM1(fork_exit, void, sched_fork_exit, struct thread *, td)
45 DEFINE_SHIM2(class, void, sched_class, struct thread *, td, int, class)
46 DEFINE_SHIM2(nice, void, sched_nice, struct proc *, p, int, nice)
47 DEFINE_SHIM0(ap_entry, void, sched_ap_entry)
48 DEFINE_SHIM2(exit_thread, void, sched_exit_thread, struct thread *, td,
49     struct thread *, child)
50 DEFINE_SHIM1(estcpu, u_int, sched_estcpu, struct thread *, td)
51 DEFINE_SHIM2(fork_thread, void, sched_fork_thread, struct thread *, td,
52     struct thread *, child)
53 DEFINE_SHIM2(ithread_prio, void, sched_ithread_prio, struct thread *, td,
54     u_char, prio)
55 DEFINE_SHIM2(lend_prio, void, sched_lend_prio, struct thread *, td,
56     u_char, prio)
57 DEFINE_SHIM2(lend_user_prio, void, sched_lend_user_prio, struct thread *, td,
58     u_char, pri)
59 DEFINE_SHIM2(lend_user_prio_cond, void, sched_lend_user_prio_cond,
60     struct thread *, td, u_char, pri)
61 DEFINE_SHIM1(pctcpu, fixpt_t, sched_pctcpu, struct thread *, td)
62 DEFINE_SHIM2(prio, void, sched_prio, struct thread *, td, u_char, prio)
63 DEFINE_SHIM2(sleep, void, sched_sleep, struct thread *, td, int, prio)
64 DEFINE_SHIM2(sswitch, void, sched_switch, struct thread *, td, int, flags)
65 DEFINE_SHIM1(throw, void, sched_throw, struct thread *, td)
66 DEFINE_SHIM2(unlend_prio, void, sched_unlend_prio, struct thread *, td,
67     u_char, prio)
68 DEFINE_SHIM2(user_prio, void, sched_user_prio, struct thread *, td,
69     u_char, prio)
70 DEFINE_SHIM1(userret_slowpath, void, sched_userret_slowpath,
71     struct thread *, td)
72 DEFINE_SHIM2(add, void, sched_add, struct thread *, td, int, flags)
73 DEFINE_SHIM0(choose, struct thread *, sched_choose)
74 DEFINE_SHIM2(clock, void, sched_clock, struct thread *, td, int, cnt)
75 DEFINE_SHIM1(idletd, void, sched_idletd, void *, dummy)
76 DEFINE_SHIM1(preempt, void, sched_preempt, struct thread *, td)
77 DEFINE_SHIM1(relinquish, void, sched_relinquish, struct thread *, td)
78 DEFINE_SHIM1(rem, void, sched_rem, struct thread *, td)
79 DEFINE_SHIM2(wakeup, void, sched_wakeup, struct thread *, td, int, srqflags)
80 DEFINE_SHIM2(bind, void, sched_bind, struct thread *, td, int, cpu)
81 DEFINE_SHIM1(unbind, void, sched_unbind, struct thread *, td)
82 DEFINE_SHIM1(is_bound, int, sched_is_bound, struct thread *, td)
83 DEFINE_SHIM1(affinity, void, sched_affinity, struct thread *, td)
84 DEFINE_SHIM0(sizeof_proc, int, sched_sizeof_proc)
85 DEFINE_SHIM0(sizeof_thread, int, sched_sizeof_thread)
86 DEFINE_SHIM1(tdname, char *, sched_tdname, struct thread *, td)
87 DEFINE_SHIM1(clear_tdname, void, sched_clear_tdname, struct thread *, td)
88 DEFINE_SHIM0(do_timer_accounting, bool, sched_do_timer_accounting)
89 DEFINE_SHIM1(find_l2_neighbor, int, sched_find_l2_neighbor, int, cpu)
90 DEFINE_SHIM0(init_ap, void, schedinit_ap)
91 
92 
/*
 * Scheduler statistics and the "sched" SDT provider with its probes.
 * For each probe the trailing string arguments name the types of the
 * probe's arguments; SDT_PROBE_DEFINE (no digit) declares a probe with
 * none.
 * NOTE(review): presumably these live in the shim so that exactly one
 * definition exists no matter which schedulers are compiled in -- confirm.
 */
93 SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions");
94 SCHED_STAT_DEFINE(ithread_preemptions,
95     "Interrupt thread preemptions due to time-sharing");
96 
97 SDT_PROVIDER_DEFINE(sched);
98 
99 SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *",
100     "struct proc *", "uint8_t");
101 SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *",
102     "struct proc *", "void *");
103 SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *",
104     "struct proc *", "void *", "int");
105 SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *",
106     "struct proc *", "uint8_t", "struct thread *");
107 SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
108 SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
109     "struct proc *");
110 SDT_PROBE_DEFINE(sched, , , on__cpu);
111 SDT_PROBE_DEFINE(sched, , , remain__cpu);
112 SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
113     "struct proc *");
114 
/*
 * Storage for dtrace_vtime_active and dtrace_vtime_switch_func; compiled
 * only when the kernel is built with KDTRACE_HOOKS.
 * NOTE(review): neither variable is read or written in this file;
 * presumably they are consumed by the scheduler implementations and by
 * DTrace itself -- confirm.
 */
115 #ifdef KDTRACE_HOOKS
116 #include <sys/dtrace_bsd.h>
117 int __read_mostly		dtrace_vtime_active;
118 dtrace_vtime_switch_func_t	dtrace_vtime_switch_func;
119 #endif
120 
/*
 * Name of the requested/selected scheduler.  Starts out as "ULE", may be
 * overridden by the kern.sched.name tunable in sched_instance_select(),
 * and is rewritten there with the fallback scheduler's name when the
 * requested one is not found.  Exported read-only as kern.sched.name.
 */
121 static char sched_name[32] = "ULE";
122 
/*
 * Linker set of the scheduler selections available to
 * sched_instance_select() and to the kern.sched.available sysctl.
 */
123 SET_DECLARE(sched_instance_set, struct sched_selection);
124 
125 void
sched_instance_select(void)126 sched_instance_select(void)
127 {
128 	struct sched_selection *s, **ss;
129 	int i;
130 
131 	TUNABLE_STR_FETCH("kern.sched.name", sched_name, sizeof(sched_name));
132 	SET_FOREACH(ss, sched_instance_set) {
133 		s = *ss;
134 		for (i = 0; s->name[i] == sched_name[i]; i++) {
135 			if (s->name[i] == '\0') {
136 				active_sched = s->instance;
137 				return;
138 			}
139 		}
140 	}
141 
142 	/*
143 	 * No scheduler matching the configuration was found.  If
144 	 * there is any scheduler compiled in, at all, use the first
145 	 * scheduler from the linker set.
146 	 */
147 	if (SET_BEGIN(sched_instance_set) < SET_LIMIT(sched_instance_set)) {
148 		s = *SET_BEGIN(sched_instance_set);
149 		active_sched = s->instance;
150 		for (i = 0;; i++) {
151 			sched_name[i] = s->name[i];
152 			if (s->name[i] == '\0')
153 				break;
154 		}
155 	}
156 }
157 
158 void
schedinit(void)159 schedinit(void)
160 {
161 	if (active_sched == NULL)
162 		panic("Cannot find scheduler %s", sched_name);
163 	active_sched->init();
164 }
165 
166 struct cpu_group __read_mostly *cpu_top;		/* CPU topology */
167 
168 static void
sched_setup(void * dummy)169 sched_setup(void *dummy)
170 {
171 	cpu_top = smp_topo();
172 	active_sched->setup();
173 }
174 SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
175 
176 static void
sched_initticks(void * dummy)177 sched_initticks(void *dummy)
178 {
179 	active_sched->initticks();
180 }
181 SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
182     NULL);
183 
184 static void
sched_schedcpu(void)185 sched_schedcpu(void)
186 {
187 	active_sched->schedcpu();
188 }
189 SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, sched_schedcpu, NULL);
190 
/*
 * The kern.sched sysctl node, and the read-only string kern.sched.name
 * that exposes the contents of sched_name.
 */
191 SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
192     "Scheduler");
193 
194 SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, sched_name, 0,
195     "Scheduler name");
196 
197 static int
sysctl_kern_sched_available(SYSCTL_HANDLER_ARGS)198 sysctl_kern_sched_available(SYSCTL_HANDLER_ARGS)
199 {
200 	struct sched_selection *s, **ss;
201 	struct sbuf *sb, sm;
202 	int error;
203 	bool first;
204 
205 	sb = sbuf_new_for_sysctl(&sm, NULL, 0, req);
206 	if (sb == NULL)
207 		return (ENOMEM);
208 	first = true;
209 	SET_FOREACH(ss, sched_instance_set) {
210 		s = *ss;
211 		if (first)
212 			first = false;
213 		else
214 			sbuf_cat(sb, ",");
215 		sbuf_cat(sb, s->name);
216 	}
217 	error = sbuf_finish(sb);
218 	sbuf_delete(sb);
219 	return (error);
220 }
221 
222 SYSCTL_PROC(_kern_sched, OID_AUTO, available,
223     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
224     NULL, 0, sysctl_kern_sched_available, "A",
225     "List of available schedulers");
226 
/*
 * ccpu is exported read-only as kern.ccpu.  Nothing in the code visible
 * here assigns it.
 * NOTE(review): presumably the selected scheduler sets it -- confirm.
 */
227 fixpt_t ccpu;
228 SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0,
229     "Decay factor used for updating %CPU");
230 
231 /*
232  * Build the CPU topology dump string. Is recursively called to collect
233  * the topology tree.
234  */
235 static int
sysctl_kern_sched_topology_spec_internal(struct sbuf * sb,struct cpu_group * cg,int indent)236 sysctl_kern_sched_topology_spec_internal(struct sbuf *sb,
237     struct cpu_group *cg, int indent)
238 {
239 	char cpusetbuf[CPUSETBUFSIZ];
240 	int i, first;
241 
242 	if (cpu_top == NULL) {
243 		sbuf_printf(sb, "%*s<group level=\"1\" cache-level=\"1\">\n",
244 		    indent, "");
245 		sbuf_printf(sb, "%*s</group>\n", indent, "");
246 		return (0);
247 	}
248 
249 	sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent,
250 	    "", 1 + indent / 2, cg->cg_level);
251 	sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "",
252 	    cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
253 	first = TRUE;
254 	for (i = cg->cg_first; i <= cg->cg_last; i++) {
255 		if (CPU_ISSET(i, &cg->cg_mask)) {
256 			if (!first)
257 				sbuf_cat(sb, ", ");
258 			else
259 				first = FALSE;
260 			sbuf_printf(sb, "%d", i);
261 		}
262 	}
263 	sbuf_cat(sb, "</cpu>\n");
264 
265 	if (cg->cg_flags != 0) {
266 		sbuf_printf(sb, "%*s <flags>", indent, "");
267 		if ((cg->cg_flags & CG_FLAG_HTT) != 0)
268 			sbuf_cat(sb, "<flag name=\"HTT\">HTT group</flag>");
269 		if ((cg->cg_flags & CG_FLAG_THREAD) != 0)
270 			sbuf_cat(sb, "<flag name=\"THREAD\">THREAD group</flag>");
271 		if ((cg->cg_flags & CG_FLAG_SMT) != 0)
272 			sbuf_cat(sb, "<flag name=\"SMT\">SMT group</flag>");
273 		if ((cg->cg_flags & CG_FLAG_NODE) != 0)
274 			sbuf_cat(sb, "<flag name=\"NODE\">NUMA node</flag>");
275 		sbuf_cat(sb, "</flags>\n");
276 	}
277 
278 	if (cg->cg_children > 0) {
279 		sbuf_printf(sb, "%*s <children>\n", indent, "");
280 		for (i = 0; i < cg->cg_children; i++)
281 			sysctl_kern_sched_topology_spec_internal(sb,
282 			    &cg->cg_child[i], indent + 2);
283 		sbuf_printf(sb, "%*s </children>\n", indent, "");
284 	}
285 	sbuf_printf(sb, "%*s</group>\n", indent, "");
286 	return (0);
287 }
288 
289 /*
290  * Sysctl handler for retrieving topology dump. It's a wrapper for
291  * the recursive sysctl_kern_smp_topology_spec_internal().
292  */
293 static int
sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS)294 sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS)
295 {
296 	struct sbuf *topo;
297 	int err;
298 
299 	topo = sbuf_new_for_sysctl(NULL, NULL, 512, req);
300 	if (topo == NULL)
301 		return (ENOMEM);
302 
303 	sbuf_cat(topo, "<groups>\n");
304 	err = sysctl_kern_sched_topology_spec_internal(topo, cpu_top, 1);
305 	sbuf_cat(topo, "</groups>\n");
306 
307 	if (err == 0)
308 		err = sbuf_finish(topo);
309 	sbuf_delete(topo);
310 	return (err);
311 }
312 
313 SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING |
314     CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0,
315     sysctl_kern_sched_topology_spec, "A",
316     "XML dump of detected CPU topology");
317