xref: /freebsd/sys/kern/sched_shim.c (revision 120ca8d74b46caa260702485e30fe5f9f9984682)
/*
 * Copyright 2026 The FreeBSD Foundation
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 */

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/runq.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <machine/ifunc.h>

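/*
 * The scheduler implementation selected at boot; see
 * sched_instance_select() below.  All of the sched_*(9) entry points
 * defined in this file forward into this instance.
 */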
const struct sched_instance *active_sched;

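/*
 * Generate one trampoline per sched_*(9) entry point.  On platforms
 * with kernel ifunc support the trampoline is an ifunc whose resolver
 * returns the selected scheduler's method, so the selection cost is
 * paid once, when ifuncs are resolved during early boot; this requires
 * that sched_instance_select() has run by then.  On the fallback path
 * the trampoline is an ordinary function that indirects through
 * active_sched on every call.
 */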
#ifndef __DO_NOT_HAVE_SYS_IFUNCS
#define	__DEFINE_SHIM(__m, __r, __n, __p, __a)	\
	DEFINE_IFUNC(, __r, __n, __p)		\
	{					\
		return (active_sched->__m);	\
	}
#else
#define	__DEFINE_SHIM(__m, __r, __n, __p, __a)	\
	__r					\
	__n __p					\
	{					\
		return (active_sched->__m __a);	\
	}
#endif
#define	DEFINE_SHIM0(__m, __r, __n)	\
    __DEFINE_SHIM(__m, __r, __n, (void), ())
#define	DEFINE_SHIM1(__m, __r, __n, __t1, __a1)	\
    __DEFINE_SHIM(__m, __r, __n, (__t1 __a1), (__a1))
#define	DEFINE_SHIM2(__m, __r, __n, __t1, __a1, __t2, __a2)	\
    __DEFINE_SHIM(__m, __r, __n, (__t1 __a1, __t2 __a2), (__a1, __a2))
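/*
 * For example, DEFINE_SHIM0(load, int, sched_load) expands, modulo
 * whitespace, to the following on ifunc-capable platforms, where the
 * resolver hands back the selected scheduler's method:
 *
 *	DEFINE_IFUNC(, int, sched_load, (void))
 *	{
 *		return (active_sched->load);
 *	}
 *
 * and on the fallback path to a plain trampoline:
 *
 *	int
 *	sched_load(void)
 *	{
 *		return (active_sched->load());
 *	}
 */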

DEFINE_SHIM0(load, int, sched_load)
DEFINE_SHIM0(rr_interval, int, sched_rr_interval)
DEFINE_SHIM0(runnable, bool, sched_runnable)
DEFINE_SHIM2(exit, void, sched_exit, struct proc *, p,
    struct thread *, childtd)
DEFINE_SHIM2(fork, void, sched_fork, struct thread *, td,
    struct thread *, childtd)
DEFINE_SHIM1(fork_exit, void, sched_fork_exit, struct thread *, td)
DEFINE_SHIM2(class, void, sched_class, struct thread *, td, int, class)
DEFINE_SHIM2(nice, void, sched_nice, struct proc *, p, int, nice)
DEFINE_SHIM0(ap_entry, void, sched_ap_entry)
DEFINE_SHIM2(exit_thread, void, sched_exit_thread, struct thread *, td,
    struct thread *, child)
DEFINE_SHIM1(estcpu, u_int, sched_estcpu, struct thread *, td)
DEFINE_SHIM2(fork_thread, void, sched_fork_thread, struct thread *, td,
    struct thread *, child)
DEFINE_SHIM2(ithread_prio, void, sched_ithread_prio, struct thread *, td,
    u_char, prio)
DEFINE_SHIM2(lend_prio, void, sched_lend_prio, struct thread *, td,
    u_char, prio)
DEFINE_SHIM2(lend_user_prio, void, sched_lend_user_prio, struct thread *, td,
    u_char, pri)
DEFINE_SHIM2(lend_user_prio_cond, void, sched_lend_user_prio_cond,
    struct thread *, td, u_char, pri)
DEFINE_SHIM1(pctcpu, fixpt_t, sched_pctcpu, struct thread *, td)
DEFINE_SHIM2(prio, void, sched_prio, struct thread *, td, u_char, prio)
DEFINE_SHIM2(sleep, void, sched_sleep, struct thread *, td, int, prio)
DEFINE_SHIM2(sswitch, void, sched_switch, struct thread *, td, int, flags)
DEFINE_SHIM1(throw, void, sched_throw, struct thread *, td)
DEFINE_SHIM2(unlend_prio, void, sched_unlend_prio, struct thread *, td,
    u_char, prio)
DEFINE_SHIM2(user_prio, void, sched_user_prio, struct thread *, td,
    u_char, prio)
DEFINE_SHIM1(userret_slowpath, void, sched_userret_slowpath,
    struct thread *, td)
DEFINE_SHIM2(add, void, sched_add, struct thread *, td, int, flags)
DEFINE_SHIM0(choose, struct thread *, sched_choose)
DEFINE_SHIM2(clock, void, sched_clock, struct thread *, td, int, cnt)
DEFINE_SHIM1(idletd, void, sched_idletd, void *, dummy)
DEFINE_SHIM1(preempt, void, sched_preempt, struct thread *, td)
DEFINE_SHIM1(relinquish, void, sched_relinquish, struct thread *, td)
DEFINE_SHIM1(rem, void, sched_rem, struct thread *, td)
DEFINE_SHIM2(wakeup, void, sched_wakeup, struct thread *, td, int, srqflags)
DEFINE_SHIM2(bind, void, sched_bind, struct thread *, td, int, cpu)
DEFINE_SHIM1(unbind, void, sched_unbind, struct thread *, td)
DEFINE_SHIM1(is_bound, int, sched_is_bound, struct thread *, td)
DEFINE_SHIM1(affinity, void, sched_affinity, struct thread *, td)
DEFINE_SHIM0(sizeof_proc, int, sched_sizeof_proc)
DEFINE_SHIM0(sizeof_thread, int, sched_sizeof_thread)
DEFINE_SHIM1(tdname, char *, sched_tdname, struct thread *, td)
DEFINE_SHIM1(clear_tdname, void, sched_clear_tdname, struct thread *, td)
DEFINE_SHIM0(do_timer_accounting, bool, sched_do_timer_accounting)
DEFINE_SHIM1(find_l2_neighbor, int, sched_find_l2_neighbor, int, cpu)
DEFINE_SHIM0(init_ap, void, schedinit_ap)

SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions");
SCHED_STAT_DEFINE(ithread_preemptions,
    "Interrupt thread preemptions due to time-sharing");

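/*
 * Static DTrace probes for scheduler events.  They are defined here in
 * the shim so that every scheduler implementation shares the single
 * "sched" provider.
 */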
SDT_PROVIDER_DEFINE(sched);

SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *",
    "struct proc *", "uint8_t");
SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *",
    "struct proc *", "void *");
SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *",
    "struct proc *", "void *", "int");
SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *",
    "struct proc *", "uint8_t", "struct thread *");
SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
    "struct proc *");
SDT_PROBE_DEFINE(sched, , , on__cpu);
SDT_PROBE_DEFINE(sched, , , remain__cpu);
SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
    "struct proc *");

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>
int __read_mostly		dtrace_vtime_active;
dtrace_vtime_switch_func_t	dtrace_vtime_switch_func;
#endif

static char sched_name[32] = "ULE";

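/*
 * Compiled-in schedulers announce themselves by entering a struct
 * sched_selection into the sched_instance_set linker set, conceptually
 * along these lines ("ule_selection" and "sched_ule_instance" are
 * illustrative names, not the identifiers the schedulers actually use):
 *
 *	static struct sched_selection ule_selection = {
 *		.name = "ULE",
 *		.instance = &sched_ule_instance,
 *	};
 *	DATA_SET(sched_instance_set, ule_selection);
 */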
SET_DECLARE(sched_instance_set, struct sched_selection);

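/*
 * Select the scheduler named by the kern.sched.name tunable, e.g.
 * kern.sched.name="4BSD" in loader.conf(5) if that scheduler is
 * compiled in, falling back to the first registered scheduler when the
 * requested name is not found.
 */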
void
sched_instance_select(void)
{
	struct sched_selection *s, **ss;

	TUNABLE_STR_FETCH("kern.sched.name", sched_name, sizeof(sched_name));
	SET_FOREACH(ss, sched_instance_set) {
		s = *ss;
		if (strcmp(s->name, sched_name) == 0) {
			active_sched = s->instance;
			return;
		}
	}

	/*
	 * No scheduler matching the configuration was found.  If any
	 * scheduler is compiled in at all, use the first one from the
	 * linker set and report its name through kern.sched.name.
	 */
	if (SET_BEGIN(sched_instance_set) < SET_LIMIT(sched_instance_set)) {
		s = *SET_BEGIN(sched_instance_set);
		active_sched = s->instance;
		strlcpy(sched_name, s->name, sizeof(sched_name));
	}
}
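/*
 * Early scheduler initialization; sched_instance_select() must already
 * have chosen a scheduler by the time this runs.
 */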
void
schedinit(void)
{
	if (active_sched == NULL)
		panic("Cannot find scheduler %s", sched_name);
	active_sched->init();
}

struct cpu_group __read_mostly *cpu_top;		/* CPU topology */

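/*
 * Record the machine's CPU topology and let the selected scheduler set
 * up its run queues.
 */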
static void
sched_setup(void *dummy)
{
	cpu_top = smp_topo();
	active_sched->setup();
}
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);

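/*
 * Runs once the system clocks are configured, so that the scheduler
 * can derive its time constants from hz and stathz.
 */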
static void
sched_initticks(void *dummy)
{
	active_sched->initticks();
}
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
    NULL);

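/*
 * Start the scheduler's periodic CPU accounting (historically the 4BSD
 * schedcpu loop) at the end of boot.
 */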
static void
sched_schedcpu(void *dummy)
{
	active_sched->schedcpu();
}
SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, sched_schedcpu, NULL);

SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Scheduler");

SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, sched_name, 0,
    "Scheduler name");

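/*
 * Report the comma-separated list of compiled-in schedulers as the
 * read-only kern.sched.available sysctl.
 */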
static int
sysctl_kern_sched_available(SYSCTL_HANDLER_ARGS)
{
	struct sched_selection *s, **ss;
	struct sbuf *sb, sm;
	int error;
	bool first;

	sb = sbuf_new_for_sysctl(&sm, NULL, 0, req);
	if (sb == NULL)
		return (ENOMEM);
	first = true;
	SET_FOREACH(ss, sched_instance_set) {
		s = *ss;
		if (first)
			first = false;
		else
			sbuf_cat(sb, ",");
		sbuf_cat(sb, s->name);
	}
	error = sbuf_finish(sb);
	sbuf_delete(sb);
	return (error);
}

SYSCTL_PROC(_kern_sched, OID_AUTO, available,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_kern_sched_available, "A",
    "List of available schedulers");

fixpt_t ccpu;
SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0,
    "Decay factor used for updating %CPU");

/*
 * Build the CPU topology dump string.  Called recursively to walk the
 * topology tree.
 */
static int
sysctl_kern_sched_topology_spec_internal(struct sbuf *sb,
    struct cpu_group *cg, int indent)
{
	char cpusetbuf[CPUSETBUFSIZ];
	int i;
	bool first;

	if (cg == NULL) {
		sbuf_printf(sb, "%*s<group level=\"1\" cache-level=\"1\">\n",
		    indent, "");
		sbuf_printf(sb, "%*s</group>\n", indent, "");
		return (0);
	}

	sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent,
	    "", 1 + indent / 2, cg->cg_level);
	sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "",
	    cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
	first = true;
	for (i = cg->cg_first; i <= cg->cg_last; i++) {
		if (CPU_ISSET(i, &cg->cg_mask)) {
			if (!first)
				sbuf_cat(sb, ", ");
			else
				first = false;
			sbuf_printf(sb, "%d", i);
		}
	}
	sbuf_cat(sb, "</cpu>\n");

	if (cg->cg_flags != 0) {
		sbuf_printf(sb, "%*s <flags>", indent, "");
		if ((cg->cg_flags & CG_FLAG_HTT) != 0)
			sbuf_cat(sb, "<flag name=\"HTT\">HTT group</flag>");
		if ((cg->cg_flags & CG_FLAG_THREAD) != 0)
			sbuf_cat(sb, "<flag name=\"THREAD\">THREAD group</flag>");
		if ((cg->cg_flags & CG_FLAG_SMT) != 0)
			sbuf_cat(sb, "<flag name=\"SMT\">SMT group</flag>");
		if ((cg->cg_flags & CG_FLAG_NODE) != 0)
			sbuf_cat(sb, "<flag name=\"NODE\">NUMA node</flag>");
		sbuf_cat(sb, "</flags>\n");
	}

	if (cg->cg_children > 0) {
		sbuf_printf(sb, "%*s <children>\n", indent, "");
		for (i = 0; i < cg->cg_children; i++)
			sysctl_kern_sched_topology_spec_internal(sb,
			    &cg->cg_child[i], indent + 2);
		sbuf_printf(sb, "%*s </children>\n", indent, "");
	}
	sbuf_printf(sb, "%*s</group>\n", indent, "");
	return (0);
}

/*
 * Sysctl handler for retrieving the topology dump.  It is a wrapper
 * around the recursive sysctl_kern_sched_topology_spec_internal().
 */
static int
sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *topo;
	int err;

	topo = sbuf_new_for_sysctl(NULL, NULL, 512, req);
	if (topo == NULL)
		return (ENOMEM);

	sbuf_cat(topo, "<groups>\n");
	err = sysctl_kern_sched_topology_spec_internal(topo, cpu_top, 1);
	sbuf_cat(topo, "</groups>\n");

	if (err == 0)
		err = sbuf_finish(topo);
	sbuf_delete(topo);
	return (err);
}

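/*
 * The topology is reported as nested XML <group> elements; a fragment
 * of "sysctl kern.sched.topology_spec" output might look like the
 * following (exact content is machine-dependent):
 *
 *	<groups>
 *	 <group level="1" cache-level="0">
 *	  <cpu count="4" mask="f,0,0,0">0, 1, 2, 3</cpu>
 *	  ...
 *	 </group>
 *	</groups>
 */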
SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING |
    CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0,
    sysctl_kern_sched_topology_spec, "A",
    "XML dump of detected CPU topology");
326