/*
 * Copyright 2026 The FreeBSD Foundation
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 */

#include "opt_sched.h"

#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/runq.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <machine/ifunc.h>

/*
 * The scheduler instance selected at boot.  Every sched_*() shim defined
 * below dispatches to a method of this table.
 */
const struct sched_instance *active_sched;

#ifndef __DO_NOT_HAVE_SYS_IFUNCS
/*
 * Ifunc variant: each sched_*() entry point is an ifunc whose resolver
 * returns the selected scheduler's method from active_sched, so the
 * per-call indirection through the method table is eliminated after
 * resolution.
 *
 * __m - member of struct sched_instance to dispatch to
 * __r - return type
 * __n - name of the public sched_*() entry point
 * __p - parenthesized parameter list
 * __a - parenthesized argument list (unused here: the resolver returns
 *       the target function, it does not call it)
 */
#define __DEFINE_SHIM(__m, __r, __n, __p, __a)			\
	DEFINE_IFUNC(, __r, __n, __p)				\
	{							\
		return (active_sched->__m);			\
	}
#else
/*
 * Fallback for platforms without ifunc support: a plain wrapper
 * function that makes an indirect call through active_sched on every
 * invocation.
 */
#define __DEFINE_SHIM(__m, __r, __n, __p, __a)			\
	__r							\
	__n __p							\
	{							\
		return (active_sched->__m __a);			\
	}
#endif
/* Convenience wrappers for zero-, one-, and two-argument shims. */
#define DEFINE_SHIM0(__m, __r, __n)				\
	__DEFINE_SHIM(__m, __r, __n, (void), ())
#define DEFINE_SHIM1(__m, __r, __n, __t1, __a1)			\
	__DEFINE_SHIM(__m, __r, __n, (__t1 __a1), (__a1))
#define DEFINE_SHIM2(__m, __r, __n, __t1, __a1, __t2, __a2)	\
	__DEFINE_SHIM(__m, __r, __n, (__t1 __a1, __t2 __a2), (__a1, __a2))

/*
 * Public scheduler KPI entry points, each dispatching to the
 * corresponding member of the selected struct sched_instance.
 */
DEFINE_SHIM0(load, int, sched_load)
DEFINE_SHIM0(rr_interval, int, sched_rr_interval)
DEFINE_SHIM0(runnable, bool, sched_runnable)
DEFINE_SHIM2(exit, void, sched_exit, struct proc *, p,
    struct thread *, childtd)
DEFINE_SHIM2(fork, void, sched_fork, struct thread *, td,
    struct thread *, childtd)
DEFINE_SHIM1(fork_exit, void, sched_fork_exit, struct thread *, td)
DEFINE_SHIM2(class, void, sched_class, struct thread *, td, int, class)
DEFINE_SHIM2(nice, void, sched_nice, struct proc *, p, int, nice)
DEFINE_SHIM0(ap_entry, void, sched_ap_entry)
DEFINE_SHIM2(exit_thread, void, sched_exit_thread, struct thread *, td,
    struct thread *, child)
DEFINE_SHIM1(estcpu, u_int, sched_estcpu, struct thread *, td)
DEFINE_SHIM2(fork_thread, void, sched_fork_thread, struct thread *, td,
    struct thread *, child)
DEFINE_SHIM2(ithread_prio, void, sched_ithread_prio, struct thread *, td,
    u_char, prio)
DEFINE_SHIM2(lend_prio, void, sched_lend_prio, struct thread *, td,
    u_char, prio)
DEFINE_SHIM2(lend_user_prio, void, sched_lend_user_prio, struct thread *, td,
    u_char, pri)
DEFINE_SHIM2(lend_user_prio_cond, void, sched_lend_user_prio_cond,
    struct thread *, td, u_char, pri)
DEFINE_SHIM1(pctcpu, fixpt_t, sched_pctcpu, struct thread *, td)
DEFINE_SHIM2(prio, void, sched_prio, struct thread *, td, u_char, prio)
DEFINE_SHIM2(sleep, void, sched_sleep, struct thread *, td, int, prio)
/* Member is named "sswitch"; the public entry point is sched_switch(). */
DEFINE_SHIM2(sswitch, void, sched_switch, struct thread *, td, int, flags)
DEFINE_SHIM1(throw, void, sched_throw, struct thread *, td)
DEFINE_SHIM2(unlend_prio, void, sched_unlend_prio, struct thread *, td,
    u_char, prio)
DEFINE_SHIM2(user_prio, void, sched_user_prio, struct thread *, td,
    u_char, prio)
DEFINE_SHIM1(userret_slowpath, void, sched_userret_slowpath,
    struct thread *, td)
DEFINE_SHIM2(add, void, sched_add, struct thread *, td, int, flags)
DEFINE_SHIM0(choose, struct thread *, sched_choose)
DEFINE_SHIM2(clock, void, sched_clock, struct thread *, td, int, cnt)
DEFINE_SHIM1(idletd, void, sched_idletd, void *, dummy)
DEFINE_SHIM1(preempt, void, sched_preempt, struct thread *, td)
DEFINE_SHIM1(relinquish, void, sched_relinquish, struct thread *, td)
DEFINE_SHIM1(rem, void, sched_rem, struct thread *, td)
DEFINE_SHIM2(wakeup, void, sched_wakeup, struct thread *, td, int, srqflags)
DEFINE_SHIM2(bind, void, sched_bind, struct thread *, td, int, cpu)
DEFINE_SHIM1(unbind, void, sched_unbind, struct thread *, td)
DEFINE_SHIM1(is_bound, int, sched_is_bound, struct thread *, td)
DEFINE_SHIM1(affinity, void, sched_affinity, struct thread *, td)
DEFINE_SHIM0(sizeof_proc, int, sched_sizeof_proc)
DEFINE_SHIM0(sizeof_thread, int, sched_sizeof_thread)
95 DEFINE_SHIM1(tdname, char *, sched_tdname, struct thread *, td) 96 DEFINE_SHIM1(clear_tdname, void, sched_clear_tdname, struct thread *, td) 97 DEFINE_SHIM0(do_timer_accounting, bool, sched_do_timer_accounting) 98 DEFINE_SHIM1(find_l2_neighbor, int, sched_find_l2_neighbor, int, cpu) 99 DEFINE_SHIM0(init_ap, void, schedinit_ap) 100 101 102 SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions"); 103 SCHED_STAT_DEFINE(ithread_preemptions, 104 "Interrupt thread preemptions due to time-sharing"); 105 106 SDT_PROVIDER_DEFINE(sched); 107 108 SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *", 109 "struct proc *", "uint8_t"); 110 SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *", 111 "struct proc *", "void *"); 112 SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *", 113 "struct proc *", "void *", "int"); 114 SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *", 115 "struct proc *", "uint8_t", "struct thread *"); 116 SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int"); 117 SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *", 118 "struct proc *"); 119 SDT_PROBE_DEFINE(sched, , , on__cpu); 120 SDT_PROBE_DEFINE(sched, , , remain__cpu); 121 SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *", 122 "struct proc *"); 123 124 #ifdef KDTRACE_HOOKS 125 #include <sys/dtrace_bsd.h> 126 int __read_mostly dtrace_vtime_active; 127 dtrace_vtime_switch_func_t dtrace_vtime_switch_func; 128 #endif 129 130 static char sched_name[32] = "ULE"; 131 132 SET_DECLARE(sched_instance_set, struct sched_selection); 133 134 void 135 sched_instance_select(void) 136 { 137 struct sched_selection *s, **ss; 138 int i; 139 140 TUNABLE_STR_FETCH("kern.sched.name", sched_name, sizeof(sched_name)); 141 SET_FOREACH(ss, sched_instance_set) { 142 s = *ss; 143 for (i = 0; s->name[i] == sched_name[i]; i++) { 144 if (s->name[i] == '\0') { 145 active_sched = s->instance; 146 return; 147 } 148 } 149 } 150 151 /* 152 * No scheduler matching the configuration 
was found. If 153 * there is any scheduler compiled in, at all, use the first 154 * scheduler from the linker set. 155 */ 156 if (SET_BEGIN(sched_instance_set) < SET_LIMIT(sched_instance_set)) { 157 s = *SET_BEGIN(sched_instance_set); 158 active_sched = s->instance; 159 for (i = 0;; i++) { 160 sched_name[i] = s->name[i]; 161 if (s->name[i] == '\0') 162 break; 163 } 164 } 165 } 166 167 void 168 schedinit(void) 169 { 170 if (active_sched == NULL) 171 panic("Cannot find scheduler %s", sched_name); 172 active_sched->init(); 173 } 174 175 struct cpu_group __read_mostly *cpu_top; /* CPU topology */ 176 177 static void 178 sched_setup(void *dummy) 179 { 180 cpu_top = smp_topo(); 181 active_sched->setup(); 182 } 183 SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL); 184 185 static void 186 sched_initticks(void *dummy) 187 { 188 active_sched->initticks(); 189 } 190 SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, 191 NULL); 192 193 static void 194 sched_schedcpu(void) 195 { 196 active_sched->schedcpu(); 197 } 198 SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, sched_schedcpu, NULL); 199 200 SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 201 "Scheduler"); 202 203 SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, sched_name, 0, 204 "Scheduler name"); 205 206 static int 207 sysctl_kern_sched_available(SYSCTL_HANDLER_ARGS) 208 { 209 struct sched_selection *s, **ss; 210 struct sbuf *sb, sm; 211 int error; 212 bool first; 213 214 sb = sbuf_new_for_sysctl(&sm, NULL, 0, req); 215 if (sb == NULL) 216 return (ENOMEM); 217 first = true; 218 SET_FOREACH(ss, sched_instance_set) { 219 s = *ss; 220 if (first) 221 first = false; 222 else 223 sbuf_cat(sb, ","); 224 sbuf_cat(sb, s->name); 225 } 226 error = sbuf_finish(sb); 227 sbuf_delete(sb); 228 return (error); 229 } 230 231 SYSCTL_PROC(_kern_sched, OID_AUTO, available, 232 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 233 NULL, 0, sysctl_kern_sched_available, "A", 
    "List of available schedulers");

/*
 * Decay factor for the %CPU estimate, exported as kern.ccpu.
 * NOTE(review): defined here but not assigned in this file -- presumably
 * initialized by the active scheduler; confirm against the instances.
 */
fixpt_t ccpu;
SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0,
    "Decay factor used for updating %CPU");

/*
 * Build the CPU topology dump string.  Is recursively called to collect
 * the topology tree.
 */
static int
sysctl_kern_sched_topology_spec_internal(struct sbuf *sb,
    struct cpu_group *cg, int indent)
{
	char cpusetbuf[CPUSETBUFSIZ];
	int i, first;

	/*
	 * No topology detected: emit a single dummy group so the output
	 * always contains at least one <group> element.
	 */
	if (cpu_top == NULL) {
		sbuf_printf(sb, "%*s<group level=\"1\" cache-level=\"1\">\n",
		    indent, "");
		sbuf_printf(sb, "%*s</group>\n", indent, "");
		return (0);
	}

	/* Each recursion adds 2 to indent, so the level is 1 + indent / 2. */
	sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent,
	    "", 1 + indent / 2, cg->cg_level);
	sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "",
	    cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
	first = TRUE;
	/* Comma-separated list of the CPU ids set in this group's mask. */
	for (i = cg->cg_first; i <= cg->cg_last; i++) {
		if (CPU_ISSET(i, &cg->cg_mask)) {
			if (!first)
				sbuf_cat(sb, ", ");
			else
				first = FALSE;
			sbuf_printf(sb, "%d", i);
		}
	}
	sbuf_cat(sb, "</cpu>\n");

	/* Emit a <flag> element for every attribute set on this group. */
	if (cg->cg_flags != 0) {
		sbuf_printf(sb, "%*s <flags>", indent, "");
		if ((cg->cg_flags & CG_FLAG_HTT) != 0)
			sbuf_cat(sb, "<flag name=\"HTT\">HTT group</flag>");
		if ((cg->cg_flags & CG_FLAG_THREAD) != 0)
			sbuf_cat(sb, "<flag name=\"THREAD\">THREAD group</flag>");
		if ((cg->cg_flags & CG_FLAG_SMT) != 0)
			sbuf_cat(sb, "<flag name=\"SMT\">SMT group</flag>");
		if ((cg->cg_flags & CG_FLAG_NODE) != 0)
			sbuf_cat(sb, "<flag name=\"NODE\">NUMA node</flag>");
		sbuf_cat(sb, "</flags>\n");
	}

	/* Recurse into the child groups, one indent level deeper. */
	if (cg->cg_children > 0) {
		sbuf_printf(sb, "%*s <children>\n", indent, "");
		for (i = 0; i < cg->cg_children; i++)
			sysctl_kern_sched_topology_spec_internal(sb,
			    &cg->cg_child[i], indent + 2);
		sbuf_printf(sb, "%*s </children>\n", indent, "");
	}
	sbuf_printf(sb, "%*s</group>\n", indent, "");
	return (0);
}

/*
 * Sysctl handler for retrieving topology dump. It's a wrapper for
 * the recursive sysctl_kern_sched_topology_spec_internal().
 */
static int
sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS)
{
	struct sbuf *topo;
	int err;

	topo = sbuf_new_for_sysctl(NULL, NULL, 512, req);
	if (topo == NULL)
		return (ENOMEM);

	sbuf_cat(topo, "<groups>\n");
	err = sysctl_kern_sched_topology_spec_internal(topo, cpu_top, 1);
	sbuf_cat(topo, "</groups>\n");

	if (err == 0)
		err = sbuf_finish(topo);
	sbuf_delete(topo);
	return (err);
}

SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING |
    CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0,
    sysctl_kern_sched_topology_spec, "A",
    "XML dump of detected CPU topology");