/* SPDX-License-Identifier: GPL-2.0 */
/*
 * A minimal userland scheduler.
 *
 * In terms of scheduling, this provides two different types of behavior:
 * 1. A global FIFO scheduling order for _any_ tasks that have a restricted
 *    CPU affinity. All such tasks are direct-dispatched from the kernel, and
 *    are never enqueued in user space.
 * 2. A primitive vruntime scheduler that is implemented in user space, for
 *    all other tasks.
 *
 * Some parts of this example user space scheduler could be implemented more
 * efficiently using more complex and sophisticated data structures. For
 * example, rather than using BPF_MAP_TYPE_QUEUE maps,
 * BPF_MAP_TYPE_{USER_}RINGBUF maps could be used for exchanging messages
 * between user space and kernel space. Similarly, we use a simple
 * vruntime-sorted list in user space, but an rbtree could be used instead.
 *
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 */
#include <scx/common.bpf.h>
#include "scx_userland.h"
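
/*
 * For reference, the user-space half of this scheme (scx_userland.c) drains
 * and refills the two BPF queues below through the ordinary libbpf syscall
 * wrappers. A minimal sketch of that loop, with hypothetical helper and fd
 * names, could look like this (user-space code, shown only as illustration):
 *
 *	struct scx_userland_enqueued_task task;
 *	s32 pid;
 *
 *	// Pop every task the BPF side has queued for us...
 *	while (!bpf_map_lookup_and_delete_elem(enqueued_fd, NULL, &task))
 *		insert_vruntime_sorted(&task);
 *
 *	// ...then push back the pids we want dispatched, in order.
 *	while ((pid = pick_next_pid()) > 0)
 *		bpf_map_update_elem(dispatched_fd, NULL, &pid, 0);
 */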

/*
 * Maximum number of tasks enqueued/dispatched between kernel and user space.
 */
#define MAX_ENQUEUED_TASKS 4096

char _license[] SEC("license") = "GPL";

const volatile s32 usersched_pid;

/* !0 for veristat, set during init */
const volatile u32 num_possible_cpus = 64;

/* Stats that are printed by user space. */
u64 nr_failed_enqueues, nr_kernel_enqueues, nr_user_enqueues;

/*
 * Number of tasks that are queued for scheduling.
 *
 * This number is incremented by the BPF component when a task is queued to
 * the user-space scheduler and must be decremented by the user-space
 * scheduler when a task is consumed.
 */
volatile u64 nr_queued;

/*
 * Number of tasks that are waiting for scheduling.
 *
 * This number must be updated by the user-space scheduler to keep track of
 * whether there is still scheduling work to do.
 */
volatile u64 nr_scheduled;
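
/*
 * For illustration, the user-space side is expected to keep these counters
 * in sync roughly as follows (skeleton field names are assumptions):
 *
 *	// after fully draining the enqueued queue into its vruntime list
 *	skel->bss->nr_queued = 0;
 *	skel->bss->nr_scheduled = nr_tasks_on_vruntime_list;
 *
 * nr_scheduled then shrinks again as pids are pushed to the dispatched
 * queue, so both counters reaching zero means there is nothing left to do.
 */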

UEI_DEFINE(uei);

/*
 * The map containing tasks that are enqueued in user space from the kernel.
 *
 * This map is drained by the user space scheduler.
 */
struct {
	__uint(type, BPF_MAP_TYPE_QUEUE);
	__uint(max_entries, MAX_ENQUEUED_TASKS);
	__type(value, struct scx_userland_enqueued_task);
} enqueued SEC(".maps");

/*
 * The map containing tasks that are dispatched to the kernel from user space.
 *
 * Drained by the kernel in userland_dispatch().
 */
struct {
	__uint(type, BPF_MAP_TYPE_QUEUE);
	__uint(max_entries, MAX_ENQUEUED_TASKS);
	__type(value, s32);
} dispatched SEC(".maps");

/* Per-task scheduling context */
struct task_ctx {
	bool force_local; /* Dispatch directly to local DSQ */
};

/* Map that contains task-local storage. */
struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct task_ctx);
} task_ctx_stor SEC(".maps");

/*
 * Flag used to wake up the user-space scheduler.
 */
static volatile u32 usersched_needed;

/*
 * Set the user-space scheduler wake-up flag (equivalent to an atomic release
 * operation).
 */
static void set_usersched_needed(void)
{
	__sync_fetch_and_or(&usersched_needed, 1);
}

/*
 * Check and clear the user-space scheduler wake-up flag (equivalent to an
 * atomic acquire operation).
 */
static bool test_and_clear_usersched_needed(void)
{
	return __sync_fetch_and_and(&usersched_needed, 0) == 1;
}

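/* Is @p the user-space scheduler task itself? */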
static bool is_usersched_task(const struct task_struct *p)
{
	return p->pid == usersched_pid;
}

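/*
 * Tasks with a restricted affinity (i.e. tasks that cannot run on every
 * possible CPU) are kept in the kernel and never handed to user space.
 */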
static bool keep_in_kernel(const struct task_struct *p)
{
	return p->nr_cpus_allowed < num_possible_cpus;
}

static struct task_struct *usersched_task(void)
{
	struct task_struct *p;

	p = bpf_task_from_pid(usersched_pid);
	/*
	 * Should never happen -- the usersched task should always be managed
	 * by sched_ext.
	 */
	if (!p)
		scx_bpf_error("Failed to find usersched task %d", usersched_pid);

	return p;
}

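/*
 * Pick a CPU for a waking task. Only tasks kept in the kernel are handled
 * here: if an idle CPU is found, claim it and flag the task for direct
 * dispatch to the local DSQ in userland_enqueue().
 */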
s32 BPF_STRUCT_OPS(userland_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	if (keep_in_kernel(p)) {
		s32 cpu;
		struct task_ctx *tctx;

		tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
		if (!tctx) {
			scx_bpf_error("Failed to look up task-local storage for %s", p->comm);
			return -ESRCH;
		}

		if (p->nr_cpus_allowed == 1 ||
		    scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
			tctx->force_local = true;
			return prev_cpu;
		}

		cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
		if (cpu >= 0) {
			tctx->force_local = true;
			return cpu;
		}
	}

	return prev_cpu;
}

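/* Give the user-space scheduler task itself a chance to run. */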
static void dispatch_user_scheduler(void)
{
	struct task_struct *p;

	p = usersched_task();
	if (p) {
		scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
		bpf_task_release(p);
	}
}

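/*
 * Hand @p off to the user-space scheduler through the enqueued queue,
 * falling back to the global DSQ if the queue is full.
 */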
static void enqueue_task_in_user_space(struct task_struct *p, u64 enq_flags)
{
	struct scx_userland_enqueued_task task = {};

	task.pid = p->pid;
	task.sum_exec_runtime = p->se.sum_exec_runtime;
	task.weight = p->scx.weight;

	if (bpf_map_push_elem(&enqueued, &task, 0)) {
		/*
		 * If we fail to enqueue the task in user space, put it
		 * directly on the global DSQ.
		 */
		__sync_fetch_and_add(&nr_failed_enqueues, 1);
		scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
	} else {
		__sync_fetch_and_add(&nr_user_enqueues, 1);
		/*
		 * Account the task as queued so that update_idle() can tell
		 * there is pending work for the user-space scheduler, as
		 * documented for nr_queued above.
		 */
		__sync_fetch_and_add(&nr_queued, 1);
		set_usersched_needed();
	}
}

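/*
 * Enqueue path: tasks kept in the kernel go straight to a DSQ (the local one
 * if select_cpu claimed an idle CPU), everything else is forwarded to the
 * user-space scheduler.
 */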
void BPF_STRUCT_OPS(userland_enqueue, struct task_struct *p, u64 enq_flags)
{
	if (keep_in_kernel(p)) {
		u64 dsq_id = SCX_DSQ_GLOBAL;
		struct task_ctx *tctx;

		tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
		if (!tctx) {
			scx_bpf_error("Failed to lookup task ctx for %s", p->comm);
			return;
		}

		if (tctx->force_local)
			dsq_id = SCX_DSQ_LOCAL;
		tctx->force_local = false;
		scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, enq_flags);
		__sync_fetch_and_add(&nr_kernel_enqueues, 1);
		return;
	} else if (!is_usersched_task(p)) {
		enqueue_task_in_user_space(p, enq_flags);
	}
}

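/*
 * A CPU is looking for work: run the user-space scheduler itself if it has
 * been flagged as needed, then drain the pids it has pushed to the
 * dispatched queue onto the global DSQ.
 */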
void BPF_STRUCT_OPS(userland_dispatch, s32 cpu, struct task_struct *prev)
{
	if (test_and_clear_usersched_needed())
		dispatch_user_scheduler();

	bpf_repeat(MAX_ENQUEUED_TASKS) {
		s32 pid;
		struct task_struct *p;

		if (bpf_map_pop_elem(&dispatched, &pid))
			break;

		/*
		 * The task could have exited by the time we get around to
		 * dispatching it. Treat this as a normal occurrence, and simply
		 * move onto the next iteration.
		 */
		p = bpf_task_from_pid(pid);
		if (!p)
			continue;

		scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
		bpf_task_release(p);
	}
}

/*
 * A CPU is about to change its idle state. If the CPU is going idle, ensure
 * that the user-space scheduler has a chance to run if there is any remaining
 * work to do.
 */
void BPF_STRUCT_OPS(userland_update_idle, s32 cpu, bool idle)
{
	/*
	 * Don't do anything if we exit from an idle state; a CPU owner will
	 * be assigned in .running().
	 */
	if (!idle)
		return;
	/*
	 * A CPU is now available: notify the user-space scheduler that tasks
	 * can be dispatched, if there is at least one task waiting to be
	 * scheduled, either queued (accounted in nr_queued) or scheduled
	 * (accounted in nr_scheduled).
	 *
	 * NOTE: nr_queued is incremented by the BPF component, specifically in
	 * enqueue(), when a task is sent to the user-space scheduler; then
	 * the scheduler drains the queued tasks (updating nr_queued) and adds
	 * them to its internal data structures / state. At this point tasks
	 * become "scheduled" and the user-space scheduler will take care of
	 * updating nr_scheduled accordingly; lastly tasks will be dispatched
	 * and the user-space scheduler will update nr_scheduled again.
	 *
	 * Checking both counters allows us to determine whether there is
	 * still some pending work to do for the scheduler: new tasks have
	 * been queued since the last check, or there are still tasks "queued"
	 * or "scheduled" since the previous user-space scheduler run. If both
	 * counters are zero it is pointless to wake up the scheduler (even if
	 * a CPU becomes idle), because there is nothing to do.
	 *
	 * Keep in mind that update_idle() doesn't run concurrently with the
	 * user-space scheduler (which is single-threaded): this function is
	 * naturally serialized with the user-space scheduler code, therefore
	 * this check here is also safe from a concurrency perspective.
	 */
	if (nr_queued || nr_scheduled) {
		/*
		 * Kick the CPU to make it immediately ready to accept
		 * dispatched tasks.
		 */
		set_usersched_needed();
		scx_bpf_kick_cpu(cpu, 0);
	}
}

s32 BPF_STRUCT_OPS(userland_init_task, struct task_struct *p,
		   struct scx_init_task_args *args)
{
	if (bpf_task_storage_get(&task_ctx_stor, p, 0,
				 BPF_LOCAL_STORAGE_GET_F_CREATE))
		return 0;
	else
		return -ENOMEM;
}

s32 BPF_STRUCT_OPS(userland_init)
{
	if (num_possible_cpus == 0) {
		scx_bpf_error("User scheduler # CPUs uninitialized (%d)",
			      num_possible_cpus);
		return -EINVAL;
	}

	if (usersched_pid <= 0) {
		scx_bpf_error("User scheduler pid uninitialized (%d)",
			      usersched_pid);
		return -EINVAL;
	}

	return 0;
}

void BPF_STRUCT_OPS(userland_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

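/*
 * SCX_OPS_ENQ_LAST routes the last runnable task on a CPU through
 * ops.enqueue() instead of implicitly keeping it running, so it still flows
 * through the user-space scheduler. SCX_OPS_KEEP_BUILTIN_IDLE keeps the
 * kernel's built-in idle CPU tracking active even though ops.update_idle()
 * is implemented, which scx_bpf_pick_idle_cpu() above relies on.
 */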
SCX_OPS_DEFINE(userland_ops,
	       .select_cpu		= (void *)userland_select_cpu,
	       .enqueue			= (void *)userland_enqueue,
	       .dispatch		= (void *)userland_dispatch,
	       .update_idle		= (void *)userland_update_idle,
	       .init_task		= (void *)userland_init_task,
	       .init			= (void *)userland_init,
	       .exit			= (void *)userland_exit,
	       .flags			= SCX_OPS_ENQ_LAST |
					  SCX_OPS_KEEP_BUILTIN_IDLE,
	       .name			= "userland");