/* SPDX-License-Identifier: GPL-2.0 */
/*
 * A demo sched_ext core-scheduler which always makes every sibling CPU pair
 * execute from the same CPU cgroup.
 *
 * This scheduler is a minimal implementation and would need some form of
 * priority handling both inside each cgroup and across the cgroups to be
 * practically useful.
 *
 * Each CPU in the system is paired with exactly one other CPU, according to a
 * "stride" value that can be specified when the BPF scheduler program is first
 * loaded. Throughout the runtime of the scheduler, these CPU pairs guarantee
 * that they will only ever schedule tasks that belong to the same CPU cgroup.
 *
 * Scheduler Initialization
 * ------------------------
 *
 * The scheduler BPF program is first initialized from user space, before it is
 * enabled. During this initialization process, each CPU on the system is
 * assigned several values that are constant throughout its runtime:
 *
 * 1. *Pair CPU*: The CPU that it synchronizes with when making scheduling
 *		  decisions. Paired CPUs always schedule tasks from the same
 *		  CPU cgroup, and synchronize with each other to guarantee
 *		  that this constraint is not violated.
 * 2. *Pair ID*:  Each CPU pair is assigned a Pair ID, which is used to access
 *		  a struct pair_ctx object that is shared between the pair.
 * 3. *In-pair-index*: An index, 0 or 1, that is assigned to each core in the
 *		       pair. Each struct pair_ctx has an active_mask field,
 *		       which is a bitmap used to indicate whether each core
 *		       in the pair currently has an actively running task.
 *		       This index specifies which entry in the bitmap corresponds
 *		       to each CPU in the pair.
 *
 * During this initialization, the CPUs are paired according to a "stride" that
 * may be specified when invoking the user space program that initializes and
 * loads the scheduler. By default, the stride is 1/2 the total number of CPUs.
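 *
 * For example, on a hypothetical 8-CPU system with the default stride of 4
 * (nr_cpu_ids / 2), the CPUs would be paired as (0, 4), (1, 5), (2, 6) and
 * (3, 7), with each pair sharing one struct pair_ctx.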
 *
 * Tasks and cgroups
 * -----------------
 *
 * Every cgroup in the system is registered with the scheduler using the
 * pair_cgroup_init() callback, and every task in the system is associated with
 * exactly one cgroup. At a high level, the idea with the pair scheduler is to
 * always schedule tasks from the same cgroup within a given CPU pair. When a
 * task is enqueued (i.e. passed to the pair_enqueue() callback function), its
 * cgroup ID is read from its task struct, and then a corresponding queue map
 * is used to FIFO-enqueue the task for that cgroup.
 *
 * If you look through the implementation of the scheduler, you'll notice that
 * there is quite a bit of complexity involved with looking up the per-cgroup
 * FIFO queue that we enqueue tasks in. For example, there is a cgrp_q_idx_hash
 * BPF hash map that is used to map a cgroup ID to a globally unique ID that's
 * allocated in the BPF program. This is done because we use separate maps to
 * store the FIFO queue of tasks, and the length of that map, per cgroup. This
 * complexity is only present because of current deficiencies in BPF that will
 * soon be addressed. The main point to keep in mind is that newly enqueued
 * tasks are added to their cgroup's FIFO queue.
 *
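 * Schematically, the enqueue path boils down to the following (simplified
 * from pair_enqueue() below):
 *
 *	cgid  = cgroup ID of the enqueued task
 *	q_idx = cgrp_q_idx_hash[cgid]
 *	cgq   = cgrp_q_arr[q_idx]
 *	FIFO-push the task's PID onto cgq
 *	if cgrp_q_len[q_idx] went 0 -> 1, push cgid onto top_q
 *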
 * Dispatching tasks
 * -----------------
 *
 * This section describes how enqueued tasks are dispatched and scheduled.
 * Tasks are dispatched in pair_dispatch(), and at a high level the workflow is
 * as follows:
 *
 * 1. Fetch the struct pair_ctx for the current CPU. As mentioned above, this is
 *    the structure that's used to synchronize between the two pair CPUs in their
 *    scheduling decisions. The pair transitions to the draining state and stops
 *    executing new tasks from the current cgroup after any of the following
 *    events has occurred:
 *
 *    - The cgroup's slice has expired, or
 *    - The cgroup has become empty, or
 *    - Either CPU in the pair has been preempted by a higher priority
 *      sched_class.
 *
 * 2. If the pair is still executing a task, mark the pair_ctx as draining, and
 *    wait for the pair CPU to be preempted.
 *
 * 3. Otherwise, if the pair CPU is not running a task, we can move on to
 *    scheduling new tasks. Pop the next cgroup ID from the top_q queue.
 *
 * 4. Pop a task from that cgroup's FIFO task queue, and begin executing it.
 *
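 * Schematically, one dispatch attempt looks like the following (simplified
 * from try_dispatch() below, error handling omitted):
 *
 *	clear this CPU's bit in pairc->active_mask
 *	if the cgroup's slice has expired or pairc->draining is set:
 *		mark the pair as draining
 *		if the pair CPU is still active or preempted, wait for it
 *		otherwise, pop the next cgroup ID from top_q into pairc->cgid
 *	set this CPU's bit in pairc->active_mask
 *	pop one PID from the cgroup's FIFO queue and dispatch that task
 *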
 * Note again that this scheduling behavior is simple, but the implementation
 * is complex, mostly because it hits several BPF shortcomings and has to work
 * around them in often awkward ways. Most of the shortcomings are expected to
 * be resolved in the near future, which should allow greatly simplifying this
 * scheduler.
 *
 * Dealing with preemption
 * -----------------------
 *
 * SCX is the lowest priority sched_class, and can be preempted by a higher
 * priority sched_class at any time. To address this, the scheduler implements
 * pair_cpu_release() and pair_cpu_acquire() callbacks which are invoked by the
 * core scheduler when the scheduler loses and gains control of the CPU
 * respectively.
 *
 * In pair_cpu_release(), we mark the pair_ctx as having been preempted, and
 * then invoke:
 *
 * scx_bpf_kick_cpu(pair_cpu, SCX_KICK_PREEMPT | SCX_KICK_WAIT);
 *
 * This preempts the pair CPU, and waits until it has re-entered the scheduler
 * before returning. This is necessary to ensure that the higher priority
 * sched_class that preempted our scheduler does not schedule a task
 * concurrently with our pair CPU.
 *
 * When the CPU is re-acquired in pair_cpu_acquire(), we unmark the preemption
 * in the pair_ctx, and send another resched IPI to the pair CPU to re-enable
 * pair scheduling.
 *
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 */
#include <scx/common.bpf.h>
#include "scx_pair.h"

char _license[] SEC("license") = "GPL";

/* !0 for veristat, set during init */
const volatile u32 nr_cpu_ids = 1;

/* a pair of CPUs stay on a cgroup for this duration */
const volatile u32 pair_batch_dur_ns;

/* CPU ID -> pair CPU ID */
const volatile s32 RESIZABLE_ARRAY(rodata, pair_cpu);

/* CPU ID -> pair_id */
const volatile u32 RESIZABLE_ARRAY(rodata, pair_id);

/* CPU ID -> CPU # in the pair (0 or 1) */
const volatile u32 RESIZABLE_ARRAY(rodata, in_pair_idx);
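
/*
 * A minimal sketch of how the user space loader might populate the three
 * arrays above for a given stride (illustrative only; the real loader in
 * scx_pair.c also validates the pairing). Assumes pair_cpu[] starts out
 * filled with -1:
 *
 *	for (i = 0; i < nr_cpu_ids; i++) {
 *		if (pair_cpu[i] >= 0)
 *			continue;	// already paired earlier in the loop
 *		j = (i + stride) % nr_cpu_ids;
 *		pair_cpu[i] = j;
 *		pair_cpu[j] = i;
 *		pair_id[i] = pair_id[j] = i;
 *		in_pair_idx[i] = 0;
 *		in_pair_idx[j] = 1;
 *	}
 */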

struct pair_ctx {
	struct bpf_spin_lock	lock;

	/* the cgroup the pair is currently executing */
	u64			cgid;

	/* timestamp at which the pair started executing the current cgroup */
	u64			started_at;

	/* whether the current cgroup is draining */
	bool			draining;

	/* the CPUs that are currently active on the cgroup */
	u32			active_mask;

	/*
	 * the CPUs that are currently preempted and running tasks in a
	 * different scheduler.
	 */
	u32			preempted_mask;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, struct pair_ctx);
} pair_ctx SEC(".maps");

/* queue of cgrp_q's possibly with tasks on them */
struct {
	__uint(type, BPF_MAP_TYPE_QUEUE);
	/*
	 * Because it's difficult to build strong synchronization encompassing
	 * multiple non-trivial operations in BPF, this queue is managed in an
	 * opportunistic way so that we guarantee that a cgroup w/ active tasks
	 * is always on it but possibly multiple times. Once we have more robust
	 * synchronization constructs and e.g. linked list, we should be able to
	 * do this in a prettier way but for now just size it big enough.
	 */
	__uint(max_entries, 4 * MAX_CGRPS);
	__type(value, u64);
} top_q SEC(".maps");

/* per-cgroup q which FIFOs the tasks from the cgroup */
struct cgrp_q {
	__uint(type, BPF_MAP_TYPE_QUEUE);
	__uint(max_entries, MAX_QUEUED);
	__type(value, u32);
};

/*
 * Ideally, we want to allocate cgrp_q and cgrp_q_len in the cgroup local
 * storage; however, a cgroup local storage can only be accessed from the BPF
 * progs attached to the cgroup. For now, work around by allocating an array of
 * cgrp_q's and then allocating per-cgroup indices.
 *
 * Another caveat: It's difficult to populate a large array of maps statically
 * or from BPF. Initialize it from userland.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, MAX_CGRPS);
	__type(key, s32);
	__array(values, struct cgrp_q);
} cgrp_q_arr SEC(".maps");

static u64 cgrp_q_len[MAX_CGRPS];

/*
 * This and cgrp_q_idx_hash combine into a poor man's IDR. This likely would be
 * useful to have as a map type.
 */
static u32 cgrp_q_idx_cursor;
static u64 cgrp_q_idx_busy[MAX_CGRPS];

/*
 * All added up, the following is what we do:
 *
 * 1. When a cgroup is enabled, round-robin the cgrp_q_idx_busy array doing
 *    cmpxchg looking for a free ID. If none is found, fail cgroup creation
 *    with -EBUSY.
 *
 * 2. Hash the cgroup ID to the allocated cgrp_q_idx in the following
 *    cgrp_q_idx_hash.
 *
 * 3. Whenever a cgrp_q needs to be accessed, first look up the cgrp_q_idx from
 *    cgrp_q_idx_hash and then access the corresponding entry in cgrp_q_arr.
 *
 * This is sadly complicated for something pretty simple. Hopefully, we should
 * be able to simplify this in the future.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_CGRPS);
	__uint(key_size, sizeof(u64));		/* cgrp ID */
	__uint(value_size, sizeof(s32));	/* cgrp_q idx */
} cgrp_q_idx_hash SEC(".maps");

/* statistics */
u64 nr_total, nr_dispatched, nr_missing, nr_kicks, nr_preemptions;
u64 nr_exps, nr_exp_waits, nr_exp_empty;
u64 nr_cgrp_next, nr_cgrp_coll, nr_cgrp_empty;

UEI_DEFINE(uei);

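/*
 * Called when a task becomes runnable. Find the FIFO for @p's cgroup, push
 * @p's PID onto it and, if the FIFO's length just went 0 -> 1, advertise
 * the cgroup on top_q so that dispatching CPUs can find it.
 */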
void BPF_STRUCT_OPS(pair_enqueue, struct task_struct *p, u64 enq_flags)
{
	struct cgroup *cgrp;
	struct cgrp_q *cgq;
	s32 pid = p->pid;
	u64 cgid;
	u32 *q_idx;
	u64 *cgq_len;

	__sync_fetch_and_add(&nr_total, 1);

	cgrp = scx_bpf_task_cgroup(p);
	cgid = cgrp->kn->id;
	bpf_cgroup_release(cgrp);

	/* find the cgroup's q and push @p into it */
	q_idx = bpf_map_lookup_elem(&cgrp_q_idx_hash, &cgid);
	if (!q_idx) {
		scx_bpf_error("failed to lookup q_idx for cgroup[%llu]", cgid);
		return;
	}

	cgq = bpf_map_lookup_elem(&cgrp_q_arr, q_idx);
	if (!cgq) {
		scx_bpf_error("failed to lookup q_arr for cgroup[%llu] q_idx[%u]",
			      cgid, *q_idx);
		return;
	}

	if (bpf_map_push_elem(cgq, &pid, 0)) {
		scx_bpf_error("cgroup[%llu] queue overflow", cgid);
		return;
	}

	/* bump q len, if going 0 -> 1, queue cgroup into the top_q */
	cgq_len = MEMBER_VPTR(cgrp_q_len, [*q_idx]);
	if (!cgq_len) {
		scx_bpf_error("MEMBER_VPTR malfunction");
		return;
	}

	if (!__sync_fetch_and_add(cgq_len, 1) &&
	    bpf_map_push_elem(&top_q, &cgid, 0)) {
		scx_bpf_error("top_q overflow");
		return;
	}
}

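/*
 * Resolve @cpu into the pair_ctx it shares with its pair CPU and into the
 * bit (1 << in_pair_idx) that represents it in the active/preempted masks.
 */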
static int lookup_pairc_and_mask(s32 cpu, struct pair_ctx **pairc, u32 *mask)
{
	u32 *vptr;

	vptr = (u32 *)ARRAY_ELEM_PTR(pair_id, cpu, nr_cpu_ids);
	if (!vptr)
		return -EINVAL;

	*pairc = bpf_map_lookup_elem(&pair_ctx, vptr);
	if (!(*pairc))
		return -EINVAL;

	vptr = (u32 *)ARRAY_ELEM_PTR(in_pair_idx, cpu, nr_cpu_ids);
	if (!vptr)
		return -EINVAL;

	*mask = 1U << *vptr;

	return 0;
}

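/*
 * One dispatch attempt on @cpu. Returns 0 when a task was dispatched or the
 * CPU should go idle / keep waiting for its pair, -EAGAIN when the caller
 * should retry (e.g. the picked cgroup turned out to be empty, or the task
 * vanished before it could be dispatched), and other negative values on
 * errors.
 */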
__attribute__((noinline))
static int try_dispatch(s32 cpu)
{
	struct pair_ctx *pairc;
	struct bpf_map *cgq_map;
	struct task_struct *p;
	u64 now = scx_bpf_now();
	bool kick_pair = false;
	bool expired, pair_preempted;
	u32 *vptr, in_pair_mask;
	s32 pid, q_idx;
	u64 cgid;
	int ret;

	ret = lookup_pairc_and_mask(cpu, &pairc, &in_pair_mask);
	if (ret) {
		scx_bpf_error("failed to lookup pairc and in_pair_mask for cpu[%d]",
			      cpu);
		return -ENOENT;
	}

	bpf_spin_lock(&pairc->lock);
	pairc->active_mask &= ~in_pair_mask;

	expired = time_before(pairc->started_at + pair_batch_dur_ns, now);
	if (expired || pairc->draining) {
		u64 new_cgid = 0;

		__sync_fetch_and_add(&nr_exps, 1);

		/*
		 * We're done with the current cgid. An obvious optimization
		 * would be not draining if the next cgroup is the current one.
		 * For now, be dumb and always expire.
		 */
		pairc->draining = true;

		pair_preempted = pairc->preempted_mask;
		if (pairc->active_mask || pair_preempted) {
			/*
			 * The other CPU is still active, or is no longer under
			 * our control due to e.g. being preempted by a higher
			 * priority sched_class. We want to wait until this
			 * cgroup expires, or until control of our pair CPU has
			 * been returned to us.
			 *
			 * If the pair controls its CPU, and the time already
			 * expired, kick.  When the other CPU arrives at
			 * dispatch and clears its active mask, it'll push the
			 * pair to the next cgroup and kick this CPU.
			 */
			__sync_fetch_and_add(&nr_exp_waits, 1);
			bpf_spin_unlock(&pairc->lock);
			if (expired && !pair_preempted)
				kick_pair = true;
			goto out_maybe_kick;
		}

		bpf_spin_unlock(&pairc->lock);

		/*
		 * Pick the next cgroup. It'd be easier / cleaner to not drop
		 * pairc->lock and use stronger synchronization here especially
		 * given that we'll be switching cgroups significantly less
		 * frequently than tasks. Unfortunately, bpf_spin_lock can't
		 * really protect anything non-trivial. Let's do opportunistic
		 * operations instead.
		 */
		bpf_repeat(BPF_MAX_LOOPS) {
			u32 *q_idx;
			u64 *cgq_len;

			if (bpf_map_pop_elem(&top_q, &new_cgid)) {
				/* no active cgroup, go idle */
				__sync_fetch_and_add(&nr_exp_empty, 1);
				return 0;
			}

			q_idx = bpf_map_lookup_elem(&cgrp_q_idx_hash, &new_cgid);
			if (!q_idx)
				continue;

			/*
			 * This is the only place where empty cgroups are taken
			 * off the top_q.
			 */
			cgq_len = MEMBER_VPTR(cgrp_q_len, [*q_idx]);
			if (!cgq_len || !*cgq_len)
				continue;

			/*
			 * If it has any tasks, requeue as we may race and not
			 * execute it.
			 */
			bpf_map_push_elem(&top_q, &new_cgid, 0);
			break;
		}

		bpf_spin_lock(&pairc->lock);

		/*
		 * The other CPU may already have started on a new cgroup while
		 * we dropped the lock. Make sure that we're still draining and
		 * start on the new cgroup.
		 */
		if (pairc->draining && !pairc->active_mask) {
			__sync_fetch_and_add(&nr_cgrp_next, 1);
			pairc->cgid = new_cgid;
			pairc->started_at = now;
			pairc->draining = false;
			kick_pair = true;
		} else {
			__sync_fetch_and_add(&nr_cgrp_coll, 1);
		}
	}

	cgid = pairc->cgid;
	pairc->active_mask |= in_pair_mask;
	bpf_spin_unlock(&pairc->lock);

	/* again, it'd be better to do all these with the lock held, oh well */
	vptr = bpf_map_lookup_elem(&cgrp_q_idx_hash, &cgid);
	if (!vptr) {
		scx_bpf_error("failed to lookup q_idx for cgroup[%llu]", cgid);
		return -ENOENT;
	}
	q_idx = *vptr;

	/* claim one task from cgrp_q w/ q_idx */
	bpf_repeat(BPF_MAX_LOOPS) {
		u64 *cgq_len, len;

		cgq_len = MEMBER_VPTR(cgrp_q_len, [q_idx]);
		if (!cgq_len || !(len = *(volatile u64 *)cgq_len)) {
			/* the cgroup must be empty, expire and repeat */
			__sync_fetch_and_add(&nr_cgrp_empty, 1);
			bpf_spin_lock(&pairc->lock);
			pairc->draining = true;
			pairc->active_mask &= ~in_pair_mask;
			bpf_spin_unlock(&pairc->lock);
			return -EAGAIN;
		}

		if (__sync_val_compare_and_swap(cgq_len, len, len - 1) != len)
			continue;

		break;
	}

	cgq_map = bpf_map_lookup_elem(&cgrp_q_arr, &q_idx);
	if (!cgq_map) {
		scx_bpf_error("failed to lookup cgq_map for cgroup[%llu] q_idx[%d]",
			      cgid, q_idx);
		return -ENOENT;
	}

	if (bpf_map_pop_elem(cgq_map, &pid)) {
		scx_bpf_error("cgq_map is empty for cgroup[%llu] q_idx[%d]",
			      cgid, q_idx);
		return -ENOENT;
	}

	p = bpf_task_from_pid(pid);
	if (p) {
		__sync_fetch_and_add(&nr_dispatched, 1);
		scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
		bpf_task_release(p);
	} else {
		/* we don't handle dequeues, retry on lost tasks */
		__sync_fetch_and_add(&nr_missing, 1);
		return -EAGAIN;
	}

out_maybe_kick:
	if (kick_pair) {
		s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);
		if (pair) {
			__sync_fetch_and_add(&nr_kicks, 1);
			scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT);
		}
	}
	return 0;
}

void BPF_STRUCT_OPS(pair_dispatch, s32 cpu, struct task_struct *prev)
{
	bpf_repeat(BPF_MAX_LOOPS) {
		if (try_dispatch(cpu) != -EAGAIN)
			break;
	}
}

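/*
 * The CPU is coming back under sched_ext control. Clear this CPU's bit in
 * preempted_mask and, if the pair CPU isn't preempted either, kick it to
 * resume pair scheduling.
 */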
void BPF_STRUCT_OPS(pair_cpu_acquire, s32 cpu, struct scx_cpu_acquire_args *args)
{
	int ret;
	u32 in_pair_mask;
	struct pair_ctx *pairc;
	bool kick_pair;

	ret = lookup_pairc_and_mask(cpu, &pairc, &in_pair_mask);
	if (ret)
		return;

	bpf_spin_lock(&pairc->lock);
	pairc->preempted_mask &= ~in_pair_mask;
	/* Kick the pair CPU, unless it was also preempted. */
	kick_pair = !pairc->preempted_mask;
	bpf_spin_unlock(&pairc->lock);

	if (kick_pair) {
		s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);

		if (pair) {
			__sync_fetch_and_add(&nr_kicks, 1);
			scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT);
		}
	}
}

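/*
 * A higher priority sched_class is taking this CPU over. Mark this CPU as
 * preempted and inactive, put the pair into draining, and preempt the pair
 * CPU with SCX_KICK_WAIT so that it doesn't keep running a task concurrently
 * with whatever preempted us (see "Dealing with preemption" above).
 */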
void BPF_STRUCT_OPS(pair_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
{
	int ret;
	u32 in_pair_mask;
	struct pair_ctx *pairc;
	bool kick_pair;

	ret = lookup_pairc_and_mask(cpu, &pairc, &in_pair_mask);
	if (ret)
		return;

	bpf_spin_lock(&pairc->lock);
	pairc->preempted_mask |= in_pair_mask;
	pairc->active_mask &= ~in_pair_mask;
	/* Kick the pair CPU if it's still running. */
	kick_pair = pairc->active_mask;
	pairc->draining = true;
	bpf_spin_unlock(&pairc->lock);

	if (kick_pair) {
		s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);

		if (pair) {
			__sync_fetch_and_add(&nr_kicks, 1);
			scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT | SCX_KICK_WAIT);
		}
	}
	__sync_fetch_and_add(&nr_preemptions, 1);
}

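/*
 * Allocate a cgrp_q index for the new cgroup by round-robining over
 * cgrp_q_idx_busy with cmpxchg (step 1 in the comment above) and record the
 * cgid -> q_idx mapping in cgrp_q_idx_hash (step 2). Fails with -EBUSY once
 * all MAX_CGRPS slots are in use.
 */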
s32 BPF_STRUCT_OPS(pair_cgroup_init, struct cgroup *cgrp)
{
	u64 cgid = cgrp->kn->id;
	s32 i, q_idx;

	bpf_for(i, 0, MAX_CGRPS) {
		q_idx = __sync_fetch_and_add(&cgrp_q_idx_cursor, 1) % MAX_CGRPS;
		if (!__sync_val_compare_and_swap(&cgrp_q_idx_busy[q_idx], 0, 1))
			break;
	}
	if (i == MAX_CGRPS)
		return -EBUSY;

	if (bpf_map_update_elem(&cgrp_q_idx_hash, &cgid, &q_idx, BPF_ANY)) {
		u64 *busy = MEMBER_VPTR(cgrp_q_idx_busy, [q_idx]);
		if (busy)
			*busy = 0;
		return -EBUSY;
	}

	return 0;
}

void BPF_STRUCT_OPS(pair_cgroup_exit, struct cgroup *cgrp)
{
	u64 cgid = cgrp->kn->id;
	s32 *q_idx;

	q_idx = bpf_map_lookup_elem(&cgrp_q_idx_hash, &cgid);
	if (q_idx) {
		u64 *busy = MEMBER_VPTR(cgrp_q_idx_busy, [*q_idx]);
		if (busy)
			*busy = 0;
		bpf_map_delete_elem(&cgrp_q_idx_hash, &cgid);
	}
}

void BPF_STRUCT_OPS(pair_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

SCX_OPS_DEFINE(pair_ops,
	       .enqueue			= (void *)pair_enqueue,
	       .dispatch		= (void *)pair_dispatch,
	       .cpu_acquire		= (void *)pair_cpu_acquire,
	       .cpu_release		= (void *)pair_cpu_release,
	       .cgroup_init		= (void *)pair_cgroup_init,
	       .cgroup_exit		= (void *)pair_cgroup_exit,
	       .exit			= (void *)pair_exit,
	       .name			= "pair");