1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * A simple five-level FIFO queue scheduler. 4 * 5 * There are five FIFOs implemented as arena-backed doubly-linked lists 6 * threaded through per-task context. A task gets assigned to one depending on 7 * its compound weight. Each CPU round robins through the FIFOs and dispatches 8 * more from FIFOs with higher indices - 1 from queue0, 2 from queue1, 4 from 9 * queue2 and so on. 10 * 11 * This scheduler demonstrates: 12 * 13 * - BPF-side queueing using TIDs. 14 * - BPF arena for scheduler state. 15 * - Core-sched support. 16 * 17 * This scheduler is primarily for demonstration and testing of sched_ext 18 * features and unlikely to be useful for actual workloads. 19 * 20 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 21 * Copyright (c) 2022 Tejun Heo <tj@kernel.org> 22 * Copyright (c) 2022 David Vernet <dvernet@meta.com> 23 */ 24 #include <scx/common.bpf.h> 25 26 #include "scx_qmap.h" 27 28 enum consts { 29 ONE_SEC_IN_NS = 1000000000, 30 ONE_MSEC_IN_NS = 1000000, 31 LOWPRI_INTV_NS = 10 * ONE_MSEC_IN_NS, 32 SHARED_DSQ = 0, 33 HIGHPRI_DSQ = 1, 34 LOWPRI_DSQ = 2, 35 HIGHPRI_WEIGHT = 8668, /* this is what -20 maps to */ 36 }; 37 38 char _license[] SEC("license") = "GPL"; 39 40 const volatile u64 slice_ns; 41 const volatile u32 stall_user_nth; 42 const volatile u32 stall_kernel_nth; 43 const volatile u32 dsp_inf_loop_after; 44 const volatile u32 dsp_batch; 45 const volatile bool highpri_boosting; 46 const volatile bool print_dsqs_and_events; 47 const volatile bool print_msgs; 48 const volatile u64 sub_cgroup_id; 49 const volatile s32 disallow_tgid; 50 const volatile bool suppress_dump; 51 const volatile bool always_enq_immed; 52 const volatile u32 immed_stress_nth; 53 const volatile u32 max_tasks; 54 55 /* 56 * Optional cid-override test harness. When cid_override_mode is non-zero, 57 * qmap_init() calls scx_bpf_cid_override() with the caller-supplied 58 * cpu_to_cid array to exercise the kfunc's acceptance and error paths. 59 * 60 * 0 = disabled 61 * 1 = valid reverse mapping 62 * 2 = invalid: duplicate cid assignment 63 * 3 = invalid: out-of-range cid 64 */ 65 const volatile u32 cid_override_mode; 66 /* 67 * Array lives in bss (writable) because scx_bpf_cid_override()'s BPF 68 * verifier signature treats its len-paired pointer as read/write - rodata 69 * fails verification with "write into map forbidden". Userspace populates 70 * it before SCX_OPS_LOAD, same as rodata, and nothing writes it after. 71 */ 72 s32 cid_override_cpu_to_cid[SCX_QMAP_MAX_CPUS]; 73 74 UEI_DEFINE(uei); 75 76 /* 77 * All scheduler state - per-cpu context, stats counters, core-sched sequence 78 * numbers, sub-sched cgroup ids - lives in this single BPF arena map. Userspace 79 * reaches it via skel->arena->qa. 80 */ 81 struct { 82 __uint(type, BPF_MAP_TYPE_ARENA); 83 __uint(map_flags, BPF_F_MMAPABLE); 84 __uint(max_entries, 1 << 16); /* upper bound in pages */ 85 #if defined(__TARGET_ARCH_arm64) || defined(__aarch64__) 86 __ulong(map_extra, 0x1ull << 32); /* user/BPF mmap base */ 87 #else 88 __ulong(map_extra, 0x1ull << 44); 89 #endif 90 } arena SEC(".maps"); 91 92 struct qmap_arena __arena_global qa; 93 94 /* 95 * Global idle-cid tracking, maintained via update_idle / cpu_offline and 96 * scanned by the direct-dispatch path. Allocated in qmap_init() from one 97 * arena page, sized to the full cid space. 98 */ 99 struct scx_cmask __arena *qa_idle_cids; 100 101 /* Per-queue locks. Each in its own .data section as bpf_res_spin_lock requires. */ 102 __hidden struct bpf_res_spin_lock qa_q_lock0 SEC(".data.qa_q_lock0"); 103 __hidden struct bpf_res_spin_lock qa_q_lock1 SEC(".data.qa_q_lock1"); 104 __hidden struct bpf_res_spin_lock qa_q_lock2 SEC(".data.qa_q_lock2"); 105 __hidden struct bpf_res_spin_lock qa_q_lock3 SEC(".data.qa_q_lock3"); 106 __hidden struct bpf_res_spin_lock qa_q_lock4 SEC(".data.qa_q_lock4"); 107 108 static struct bpf_res_spin_lock *qa_q_lock(s32 qid) 109 { 110 switch (qid) { 111 case 0: return &qa_q_lock0; 112 case 1: return &qa_q_lock1; 113 case 2: return &qa_q_lock2; 114 case 3: return &qa_q_lock3; 115 case 4: return &qa_q_lock4; 116 default: return NULL; 117 } 118 } 119 120 /* 121 * If enabled, CPU performance target is set according to the queue index 122 * according to the following table. 123 */ 124 static const u32 qidx_to_cpuperf_target[] = { 125 [0] = SCX_CPUPERF_ONE * 0 / 4, 126 [1] = SCX_CPUPERF_ONE * 1 / 4, 127 [2] = SCX_CPUPERF_ONE * 2 / 4, 128 [3] = SCX_CPUPERF_ONE * 3 / 4, 129 [4] = SCX_CPUPERF_ONE * 4 / 4, 130 }; 131 132 /* 133 * Per-queue sequence numbers to implement core-sched ordering. 134 * 135 * Tail seq is assigned to each queued task and incremented. Head seq tracks the 136 * sequence number of the latest dispatched task. The distance between the a 137 * task's seq and the associated queue's head seq is called the queue distance 138 * and used when comparing two tasks for ordering. See qmap_core_sched_before(). 139 */ 140 141 /* 142 * Per-task scheduling context. Allocated from the qa.task_ctxs[] slab in 143 * arena. While the task is alive the entry is referenced from task_ctx_stor; 144 * while it's free the entry sits on the free list singly-linked through 145 * @next_free. 146 * 147 * When the task is queued on one of the five priority FIFOs, @q_idx is the 148 * queue index and @q_next/@q_prev link it in the queue's doubly-linked list. 149 * @q_idx is -1 when the task isn't on any queue. 150 */ 151 struct task_ctx { 152 struct task_ctx __arena *next_free; /* only valid on free list */ 153 struct task_ctx __arena *q_next; /* queue link, NULL if tail */ 154 struct task_ctx __arena *q_prev; /* queue link, NULL if head */ 155 struct qmap_fifo __arena *fifo; /* queue we're on, NULL if not queued */ 156 u64 tid; 157 s32 pid; /* for dump only */ 158 bool force_local; /* Dispatch directly to local_dsq */ 159 bool highpri; 160 u64 core_sched_seq; 161 struct scx_cmask cpus_allowed; /* per-task affinity in cid space */ 162 }; 163 164 /* 165 * Slab stride for task_ctx. cpus_allowed's flex array bits[] overlaps the 166 * tail bytes appended per entry; struct_size() gives the actual per-entry 167 * footprint. 168 */ 169 #define TASK_CTX_STRIDE \ 170 struct_size_t(struct task_ctx, cpus_allowed.bits, \ 171 CMASK_NR_WORDS(SCX_QMAP_MAX_CPUS)) 172 173 /* All task_ctx pointers are arena pointers. */ 174 typedef struct task_ctx __arena task_ctx_t; 175 176 /* Holds an arena pointer to the task's slab entry. */ 177 struct task_ctx_stor_val { 178 task_ctx_t *taskc; 179 }; 180 181 struct { 182 __uint(type, BPF_MAP_TYPE_TASK_STORAGE); 183 __uint(map_flags, BPF_F_NO_PREALLOC); 184 __type(key, int); 185 __type(value, struct task_ctx_stor_val); 186 } task_ctx_stor SEC(".maps"); 187 188 /* Protects the task_ctx slab free list. */ 189 __hidden struct bpf_res_spin_lock qa_task_lock SEC(".data.qa_task_lock"); 190 191 static int qmap_spin_lock(struct bpf_res_spin_lock *lock) 192 { 193 if (bpf_res_spin_lock(lock)) { 194 scx_bpf_error("res_spin_lock failed"); 195 return -EBUSY; 196 } 197 return 0; 198 } 199 200 /* 201 * Try prev_cid, then scan taskc->cpus_allowed AND qa_idle_cids round-robin 202 * from prev_cid + 1. Atomic claim retries on race; bounded by 203 * IDLE_PICK_RETRIES to keep the verifier's insn budget in check. 204 */ 205 #define IDLE_PICK_RETRIES 16 206 207 static s32 pick_direct_dispatch_cid(struct task_struct *p, s32 prev_cid, 208 task_ctx_t *taskc) 209 { 210 u32 nr_cids = scx_bpf_nr_cids(); 211 s32 cid; 212 u32 i; 213 214 if (!always_enq_immed && p->nr_cpus_allowed == 1) 215 return prev_cid; 216 217 if (cmask_test_and_clear(prev_cid, qa_idle_cids)) 218 return prev_cid; 219 220 cid = prev_cid; 221 bpf_for(i, 0, IDLE_PICK_RETRIES) { 222 cid = cmask_next_and_set_wrap(&taskc->cpus_allowed, 223 qa_idle_cids, cid + 1); 224 barrier_var(cid); 225 if (cid >= nr_cids) 226 return -1; 227 if (cmask_test_and_clear(cid, qa_idle_cids)) 228 return cid; 229 } 230 return -1; 231 } 232 233 /* 234 * Force a reference to the arena map. The verifier associates an arena with 235 * a program by finding an LD_IMM64 instruction that loads the arena's BPF 236 * map; programs that only use arena pointers returned from task-local 237 * storage (like qmap_select_cpu) never reference @arena directly. Without 238 * this, the verifier rejects addr_space_cast with "addr_space_cast insn 239 * can only be used in a program that has an associated arena". 240 */ 241 #define QMAP_TOUCH_ARENA() do { asm volatile("" :: "r"(&arena)); } while (0) 242 243 static task_ctx_t *lookup_task_ctx(struct task_struct *p) 244 { 245 struct task_ctx_stor_val *v; 246 247 QMAP_TOUCH_ARENA(); 248 249 v = bpf_task_storage_get(&task_ctx_stor, p, 0, 0); 250 if (!v || !v->taskc) 251 return NULL; 252 return v->taskc; 253 } 254 255 /* Append @taskc to the tail of @fifo. Must not already be queued. */ 256 static void qmap_fifo_enqueue(struct qmap_fifo __arena *fifo, task_ctx_t *taskc) 257 { 258 struct bpf_res_spin_lock *lock = qa_q_lock(fifo->idx); 259 260 if (!lock || qmap_spin_lock(lock)) 261 return; 262 taskc->fifo = fifo; 263 taskc->q_next = NULL; 264 taskc->q_prev = fifo->tail; 265 if (fifo->tail) 266 fifo->tail->q_next = taskc; 267 else 268 fifo->head = taskc; 269 fifo->tail = taskc; 270 bpf_res_spin_unlock(lock); 271 } 272 273 /* Pop the head of @fifo. Returns NULL if empty. */ 274 static task_ctx_t *qmap_fifo_pop(struct qmap_fifo __arena *fifo) 275 { 276 struct bpf_res_spin_lock *lock = qa_q_lock(fifo->idx); 277 task_ctx_t *taskc; 278 279 if (!lock || qmap_spin_lock(lock)) 280 return NULL; 281 taskc = fifo->head; 282 if (taskc) { 283 fifo->head = taskc->q_next; 284 if (taskc->q_next) 285 taskc->q_next->q_prev = NULL; 286 else 287 fifo->tail = NULL; 288 taskc->q_next = NULL; 289 taskc->q_prev = NULL; 290 taskc->fifo = NULL; 291 } 292 bpf_res_spin_unlock(lock); 293 return taskc; 294 } 295 296 /* Remove @taskc from its fifo. No-op if not queued. */ 297 static void qmap_fifo_remove(task_ctx_t *taskc) 298 { 299 struct qmap_fifo __arena *fifo = taskc->fifo; 300 struct bpf_res_spin_lock *lock; 301 302 if (!fifo) 303 return; 304 305 lock = qa_q_lock(fifo->idx); 306 if (!lock || qmap_spin_lock(lock)) 307 return; 308 309 /* Re-check under lock — a concurrent pop may have cleared fifo. */ 310 if (taskc->fifo != fifo) { 311 bpf_res_spin_unlock(lock); 312 return; 313 } 314 315 if (taskc->q_next) 316 taskc->q_next->q_prev = taskc->q_prev; 317 else 318 fifo->tail = taskc->q_prev; 319 if (taskc->q_prev) 320 taskc->q_prev->q_next = taskc->q_next; 321 else 322 fifo->head = taskc->q_next; 323 taskc->q_next = NULL; 324 taskc->q_prev = NULL; 325 taskc->fifo = NULL; 326 bpf_res_spin_unlock(lock); 327 } 328 329 s32 BPF_STRUCT_OPS(qmap_select_cid, struct task_struct *p, 330 s32 prev_cid, u64 wake_flags) 331 { 332 task_ctx_t *taskc; 333 s32 cid; 334 335 if (!(taskc = lookup_task_ctx(p))) 336 return prev_cid; 337 338 if (p->scx.weight < 2 && !(p->flags & PF_KTHREAD)) 339 return prev_cid; 340 341 cid = pick_direct_dispatch_cid(p, prev_cid, taskc); 342 343 if (cid >= 0) { 344 taskc->force_local = true; 345 return cid; 346 } else { 347 return prev_cid; 348 } 349 } 350 351 static int weight_to_idx(u32 weight) 352 { 353 /* Coarsely map the compound weight to a FIFO. */ 354 if (weight <= 25) 355 return 0; 356 else if (weight <= 50) 357 return 1; 358 else if (weight < 200) 359 return 2; 360 else if (weight < 400) 361 return 3; 362 else 363 return 4; 364 } 365 366 void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags) 367 { 368 static u32 user_cnt, kernel_cnt; 369 task_ctx_t *taskc; 370 int idx = weight_to_idx(p->scx.weight); 371 s32 cid; 372 373 if (enq_flags & SCX_ENQ_REENQ) { 374 __sync_fetch_and_add(&qa.nr_reenqueued, 1); 375 if (scx_bpf_task_cid(p) == 0) 376 __sync_fetch_and_add(&qa.nr_reenqueued_cid0, 1); 377 } 378 379 if (p->flags & PF_KTHREAD) { 380 if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth)) 381 return; 382 } else { 383 if (stall_user_nth && !(++user_cnt % stall_user_nth)) 384 return; 385 } 386 387 if (qa.test_error_cnt && !--qa.test_error_cnt) 388 scx_bpf_error("test triggering error"); 389 390 if (!(taskc = lookup_task_ctx(p))) 391 return; 392 393 /* 394 * All enqueued tasks must have their core_sched_seq updated for correct 395 * core-sched ordering. Also, take a look at the end of qmap_dispatch(). 396 */ 397 taskc->core_sched_seq = qa.core_sched_tail_seqs[idx]++; 398 399 /* 400 * IMMED stress testing: Every immed_stress_nth'th enqueue, dispatch 401 * directly to prev_cpu's local DSQ even when busy to force dsq->nr > 1 402 * and exercise the kernel IMMED reenqueue trigger paths. 403 */ 404 if (immed_stress_nth && !(enq_flags & SCX_ENQ_REENQ)) { 405 static u32 immed_stress_cnt; 406 407 if (!(++immed_stress_cnt % immed_stress_nth)) { 408 taskc->force_local = false; 409 scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | scx_bpf_task_cid(p), 410 slice_ns, enq_flags); 411 return; 412 } 413 } 414 415 /* 416 * If qmap_select_cid() is telling us to or this is the last runnable 417 * task on the CPU, enqueue locally. 418 */ 419 if (taskc->force_local) { 420 taskc->force_local = false; 421 scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, slice_ns, enq_flags); 422 return; 423 } 424 425 /* see lowpri_timerfn() */ 426 if (__COMPAT_has_generic_reenq() && 427 p->scx.weight < 2 && !(p->flags & PF_KTHREAD) && !(enq_flags & SCX_ENQ_REENQ)) { 428 scx_bpf_dsq_insert(p, LOWPRI_DSQ, slice_ns, enq_flags); 429 return; 430 } 431 432 /* if select_cid() wasn't called, try direct dispatch */ 433 if (!__COMPAT_is_enq_cpu_selected(enq_flags) && 434 (cid = pick_direct_dispatch_cid(p, scx_bpf_task_cid(p), taskc)) >= 0) { 435 __sync_fetch_and_add(&qa.nr_ddsp_from_enq, 1); 436 scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cid, slice_ns, enq_flags); 437 return; 438 } 439 440 /* 441 * If the task was re-enqueued due to the CPU being preempted by a 442 * higher priority scheduling class, just re-enqueue the task directly 443 * on the global DSQ. As we want another CPU to pick it up, find and 444 * kick an idle cid. 445 */ 446 if (enq_flags & SCX_ENQ_REENQ) { 447 s32 cid; 448 449 scx_bpf_dsq_insert(p, SHARED_DSQ, 0, enq_flags); 450 cid = cmask_next_and_set_wrap(&taskc->cpus_allowed, 451 qa_idle_cids, 0); 452 if (cid < scx_bpf_nr_cids()) 453 scx_bpf_kick_cid(cid, SCX_KICK_IDLE); 454 return; 455 } 456 457 /* Queue on the selected FIFO. */ 458 qmap_fifo_enqueue(&qa.fifos[idx], taskc); 459 460 if (highpri_boosting && p->scx.weight >= HIGHPRI_WEIGHT) { 461 taskc->highpri = true; 462 __sync_fetch_and_add(&qa.nr_highpri_queued, 1); 463 } 464 __sync_fetch_and_add(&qa.nr_enqueued, 1); 465 } 466 467 void BPF_STRUCT_OPS(qmap_dequeue, struct task_struct *p, u64 deq_flags) 468 { 469 task_ctx_t *taskc; 470 471 __sync_fetch_and_add(&qa.nr_dequeued, 1); 472 if (deq_flags & SCX_DEQ_CORE_SCHED_EXEC) 473 __sync_fetch_and_add(&qa.nr_core_sched_execed, 1); 474 475 taskc = lookup_task_ctx(p); 476 if (taskc && taskc->fifo) { 477 if (taskc->highpri) 478 __sync_fetch_and_sub(&qa.nr_highpri_queued, 1); 479 qmap_fifo_remove(taskc); 480 } 481 } 482 483 static void update_core_sched_head_seq(struct task_struct *p) 484 { 485 int idx = weight_to_idx(p->scx.weight); 486 task_ctx_t *taskc; 487 488 if ((taskc = lookup_task_ctx(p))) 489 qa.core_sched_head_seqs[idx] = taskc->core_sched_seq; 490 } 491 492 /* 493 * To demonstrate the use of scx_bpf_dsq_move(), implement silly selective 494 * priority boosting mechanism by scanning SHARED_DSQ looking for highpri tasks, 495 * moving them to HIGHPRI_DSQ and then consuming them first. This makes minor 496 * difference only when dsp_batch is larger than 1. 497 * 498 * scx_bpf_dispatch[_vtime]_from_dsq() are allowed both from ops.dispatch() and 499 * non-rq-lock holding BPF programs. As demonstration, this function is called 500 * from qmap_dispatch() and monitor_timerfn(). 501 */ 502 static bool dispatch_highpri(bool from_timer) 503 { 504 struct task_struct *p; 505 s32 this_cid = scx_bpf_this_cid(); 506 u32 nr_cids = scx_bpf_nr_cids(); 507 508 /* scan SHARED_DSQ and move highpri tasks to HIGHPRI_DSQ */ 509 bpf_for_each(scx_dsq, p, SHARED_DSQ, 0) { 510 static u64 highpri_seq; 511 task_ctx_t *taskc; 512 513 if (!(taskc = lookup_task_ctx(p))) 514 return false; 515 516 if (taskc->highpri) { 517 /* exercise the set_*() and vtime interface too */ 518 scx_bpf_dsq_move_set_slice(BPF_FOR_EACH_ITER, slice_ns * 2); 519 scx_bpf_dsq_move_set_vtime(BPF_FOR_EACH_ITER, highpri_seq++); 520 scx_bpf_dsq_move_vtime(BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0); 521 } 522 } 523 524 /* 525 * Scan HIGHPRI_DSQ and dispatch until a task that can run here is 526 * found. Prefer this_cid if the task allows it; otherwise RR-scan the 527 * task's cpus_allowed starting after this_cid. 528 */ 529 bpf_for_each(scx_dsq, p, HIGHPRI_DSQ, 0) { 530 task_ctx_t *taskc; 531 bool dispatched = false; 532 s32 cid; 533 534 if (!(taskc = lookup_task_ctx(p))) 535 return false; 536 537 if (cmask_test(this_cid, &taskc->cpus_allowed)) 538 cid = this_cid; 539 else 540 cid = cmask_next_set_wrap(&taskc->cpus_allowed, 541 this_cid + 1); 542 if (cid >= nr_cids) 543 continue; 544 545 if (scx_bpf_dsq_move(BPF_FOR_EACH_ITER, p, SCX_DSQ_LOCAL_ON | cid, 546 SCX_ENQ_PREEMPT)) { 547 if (cid == this_cid) { 548 dispatched = true; 549 __sync_fetch_and_add(&qa.nr_expedited_local, 1); 550 } else { 551 __sync_fetch_and_add(&qa.nr_expedited_remote, 1); 552 } 553 if (from_timer) 554 __sync_fetch_and_add(&qa.nr_expedited_from_timer, 1); 555 } else { 556 __sync_fetch_and_add(&qa.nr_expedited_lost, 1); 557 } 558 559 if (dispatched) 560 return true; 561 } 562 563 return false; 564 } 565 566 void BPF_STRUCT_OPS(qmap_dispatch, s32 cid, struct task_struct *prev) 567 { 568 struct task_struct *p; 569 struct cpu_ctx __arena *cpuc; 570 task_ctx_t *taskc; 571 u32 batch = dsp_batch ?: 1; 572 s32 i; 573 574 if (dispatch_highpri(false)) 575 return; 576 577 if (!qa.nr_highpri_queued && scx_bpf_dsq_move_to_local(SHARED_DSQ, 0)) 578 return; 579 580 if (dsp_inf_loop_after && qa.nr_dispatched > dsp_inf_loop_after) { 581 /* 582 * PID 2 should be kthreadd which should mostly be idle and off 583 * the scheduler. Let's keep dispatching it to force the kernel 584 * to call this function over and over again. 585 */ 586 p = bpf_task_from_pid(2); 587 if (p) { 588 scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, slice_ns, 0); 589 bpf_task_release(p); 590 return; 591 } 592 } 593 594 cpuc = &qa.cpu_ctxs[scx_bpf_this_cid()]; 595 596 for (i = 0; i < 5; i++) { 597 /* Advance the dispatch cursor and pick the fifo. */ 598 if (!cpuc->dsp_cnt) { 599 cpuc->dsp_idx = (cpuc->dsp_idx + 1) % 5; 600 cpuc->dsp_cnt = 1 << cpuc->dsp_idx; 601 } 602 603 /* Dispatch or advance. */ 604 bpf_repeat(BPF_MAX_LOOPS) { 605 task_ctx_t *taskc; 606 607 taskc = qmap_fifo_pop(&qa.fifos[cpuc->dsp_idx]); 608 if (!taskc) 609 break; 610 611 p = scx_bpf_tid_to_task(taskc->tid); 612 if (!p) 613 continue; 614 615 if (taskc->highpri) 616 __sync_fetch_and_sub(&qa.nr_highpri_queued, 1); 617 618 update_core_sched_head_seq(p); 619 __sync_fetch_and_add(&qa.nr_dispatched, 1); 620 621 scx_bpf_dsq_insert(p, SHARED_DSQ, slice_ns, 0); 622 623 /* 624 * scx_qmap uses a global BPF queue that any CPU's 625 * dispatch can pop from. If this CPU popped a task that 626 * can't run here, it gets stranded on SHARED_DSQ after 627 * consume_dispatch_q() skips it. Kick the task's home 628 * CPU so it drains SHARED_DSQ. 629 * 630 * There's a race between the pop and the flush of the 631 * buffered dsq_insert: 632 * 633 * CPU 0 (dispatching) CPU 1 (home, idle) 634 * ~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~ 635 * pop from BPF queue 636 * dsq_insert(buffered) 637 * balance: 638 * SHARED_DSQ empty 639 * BPF queue empty 640 * -> goes idle 641 * flush -> on SHARED 642 * kick CPU 1 643 * wakes, drains task 644 * 645 * The kick prevents indefinite stalls but a per-CPU 646 * kthread like ksoftirqd can be briefly stranded when 647 * its home CPU enters idle with softirq pending, 648 * triggering: 649 * 650 * "NOHZ tick-stop error: local softirq work is pending, handler #N!!!" 651 * 652 * from report_idle_softirq(). The kick lands shortly 653 * after and the home CPU drains the task. This could be 654 * avoided by e.g. dispatching pinned tasks to local or 655 * global DSQs, but the current code is left as-is to 656 * document this class of issue -- other schedulers 657 * seeing similar warnings can use this as a reference. 658 */ 659 if (!cmask_test(cid, &taskc->cpus_allowed)) 660 scx_bpf_kick_cid(scx_bpf_task_cid(p), 0); 661 662 batch--; 663 cpuc->dsp_cnt--; 664 if (!batch || !scx_bpf_dispatch_nr_slots()) { 665 if (dispatch_highpri(false)) 666 return; 667 scx_bpf_dsq_move_to_local(SHARED_DSQ, 0); 668 return; 669 } 670 if (!cpuc->dsp_cnt) 671 break; 672 } 673 674 cpuc->dsp_cnt = 0; 675 } 676 677 for (i = 0; i < MAX_SUB_SCHEDS; i++) { 678 if (qa.sub_sched_cgroup_ids[i] && 679 scx_bpf_sub_dispatch(qa.sub_sched_cgroup_ids[i])) 680 return; 681 } 682 683 /* 684 * No other tasks. @prev will keep running. Update its core_sched_seq as 685 * if the task were enqueued and dispatched immediately. 686 */ 687 if (prev) { 688 taskc = lookup_task_ctx(prev); 689 if (!taskc) 690 return; 691 692 taskc->core_sched_seq = 693 qa.core_sched_tail_seqs[weight_to_idx(prev->scx.weight)]++; 694 } 695 } 696 697 void BPF_STRUCT_OPS(qmap_tick, struct task_struct *p) 698 { 699 struct cpu_ctx __arena *cpuc = &qa.cpu_ctxs[scx_bpf_this_cid()]; 700 int idx; 701 702 /* 703 * Use the running avg of weights to select the target cpuperf level. 704 * This is a demonstration of the cpuperf feature rather than a 705 * practical strategy to regulate CPU frequency. 706 */ 707 cpuc->avg_weight = cpuc->avg_weight * 3 / 4 + p->scx.weight / 4; 708 idx = weight_to_idx(cpuc->avg_weight); 709 cpuc->cpuperf_target = qidx_to_cpuperf_target[idx]; 710 711 scx_bpf_cidperf_set(scx_bpf_task_cid(p), cpuc->cpuperf_target); 712 } 713 714 /* 715 * The distance from the head of the queue scaled by the weight of the queue. 716 * The lower the number, the older the task and the higher the priority. 717 */ 718 static s64 task_qdist(struct task_struct *p) 719 { 720 int idx = weight_to_idx(p->scx.weight); 721 task_ctx_t *taskc; 722 s64 qdist; 723 724 taskc = lookup_task_ctx(p); 725 if (!taskc) 726 return 0; 727 728 qdist = taskc->core_sched_seq - qa.core_sched_head_seqs[idx]; 729 730 /* 731 * As queue index increments, the priority doubles. The queue w/ index 3 732 * is dispatched twice more frequently than 2. Reflect the difference by 733 * scaling qdists accordingly. Note that the shift amount needs to be 734 * flipped depending on the sign to avoid flipping priority direction. 735 */ 736 if (qdist >= 0) 737 return qdist << (4 - idx); 738 else 739 return qdist << idx; 740 } 741 742 /* 743 * This is called to determine the task ordering when core-sched is picking 744 * tasks to execute on SMT siblings and should encode about the same ordering as 745 * the regular scheduling path. Use the priority-scaled distances from the head 746 * of the queues to compare the two tasks which should be consistent with the 747 * dispatch path behavior. 748 */ 749 bool BPF_STRUCT_OPS(qmap_core_sched_before, 750 struct task_struct *a, struct task_struct *b) 751 { 752 return task_qdist(a) > task_qdist(b); 753 } 754 755 /* 756 * sched_switch tracepoint and cpu_release handlers are no longer needed. 757 * With SCX_OPS_ALWAYS_ENQ_IMMED, wakeup_preempt_scx() reenqueues IMMED 758 * tasks when a higher-priority scheduling class takes the CPU. 759 */ 760 761 s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init_task, struct task_struct *p, 762 struct scx_init_task_args *args) 763 { 764 struct task_ctx_stor_val *v; 765 task_ctx_t *taskc; 766 767 if (p->tgid == disallow_tgid) 768 p->scx.disallow = true; 769 770 /* pop a slab entry off the free list */ 771 if (qmap_spin_lock(&qa_task_lock)) 772 return -EBUSY; 773 taskc = qa.task_free_head; 774 if (taskc) 775 qa.task_free_head = taskc->next_free; 776 bpf_res_spin_unlock(&qa_task_lock); 777 if (!taskc) { 778 scx_bpf_error("task_ctx slab exhausted (max_tasks=%u)", max_tasks); 779 return -ENOMEM; 780 } 781 782 taskc->next_free = NULL; 783 taskc->q_next = NULL; 784 taskc->q_prev = NULL; 785 taskc->fifo = NULL; 786 taskc->tid = p->scx.tid; 787 taskc->pid = p->pid; 788 taskc->force_local = false; 789 taskc->highpri = false; 790 taskc->core_sched_seq = 0; 791 cmask_init(&taskc->cpus_allowed, 0, scx_bpf_nr_cids()); 792 bpf_rcu_read_lock(); 793 cmask_from_cpumask(&taskc->cpus_allowed, p->cpus_ptr); 794 bpf_rcu_read_unlock(); 795 796 v = bpf_task_storage_get(&task_ctx_stor, p, NULL, 797 BPF_LOCAL_STORAGE_GET_F_CREATE); 798 if (!v) { 799 /* push back to the free list */ 800 if (!qmap_spin_lock(&qa_task_lock)) { 801 taskc->next_free = qa.task_free_head; 802 qa.task_free_head = taskc; 803 bpf_res_spin_unlock(&qa_task_lock); 804 } 805 return -ENOMEM; 806 } 807 v->taskc = taskc; 808 return 0; 809 } 810 811 void BPF_STRUCT_OPS(qmap_exit_task, struct task_struct *p, 812 struct scx_exit_task_args *args) 813 { 814 struct task_ctx_stor_val *v; 815 task_ctx_t *taskc; 816 817 v = bpf_task_storage_get(&task_ctx_stor, p, NULL, 0); 818 if (!v || !v->taskc) 819 return; 820 taskc = v->taskc; 821 v->taskc = NULL; 822 823 if (qmap_spin_lock(&qa_task_lock)) 824 return; 825 taskc->next_free = qa.task_free_head; 826 qa.task_free_head = taskc; 827 bpf_res_spin_unlock(&qa_task_lock); 828 } 829 830 void BPF_STRUCT_OPS(qmap_dump, struct scx_dump_ctx *dctx) 831 { 832 task_ctx_t *taskc; 833 s32 i; 834 835 QMAP_TOUCH_ARENA(); 836 837 if (suppress_dump) 838 return; 839 840 /* 841 * Walk the queue lists without locking - kfunc calls (scx_bpf_dump) 842 * aren't in the verifier's kfunc_spin_allowed() list so we can't hold 843 * a lock and dump. Best-effort; racing may print stale tids but the 844 * walk is bounded by bpf_repeat() so it always terminates. 845 */ 846 bpf_for(i, 0, 5) { 847 scx_bpf_dump("QMAP FIFO[%d]:", i); 848 taskc = qa.fifos[i].head; 849 bpf_repeat(4096) { 850 if (!taskc) 851 break; 852 scx_bpf_dump(" %d:%llu", taskc->pid, taskc->tid); 853 taskc = taskc->q_next; 854 } 855 scx_bpf_dump("\n"); 856 } 857 } 858 859 void BPF_STRUCT_OPS(qmap_dump_cid, struct scx_dump_ctx *dctx, s32 cid, bool idle) 860 { 861 struct cpu_ctx __arena *cpuc = &qa.cpu_ctxs[cid]; 862 863 if (suppress_dump || idle) 864 return; 865 866 scx_bpf_dump("QMAP: dsp_idx=%llu dsp_cnt=%llu avg_weight=%u cpuperf_target=%u", 867 cpuc->dsp_idx, cpuc->dsp_cnt, cpuc->avg_weight, 868 cpuc->cpuperf_target); 869 } 870 871 void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struct *p) 872 { 873 struct task_ctx_stor_val *v; 874 task_ctx_t *taskc; 875 876 QMAP_TOUCH_ARENA(); 877 878 if (suppress_dump) 879 return; 880 v = bpf_task_storage_get(&task_ctx_stor, p, NULL, 0); 881 if (!v || !v->taskc) 882 return; 883 taskc = v->taskc; 884 885 scx_bpf_dump("QMAP: force_local=%d core_sched_seq=%llu", 886 taskc->force_local, taskc->core_sched_seq); 887 } 888 889 s32 BPF_STRUCT_OPS(qmap_cgroup_init, struct cgroup *cgrp, struct scx_cgroup_init_args *args) 890 { 891 if (print_msgs) 892 bpf_printk("CGRP INIT %llu weight=%u period=%lu quota=%ld burst=%lu", 893 cgrp->kn->id, args->weight, args->bw_period_us, 894 args->bw_quota_us, args->bw_burst_us); 895 return 0; 896 } 897 898 void BPF_STRUCT_OPS(qmap_cgroup_set_weight, struct cgroup *cgrp, u32 weight) 899 { 900 if (print_msgs) 901 bpf_printk("CGRP SET %llu weight=%u", cgrp->kn->id, weight); 902 } 903 904 void BPF_STRUCT_OPS(qmap_cgroup_set_bandwidth, struct cgroup *cgrp, 905 u64 period_us, u64 quota_us, u64 burst_us) 906 { 907 if (print_msgs) 908 bpf_printk("CGRP SET %llu period=%lu quota=%ld burst=%lu", 909 cgrp->kn->id, period_us, quota_us, burst_us); 910 } 911 912 void BPF_STRUCT_OPS(qmap_update_idle, s32 cid, bool idle) 913 { 914 QMAP_TOUCH_ARENA(); 915 if (idle) 916 cmask_set(cid, qa_idle_cids); 917 else 918 cmask_clear(cid, qa_idle_cids); 919 } 920 921 void BPF_STRUCT_OPS(qmap_set_cmask, struct task_struct *p, 922 const struct scx_cmask *cmask_in) 923 { 924 struct scx_cmask __arena *cmask = (struct scx_cmask __arena *)(long)cmask_in; 925 task_ctx_t *taskc; 926 927 taskc = lookup_task_ctx(p); 928 if (!taskc) 929 return; 930 cmask_copy(&taskc->cpus_allowed, cmask); 931 } 932 933 struct monitor_timer { 934 struct bpf_timer timer; 935 }; 936 937 struct { 938 __uint(type, BPF_MAP_TYPE_ARRAY); 939 __uint(max_entries, 1); 940 __type(key, u32); 941 __type(value, struct monitor_timer); 942 } monitor_timer SEC(".maps"); 943 944 /* 945 * Aggregate cidperf across the first nr_online_cids cids. Post-hotplug 946 * the first-N-are-online invariant drifts, so some cap/cur values may 947 * be stale. For this demo monitor that's fine; the scheduler exits on 948 * the enable-time hotplug_seq mismatch and userspace restarts, which 949 * rebuilds the layout. 950 */ 951 static void monitor_cpuperf(void) 952 { 953 u32 nr_online = scx_bpf_nr_online_cids(); 954 u64 cap_sum = 0, cur_sum = 0, cur_min = SCX_CPUPERF_ONE, cur_max = 0; 955 u64 target_sum = 0, target_min = SCX_CPUPERF_ONE, target_max = 0; 956 s32 cid; 957 958 QMAP_TOUCH_ARENA(); 959 960 bpf_for(cid, 0, nr_online) { 961 struct cpu_ctx __arena *cpuc = &qa.cpu_ctxs[cid]; 962 u32 cap = scx_bpf_cidperf_cap(cid); 963 u32 cur = scx_bpf_cidperf_cur(cid); 964 u32 target; 965 966 cur_min = cur < cur_min ? cur : cur_min; 967 cur_max = cur > cur_max ? cur : cur_max; 968 969 cur_sum += (u64)cur * cap / SCX_CPUPERF_ONE; 970 cap_sum += cap; 971 972 target = cpuc->cpuperf_target; 973 target_sum += target; 974 target_min = target < target_min ? target : target_min; 975 target_max = target > target_max ? target : target_max; 976 } 977 978 if (!nr_online || !cap_sum) 979 return; 980 981 qa.cpuperf_min = cur_min; 982 qa.cpuperf_avg = cur_sum * SCX_CPUPERF_ONE / cap_sum; 983 qa.cpuperf_max = cur_max; 984 985 qa.cpuperf_target_min = target_min; 986 qa.cpuperf_target_avg = target_sum / nr_online; 987 qa.cpuperf_target_max = target_max; 988 } 989 990 /* 991 * Dump the currently queued tasks in the shared DSQ to demonstrate the usage of 992 * scx_bpf_dsq_nr_queued() and DSQ iterator. Raise the dispatch batch count to 993 * see meaningful dumps in the trace pipe. 994 */ 995 static void dump_shared_dsq(void) 996 { 997 struct task_struct *p; 998 s32 nr; 999 1000 if (!(nr = scx_bpf_dsq_nr_queued(SHARED_DSQ))) 1001 return; 1002 1003 bpf_printk("Dumping %d tasks in SHARED_DSQ in reverse order", nr); 1004 1005 bpf_rcu_read_lock(); 1006 bpf_for_each(scx_dsq, p, SHARED_DSQ, SCX_DSQ_ITER_REV) 1007 bpf_printk("%s[%d]", p->comm, p->pid); 1008 bpf_rcu_read_unlock(); 1009 } 1010 1011 static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer) 1012 { 1013 bpf_rcu_read_lock(); 1014 dispatch_highpri(true); 1015 bpf_rcu_read_unlock(); 1016 1017 monitor_cpuperf(); 1018 1019 if (print_dsqs_and_events) { 1020 struct scx_event_stats events; 1021 1022 dump_shared_dsq(); 1023 1024 __COMPAT_scx_bpf_events(&events, sizeof(events)); 1025 1026 bpf_printk("%35s: %lld", "SCX_EV_SELECT_CPU_FALLBACK", 1027 scx_read_event(&events, SCX_EV_SELECT_CPU_FALLBACK)); 1028 bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE", 1029 scx_read_event(&events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE)); 1030 bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_KEEP_LAST", 1031 scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST)); 1032 bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING", 1033 scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING)); 1034 bpf_printk("%35s: %lld", "SCX_EV_REFILL_SLICE_DFL", 1035 scx_read_event(&events, SCX_EV_REFILL_SLICE_DFL)); 1036 bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION", 1037 scx_read_event(&events, SCX_EV_BYPASS_DURATION)); 1038 bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH", 1039 scx_read_event(&events, SCX_EV_BYPASS_DISPATCH)); 1040 bpf_printk("%35s: %lld", "SCX_EV_BYPASS_ACTIVATE", 1041 scx_read_event(&events, SCX_EV_BYPASS_ACTIVATE)); 1042 } 1043 1044 bpf_timer_start(timer, ONE_SEC_IN_NS, 0); 1045 return 0; 1046 } 1047 1048 struct lowpri_timer { 1049 struct bpf_timer timer; 1050 }; 1051 1052 struct { 1053 __uint(type, BPF_MAP_TYPE_ARRAY); 1054 __uint(max_entries, 1); 1055 __type(key, u32); 1056 __type(value, struct lowpri_timer); 1057 } lowpri_timer SEC(".maps"); 1058 1059 /* 1060 * Nice 19 tasks are put into the lowpri DSQ. Every 10ms, reenq is triggered and 1061 * the tasks are transferred to SHARED_DSQ. 1062 */ 1063 static int lowpri_timerfn(void *map, int *key, struct bpf_timer *timer) 1064 { 1065 scx_bpf_dsq_reenq(LOWPRI_DSQ, 0); 1066 bpf_timer_start(timer, LOWPRI_INTV_NS, 0); 1067 return 0; 1068 } 1069 1070 s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init) 1071 { 1072 u8 __arena *slab; 1073 u32 nr_pages, key = 0, i; 1074 u32 nr_cids, nr_cpu_ids; 1075 struct bpf_timer *timer; 1076 s32 ret; 1077 1078 nr_cids = scx_bpf_nr_cids(); 1079 nr_cpu_ids = scx_bpf_nr_cpu_ids(); 1080 1081 if (nr_cids > SCX_QMAP_MAX_CPUS) { 1082 scx_bpf_error("nr_cids=%u exceeds SCX_QMAP_MAX_CPUS=%d", 1083 nr_cids, SCX_QMAP_MAX_CPUS); 1084 return -EINVAL; 1085 } 1086 if (nr_cpu_ids > SCX_QMAP_MAX_CPUS) { 1087 scx_bpf_error("nr_cpu_ids=%u exceeds SCX_QMAP_MAX_CPUS=%d", 1088 nr_cpu_ids, SCX_QMAP_MAX_CPUS); 1089 return -EINVAL; 1090 } 1091 1092 /* 1093 * cid-override test hook. Must run before anything that reads the 1094 * cid space (scx_bpf_nr_cids, cmask_init, etc.). On invalid input, 1095 * the kfunc calls scx_error() which aborts the scheduler. 1096 */ 1097 if (cid_override_mode) { 1098 scx_bpf_cid_override((const s32 *)cid_override_cpu_to_cid, 1099 nr_cpu_ids * sizeof(s32)); 1100 } 1101 1102 /* 1103 * Allocate the task_ctx slab in arena and thread the entire slab onto 1104 * the free list. max_tasks is set by userspace before load. Each entry 1105 * is TASK_CTX_STRIDE bytes - task_ctx's trailing cpus_allowed flex 1106 * array extends into the stride tail. 1107 */ 1108 if (!max_tasks) { 1109 scx_bpf_error("max_tasks must be > 0"); 1110 return -EINVAL; 1111 } 1112 1113 nr_pages = (max_tasks * TASK_CTX_STRIDE + PAGE_SIZE - 1) / PAGE_SIZE; 1114 slab = bpf_arena_alloc_pages(&arena, NULL, nr_pages, NUMA_NO_NODE, 0); 1115 if (!slab) { 1116 scx_bpf_error("failed to allocate task_ctx slab"); 1117 return -ENOMEM; 1118 } 1119 qa.task_ctxs = (task_ctx_t *)slab; 1120 1121 bpf_for(i, 0, 5) 1122 qa.fifos[i].idx = i; 1123 1124 bpf_for(i, 0, max_tasks) { 1125 task_ctx_t *cur = (task_ctx_t *)(slab + i * TASK_CTX_STRIDE); 1126 task_ctx_t *next = (i + 1 < max_tasks) ? 1127 (task_ctx_t *)(slab + (i + 1) * TASK_CTX_STRIDE) : NULL; 1128 cur->next_free = next; 1129 } 1130 qa.task_free_head = (task_ctx_t *)slab; 1131 1132 /* 1133 * Allocate and initialize the idle cmask. Starts empty - update_idle 1134 * fills it as cpus enter idle. 1135 */ 1136 qa_idle_cids = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); 1137 if (!qa_idle_cids) { 1138 scx_bpf_error("failed to allocate idle cmask"); 1139 return -ENOMEM; 1140 } 1141 cmask_init(qa_idle_cids, 0, nr_cids); 1142 1143 ret = scx_bpf_create_dsq(SHARED_DSQ, -1); 1144 if (ret) { 1145 scx_bpf_error("failed to create DSQ %d (%d)", SHARED_DSQ, ret); 1146 return ret; 1147 } 1148 1149 ret = scx_bpf_create_dsq(HIGHPRI_DSQ, -1); 1150 if (ret) { 1151 scx_bpf_error("failed to create DSQ %d (%d)", HIGHPRI_DSQ, ret); 1152 return ret; 1153 } 1154 1155 ret = scx_bpf_create_dsq(LOWPRI_DSQ, -1); 1156 if (ret) 1157 return ret; 1158 1159 timer = bpf_map_lookup_elem(&monitor_timer, &key); 1160 if (!timer) 1161 return -ESRCH; 1162 bpf_timer_init(timer, &monitor_timer, CLOCK_MONOTONIC); 1163 bpf_timer_set_callback(timer, monitor_timerfn); 1164 ret = bpf_timer_start(timer, ONE_SEC_IN_NS, 0); 1165 if (ret) 1166 return ret; 1167 1168 if (__COMPAT_has_generic_reenq()) { 1169 /* see lowpri_timerfn() */ 1170 timer = bpf_map_lookup_elem(&lowpri_timer, &key); 1171 if (!timer) 1172 return -ESRCH; 1173 bpf_timer_init(timer, &lowpri_timer, CLOCK_MONOTONIC); 1174 bpf_timer_set_callback(timer, lowpri_timerfn); 1175 ret = bpf_timer_start(timer, LOWPRI_INTV_NS, 0); 1176 if (ret) 1177 return ret; 1178 } 1179 1180 return 0; 1181 } 1182 1183 void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei) 1184 { 1185 UEI_RECORD(uei, ei); 1186 } 1187 1188 s32 BPF_STRUCT_OPS(qmap_sub_attach, struct scx_sub_attach_args *args) 1189 { 1190 s32 i; 1191 1192 for (i = 0; i < MAX_SUB_SCHEDS; i++) { 1193 if (!qa.sub_sched_cgroup_ids[i]) { 1194 qa.sub_sched_cgroup_ids[i] = args->ops->sub_cgroup_id; 1195 bpf_printk("attaching sub-sched[%d] on %s", 1196 i, args->cgroup_path); 1197 return 0; 1198 } 1199 } 1200 1201 return -ENOSPC; 1202 } 1203 1204 void BPF_STRUCT_OPS(qmap_sub_detach, struct scx_sub_detach_args *args) 1205 { 1206 s32 i; 1207 1208 for (i = 0; i < MAX_SUB_SCHEDS; i++) { 1209 if (qa.sub_sched_cgroup_ids[i] == args->ops->sub_cgroup_id) { 1210 qa.sub_sched_cgroup_ids[i] = 0; 1211 bpf_printk("detaching sub-sched[%d] on %s", 1212 i, args->cgroup_path); 1213 break; 1214 } 1215 } 1216 } 1217 1218 SCX_OPS_CID_DEFINE(qmap_ops, 1219 .flags = SCX_OPS_ENQ_EXITING | SCX_OPS_TID_TO_TASK, 1220 .select_cid = (void *)qmap_select_cid, 1221 .enqueue = (void *)qmap_enqueue, 1222 .dequeue = (void *)qmap_dequeue, 1223 .dispatch = (void *)qmap_dispatch, 1224 .tick = (void *)qmap_tick, 1225 .core_sched_before = (void *)qmap_core_sched_before, 1226 .set_cmask = (void *)qmap_set_cmask, 1227 .update_idle = (void *)qmap_update_idle, 1228 .init_task = (void *)qmap_init_task, 1229 .exit_task = (void *)qmap_exit_task, 1230 .dump = (void *)qmap_dump, 1231 .dump_cid = (void *)qmap_dump_cid, 1232 .dump_task = (void *)qmap_dump_task, 1233 .cgroup_init = (void *)qmap_cgroup_init, 1234 .cgroup_set_weight = (void *)qmap_cgroup_set_weight, 1235 .cgroup_set_bandwidth = (void *)qmap_cgroup_set_bandwidth, 1236 .sub_attach = (void *)qmap_sub_attach, 1237 .sub_detach = (void *)qmap_sub_detach, 1238 .init = (void *)qmap_init, 1239 .exit = (void *)qmap_exit, 1240 .timeout_ms = 5000U, 1241 .name = "qmap"); 1242