1*bba2c361STejun Heo /* SPDX-License-Identifier: GPL-2.0 */ 2*bba2c361STejun Heo /* 3*bba2c361STejun Heo * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst 4*bba2c361STejun Heo * 5*bba2c361STejun Heo * scx_arena_pool: kernel-side sub-allocator over BPF-arena pages. 6*bba2c361STejun Heo * 7*bba2c361STejun Heo * Each chunk added to @sch->arena_pool comes from one 8*bba2c361STejun Heo * bpf_arena_alloc_pages_sleepable() call and is registered at the 9*bba2c361STejun Heo * kernel-side mapping address. Callers translate to the BPF-arena form 10*bba2c361STejun Heo * themselves if needed. 11*bba2c361STejun Heo * 12*bba2c361STejun Heo * Allocations grow the pool on demand. Underlying arena pages are released 13*bba2c361STejun Heo * when the arena map itself is torn down. 14*bba2c361STejun Heo * 15*bba2c361STejun Heo * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 16*bba2c361STejun Heo * Copyright (c) 2026 Tejun Heo <tj@kernel.org> 17*bba2c361STejun Heo */ 18*bba2c361STejun Heo 19*bba2c361STejun Heo enum scx_arena_consts { 20*bba2c361STejun Heo SCX_ARENA_MIN_ORDER = 3, /* 8-byte minimum sub-allocation */ 21*bba2c361STejun Heo SCX_ARENA_GROW_PAGES = 4, /* per growth */ 22*bba2c361STejun Heo }; 23*bba2c361STejun Heo 24*bba2c361STejun Heo s32 scx_arena_pool_init(struct scx_sched *sch) 25*bba2c361STejun Heo { 26*bba2c361STejun Heo if (!sch->arena_map) 27*bba2c361STejun Heo return 0; 28*bba2c361STejun Heo 29*bba2c361STejun Heo sch->arena_pool = gen_pool_create(SCX_ARENA_MIN_ORDER, NUMA_NO_NODE); 30*bba2c361STejun Heo if (!sch->arena_pool) 31*bba2c361STejun Heo return -ENOMEM; 32*bba2c361STejun Heo return 0; 33*bba2c361STejun Heo } 34*bba2c361STejun Heo 35*bba2c361STejun Heo static void scx_arena_clear_chunk(struct gen_pool *pool, struct gen_pool_chunk *chunk, 36*bba2c361STejun Heo void *data) 37*bba2c361STejun Heo { 38*bba2c361STejun Heo int order = pool->min_alloc_order; 39*bba2c361STejun Heo size_t chunk_sz = chunk->end_addr - chunk->start_addr + 1; 40*bba2c361STejun Heo unsigned long end_bit = chunk_sz >> order; 41*bba2c361STejun Heo unsigned long b, e; 42*bba2c361STejun Heo 43*bba2c361STejun Heo for_each_set_bitrange(b, e, chunk->bits, end_bit) 44*bba2c361STejun Heo gen_pool_free(pool, chunk->start_addr + (b << order), 45*bba2c361STejun Heo (e - b) << order); 46*bba2c361STejun Heo } 47*bba2c361STejun Heo 48*bba2c361STejun Heo /* 49*bba2c361STejun Heo * Tear down the pool. Outstanding gen_pool allocations are freed via 50*bba2c361STejun Heo * scx_arena_clear_chunk() so gen_pool_destroy() doesn't BUG. The underlying 51*bba2c361STejun Heo * arena pages are released when the arena map itself is torn down. 52*bba2c361STejun Heo */ 53*bba2c361STejun Heo void scx_arena_pool_destroy(struct scx_sched *sch) 54*bba2c361STejun Heo { 55*bba2c361STejun Heo if (!sch->arena_pool) 56*bba2c361STejun Heo return; 57*bba2c361STejun Heo gen_pool_for_each_chunk(sch->arena_pool, scx_arena_clear_chunk, NULL); 58*bba2c361STejun Heo gen_pool_destroy(sch->arena_pool); 59*bba2c361STejun Heo sch->arena_pool = NULL; 60*bba2c361STejun Heo } 61*bba2c361STejun Heo 62*bba2c361STejun Heo /* 63*bba2c361STejun Heo * Grow the pool by @page_cnt pages. bpf_arena_alloc_pages_sleepable() and 64*bba2c361STejun Heo * gen_pool_add() (which calls vzalloc(GFP_KERNEL)) require a sleepable 65*bba2c361STejun Heo * context. 66*bba2c361STejun Heo */ 67*bba2c361STejun Heo static int scx_arena_grow(struct scx_sched *sch, u32 page_cnt) 68*bba2c361STejun Heo { 69*bba2c361STejun Heo u64 kern_vm_start; 70*bba2c361STejun Heo u32 uaddr32; 71*bba2c361STejun Heo void *p; 72*bba2c361STejun Heo int ret; 73*bba2c361STejun Heo 74*bba2c361STejun Heo if (!sch->arena_map || !sch->arena_pool) 75*bba2c361STejun Heo return -EINVAL; 76*bba2c361STejun Heo 77*bba2c361STejun Heo p = bpf_arena_alloc_pages_sleepable(sch->arena_map, NULL, 78*bba2c361STejun Heo page_cnt, NUMA_NO_NODE, 0); 79*bba2c361STejun Heo if (!p) 80*bba2c361STejun Heo return -ENOMEM; 81*bba2c361STejun Heo 82*bba2c361STejun Heo uaddr32 = (u32)(unsigned long)p; 83*bba2c361STejun Heo /* arena.o, which defines these, is built only on MMU && 64BIT */ 84*bba2c361STejun Heo #if defined(CONFIG_MMU) && defined(CONFIG_64BIT) 85*bba2c361STejun Heo kern_vm_start = bpf_arena_map_kern_vm_start(sch->arena_map); 86*bba2c361STejun Heo #else 87*bba2c361STejun Heo kern_vm_start = 0; 88*bba2c361STejun Heo #endif 89*bba2c361STejun Heo 90*bba2c361STejun Heo ret = gen_pool_add(sch->arena_pool, kern_vm_start + uaddr32, 91*bba2c361STejun Heo page_cnt * PAGE_SIZE, NUMA_NO_NODE); 92*bba2c361STejun Heo if (ret) { 93*bba2c361STejun Heo bpf_arena_free_pages_non_sleepable(sch->arena_map, p, page_cnt); 94*bba2c361STejun Heo return ret; 95*bba2c361STejun Heo } 96*bba2c361STejun Heo return 0; 97*bba2c361STejun Heo } 98*bba2c361STejun Heo 99*bba2c361STejun Heo /* 100*bba2c361STejun Heo * Allocate @size bytes from the arena pool. Returns kernel VA on success, NULL 101*bba2c361STejun Heo * on failure. May grow the pool via scx_arena_grow() which sleeps. Caller must 102*bba2c361STejun Heo * be in a GFP_KERNEL context. 103*bba2c361STejun Heo */ 104*bba2c361STejun Heo void *scx_arena_alloc(struct scx_sched *sch, size_t size) 105*bba2c361STejun Heo { 106*bba2c361STejun Heo unsigned long kern_va; 107*bba2c361STejun Heo u32 page_cnt; 108*bba2c361STejun Heo 109*bba2c361STejun Heo might_sleep(); 110*bba2c361STejun Heo 111*bba2c361STejun Heo if (!sch->arena_pool) 112*bba2c361STejun Heo return NULL; 113*bba2c361STejun Heo 114*bba2c361STejun Heo while (true) { 115*bba2c361STejun Heo kern_va = gen_pool_alloc(sch->arena_pool, size); 116*bba2c361STejun Heo if (kern_va) 117*bba2c361STejun Heo break; 118*bba2c361STejun Heo page_cnt = max_t(u32, SCX_ARENA_GROW_PAGES, 119*bba2c361STejun Heo (size + PAGE_SIZE - 1) >> PAGE_SHIFT); 120*bba2c361STejun Heo if (scx_arena_grow(sch, page_cnt)) 121*bba2c361STejun Heo return NULL; 122*bba2c361STejun Heo } 123*bba2c361STejun Heo 124*bba2c361STejun Heo return (void *)kern_va; 125*bba2c361STejun Heo } 126*bba2c361STejun Heo 127*bba2c361STejun Heo void scx_arena_free(struct scx_sched *sch, void *kern_va, size_t size) 128*bba2c361STejun Heo { 129*bba2c361STejun Heo if (sch->arena_pool && kern_va) 130*bba2c361STejun Heo gen_pool_free(sch->arena_pool, (unsigned long)kern_va, size); 131*bba2c361STejun Heo } 132