1bba2c361STejun Heo /* SPDX-License-Identifier: GPL-2.0 */ 2bba2c361STejun Heo /* 3bba2c361STejun Heo * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst 4bba2c361STejun Heo * 5bba2c361STejun Heo * scx_arena_pool: kernel-side sub-allocator over BPF-arena pages. 6bba2c361STejun Heo * 7bba2c361STejun Heo * Each chunk added to @sch->arena_pool comes from one 8bba2c361STejun Heo * bpf_arena_alloc_pages_sleepable() call and is registered at the 9bba2c361STejun Heo * kernel-side mapping address. Callers translate to the BPF-arena form 10bba2c361STejun Heo * themselves if needed. 11bba2c361STejun Heo * 12bba2c361STejun Heo * Allocations grow the pool on demand. Underlying arena pages are released 13bba2c361STejun Heo * when the arena map itself is torn down. 14bba2c361STejun Heo * 15bba2c361STejun Heo * Copyright (c) 2026 Meta Platforms, Inc. and affiliates. 16bba2c361STejun Heo * Copyright (c) 2026 Tejun Heo <tj@kernel.org> 17bba2c361STejun Heo */ 18*3cd1f76bSTejun Heo #include <linux/genalloc.h> 19*3cd1f76bSTejun Heo 20*3cd1f76bSTejun Heo #include "internal.h" 21*3cd1f76bSTejun Heo #include "arena.h" 22bba2c361STejun Heo 23bba2c361STejun Heo enum scx_arena_consts { 24bba2c361STejun Heo SCX_ARENA_MIN_ORDER = 3, /* 8-byte minimum sub-allocation */ 25bba2c361STejun Heo SCX_ARENA_GROW_PAGES = 4, /* per growth */ 26bba2c361STejun Heo }; 27bba2c361STejun Heo 28bba2c361STejun Heo s32 scx_arena_pool_init(struct scx_sched *sch) 29bba2c361STejun Heo { 30bba2c361STejun Heo if (!sch->arena_map) 31bba2c361STejun Heo return 0; 32bba2c361STejun Heo 33bba2c361STejun Heo sch->arena_pool = gen_pool_create(SCX_ARENA_MIN_ORDER, NUMA_NO_NODE); 34bba2c361STejun Heo if (!sch->arena_pool) 35bba2c361STejun Heo return -ENOMEM; 36bba2c361STejun Heo return 0; 37bba2c361STejun Heo } 38bba2c361STejun Heo 39bba2c361STejun Heo static void scx_arena_clear_chunk(struct gen_pool *pool, struct gen_pool_chunk *chunk, 40bba2c361STejun Heo void *data) 41bba2c361STejun Heo { 42bba2c361STejun Heo int order = pool->min_alloc_order; 43bba2c361STejun Heo size_t chunk_sz = chunk->end_addr - chunk->start_addr + 1; 44bba2c361STejun Heo unsigned long end_bit = chunk_sz >> order; 45bba2c361STejun Heo unsigned long b, e; 46bba2c361STejun Heo 47bba2c361STejun Heo for_each_set_bitrange(b, e, chunk->bits, end_bit) 48bba2c361STejun Heo gen_pool_free(pool, chunk->start_addr + (b << order), 49bba2c361STejun Heo (e - b) << order); 50bba2c361STejun Heo } 51bba2c361STejun Heo 52bba2c361STejun Heo /* 53bba2c361STejun Heo * Tear down the pool. Outstanding gen_pool allocations are freed via 54bba2c361STejun Heo * scx_arena_clear_chunk() so gen_pool_destroy() doesn't BUG. The underlying 55bba2c361STejun Heo * arena pages are released when the arena map itself is torn down. 56bba2c361STejun Heo */ 57bba2c361STejun Heo void scx_arena_pool_destroy(struct scx_sched *sch) 58bba2c361STejun Heo { 59bba2c361STejun Heo if (!sch->arena_pool) 60bba2c361STejun Heo return; 61bba2c361STejun Heo gen_pool_for_each_chunk(sch->arena_pool, scx_arena_clear_chunk, NULL); 62bba2c361STejun Heo gen_pool_destroy(sch->arena_pool); 63bba2c361STejun Heo sch->arena_pool = NULL; 64bba2c361STejun Heo } 65bba2c361STejun Heo 66bba2c361STejun Heo /* 67bba2c361STejun Heo * Grow the pool by @page_cnt pages. bpf_arena_alloc_pages_sleepable() and 68bba2c361STejun Heo * gen_pool_add() (which calls vzalloc(GFP_KERNEL)) require a sleepable 69bba2c361STejun Heo * context. 70bba2c361STejun Heo */ 71bba2c361STejun Heo static int scx_arena_grow(struct scx_sched *sch, u32 page_cnt) 72bba2c361STejun Heo { 73bba2c361STejun Heo u64 kern_vm_start; 74bba2c361STejun Heo u32 uaddr32; 75bba2c361STejun Heo void *p; 76bba2c361STejun Heo int ret; 77bba2c361STejun Heo 78bba2c361STejun Heo if (!sch->arena_map || !sch->arena_pool) 79bba2c361STejun Heo return -EINVAL; 80bba2c361STejun Heo 81bba2c361STejun Heo p = bpf_arena_alloc_pages_sleepable(sch->arena_map, NULL, 82bba2c361STejun Heo page_cnt, NUMA_NO_NODE, 0); 83bba2c361STejun Heo if (!p) 84bba2c361STejun Heo return -ENOMEM; 85bba2c361STejun Heo 86bba2c361STejun Heo uaddr32 = (u32)(unsigned long)p; 87bba2c361STejun Heo /* arena.o, which defines these, is built only on MMU && 64BIT */ 88bba2c361STejun Heo #if defined(CONFIG_MMU) && defined(CONFIG_64BIT) 89bba2c361STejun Heo kern_vm_start = bpf_arena_map_kern_vm_start(sch->arena_map); 90bba2c361STejun Heo #else 91bba2c361STejun Heo kern_vm_start = 0; 92bba2c361STejun Heo #endif 93bba2c361STejun Heo 94bba2c361STejun Heo ret = gen_pool_add(sch->arena_pool, kern_vm_start + uaddr32, 95bba2c361STejun Heo page_cnt * PAGE_SIZE, NUMA_NO_NODE); 96bba2c361STejun Heo if (ret) { 97bba2c361STejun Heo bpf_arena_free_pages_non_sleepable(sch->arena_map, p, page_cnt); 98bba2c361STejun Heo return ret; 99bba2c361STejun Heo } 100bba2c361STejun Heo return 0; 101bba2c361STejun Heo } 102bba2c361STejun Heo 103bba2c361STejun Heo /* 104bba2c361STejun Heo * Allocate @size bytes from the arena pool. Returns kernel VA on success, NULL 105bba2c361STejun Heo * on failure. May grow the pool via scx_arena_grow() which sleeps. Caller must 106bba2c361STejun Heo * be in a GFP_KERNEL context. 107bba2c361STejun Heo */ 108bba2c361STejun Heo void *scx_arena_alloc(struct scx_sched *sch, size_t size) 109bba2c361STejun Heo { 110bba2c361STejun Heo unsigned long kern_va; 111bba2c361STejun Heo u32 page_cnt; 112bba2c361STejun Heo 113bba2c361STejun Heo might_sleep(); 114bba2c361STejun Heo 115bba2c361STejun Heo if (!sch->arena_pool) 116bba2c361STejun Heo return NULL; 117bba2c361STejun Heo 118bba2c361STejun Heo while (true) { 119bba2c361STejun Heo kern_va = gen_pool_alloc(sch->arena_pool, size); 120bba2c361STejun Heo if (kern_va) 121bba2c361STejun Heo break; 122bba2c361STejun Heo page_cnt = max_t(u32, SCX_ARENA_GROW_PAGES, 123bba2c361STejun Heo (size + PAGE_SIZE - 1) >> PAGE_SHIFT); 124bba2c361STejun Heo if (scx_arena_grow(sch, page_cnt)) 125bba2c361STejun Heo return NULL; 126bba2c361STejun Heo } 127bba2c361STejun Heo 128bba2c361STejun Heo return (void *)kern_va; 129bba2c361STejun Heo } 130bba2c361STejun Heo 131bba2c361STejun Heo void scx_arena_free(struct scx_sched *sch, void *kern_va, size_t size) 132bba2c361STejun Heo { 133bba2c361STejun Heo if (sch->arena_pool && kern_va) 134bba2c361STejun Heo gen_pool_free(sch->arena_pool, (unsigned long)kern_va, size); 135bba2c361STejun Heo } 136