xref: /linux/kernel/sched/ext/arena.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1*bba2c361STejun Heo /* SPDX-License-Identifier: GPL-2.0 */
2*bba2c361STejun Heo /*
3*bba2c361STejun Heo  * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
4*bba2c361STejun Heo  *
5*bba2c361STejun Heo  * scx_arena_pool: kernel-side sub-allocator over BPF-arena pages.
6*bba2c361STejun Heo  *
7*bba2c361STejun Heo  * Each chunk added to @sch->arena_pool comes from one
8*bba2c361STejun Heo  * bpf_arena_alloc_pages_sleepable() call and is registered at the
9*bba2c361STejun Heo  * kernel-side mapping address. Callers translate to the BPF-arena form
10*bba2c361STejun Heo  * themselves if needed.
11*bba2c361STejun Heo  *
12*bba2c361STejun Heo  * Allocations grow the pool on demand. Underlying arena pages are released
13*bba2c361STejun Heo  * when the arena map itself is torn down.
14*bba2c361STejun Heo  *
15*bba2c361STejun Heo  * Copyright (c) 2026 Meta Platforms, Inc. and affiliates.
16*bba2c361STejun Heo  * Copyright (c) 2026 Tejun Heo <tj@kernel.org>
17*bba2c361STejun Heo  */
18*bba2c361STejun Heo 
/* Tunables for the kernel-side arena sub-allocator. */
enum scx_arena_consts {
	/* gen_pool min_alloc_order: 1 << 3 == 8-byte minimum sub-allocation */
	SCX_ARENA_MIN_ORDER		= 3,
	/* lower bound on the number of arena pages requested per pool growth */
	SCX_ARENA_GROW_PAGES		= 4,
};
23*bba2c361STejun Heo 
24*bba2c361STejun Heo s32 scx_arena_pool_init(struct scx_sched *sch)
25*bba2c361STejun Heo {
26*bba2c361STejun Heo 	if (!sch->arena_map)
27*bba2c361STejun Heo 		return 0;
28*bba2c361STejun Heo 
29*bba2c361STejun Heo 	sch->arena_pool = gen_pool_create(SCX_ARENA_MIN_ORDER, NUMA_NO_NODE);
30*bba2c361STejun Heo 	if (!sch->arena_pool)
31*bba2c361STejun Heo 		return -ENOMEM;
32*bba2c361STejun Heo 	return 0;
33*bba2c361STejun Heo }
34*bba2c361STejun Heo 
35*bba2c361STejun Heo static void scx_arena_clear_chunk(struct gen_pool *pool, struct gen_pool_chunk *chunk,
36*bba2c361STejun Heo 				  void *data)
37*bba2c361STejun Heo {
38*bba2c361STejun Heo 	int order = pool->min_alloc_order;
39*bba2c361STejun Heo 	size_t chunk_sz = chunk->end_addr - chunk->start_addr + 1;
40*bba2c361STejun Heo 	unsigned long end_bit = chunk_sz >> order;
41*bba2c361STejun Heo 	unsigned long b, e;
42*bba2c361STejun Heo 
43*bba2c361STejun Heo 	for_each_set_bitrange(b, e, chunk->bits, end_bit)
44*bba2c361STejun Heo 		gen_pool_free(pool, chunk->start_addr + (b << order),
45*bba2c361STejun Heo 			      (e - b) << order);
46*bba2c361STejun Heo }
47*bba2c361STejun Heo 
48*bba2c361STejun Heo /*
49*bba2c361STejun Heo  * Tear down the pool. Outstanding gen_pool allocations are freed via
50*bba2c361STejun Heo  * scx_arena_clear_chunk() so gen_pool_destroy() doesn't BUG. The underlying
51*bba2c361STejun Heo  * arena pages are released when the arena map itself is torn down.
52*bba2c361STejun Heo  */
53*bba2c361STejun Heo void scx_arena_pool_destroy(struct scx_sched *sch)
54*bba2c361STejun Heo {
55*bba2c361STejun Heo 	if (!sch->arena_pool)
56*bba2c361STejun Heo 		return;
57*bba2c361STejun Heo 	gen_pool_for_each_chunk(sch->arena_pool, scx_arena_clear_chunk, NULL);
58*bba2c361STejun Heo 	gen_pool_destroy(sch->arena_pool);
59*bba2c361STejun Heo 	sch->arena_pool = NULL;
60*bba2c361STejun Heo }
61*bba2c361STejun Heo 
62*bba2c361STejun Heo /*
63*bba2c361STejun Heo  * Grow the pool by @page_cnt pages. bpf_arena_alloc_pages_sleepable() and
64*bba2c361STejun Heo  * gen_pool_add() (which calls vzalloc(GFP_KERNEL)) require a sleepable
65*bba2c361STejun Heo  * context.
66*bba2c361STejun Heo  */
67*bba2c361STejun Heo static int scx_arena_grow(struct scx_sched *sch, u32 page_cnt)
68*bba2c361STejun Heo {
69*bba2c361STejun Heo 	u64 kern_vm_start;
70*bba2c361STejun Heo 	u32 uaddr32;
71*bba2c361STejun Heo 	void *p;
72*bba2c361STejun Heo 	int ret;
73*bba2c361STejun Heo 
74*bba2c361STejun Heo 	if (!sch->arena_map || !sch->arena_pool)
75*bba2c361STejun Heo 		return -EINVAL;
76*bba2c361STejun Heo 
77*bba2c361STejun Heo 	p = bpf_arena_alloc_pages_sleepable(sch->arena_map, NULL,
78*bba2c361STejun Heo 					    page_cnt, NUMA_NO_NODE, 0);
79*bba2c361STejun Heo 	if (!p)
80*bba2c361STejun Heo 		return -ENOMEM;
81*bba2c361STejun Heo 
82*bba2c361STejun Heo 	uaddr32 = (u32)(unsigned long)p;
83*bba2c361STejun Heo 	/* arena.o, which defines these, is built only on MMU && 64BIT */
84*bba2c361STejun Heo #if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
85*bba2c361STejun Heo 	kern_vm_start = bpf_arena_map_kern_vm_start(sch->arena_map);
86*bba2c361STejun Heo #else
87*bba2c361STejun Heo 	kern_vm_start = 0;
88*bba2c361STejun Heo #endif
89*bba2c361STejun Heo 
90*bba2c361STejun Heo 	ret = gen_pool_add(sch->arena_pool, kern_vm_start + uaddr32,
91*bba2c361STejun Heo 			   page_cnt * PAGE_SIZE, NUMA_NO_NODE);
92*bba2c361STejun Heo 	if (ret) {
93*bba2c361STejun Heo 		bpf_arena_free_pages_non_sleepable(sch->arena_map, p, page_cnt);
94*bba2c361STejun Heo 		return ret;
95*bba2c361STejun Heo 	}
96*bba2c361STejun Heo 	return 0;
97*bba2c361STejun Heo }
98*bba2c361STejun Heo 
99*bba2c361STejun Heo /*
100*bba2c361STejun Heo  * Allocate @size bytes from the arena pool. Returns kernel VA on success, NULL
101*bba2c361STejun Heo  * on failure. May grow the pool via scx_arena_grow() which sleeps. Caller must
102*bba2c361STejun Heo  * be in a GFP_KERNEL context.
103*bba2c361STejun Heo  */
104*bba2c361STejun Heo void *scx_arena_alloc(struct scx_sched *sch, size_t size)
105*bba2c361STejun Heo {
106*bba2c361STejun Heo 	unsigned long kern_va;
107*bba2c361STejun Heo 	u32 page_cnt;
108*bba2c361STejun Heo 
109*bba2c361STejun Heo 	might_sleep();
110*bba2c361STejun Heo 
111*bba2c361STejun Heo 	if (!sch->arena_pool)
112*bba2c361STejun Heo 		return NULL;
113*bba2c361STejun Heo 
114*bba2c361STejun Heo 	while (true) {
115*bba2c361STejun Heo 		kern_va = gen_pool_alloc(sch->arena_pool, size);
116*bba2c361STejun Heo 		if (kern_va)
117*bba2c361STejun Heo 			break;
118*bba2c361STejun Heo 		page_cnt = max_t(u32, SCX_ARENA_GROW_PAGES,
119*bba2c361STejun Heo 				 (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
120*bba2c361STejun Heo 		if (scx_arena_grow(sch, page_cnt))
121*bba2c361STejun Heo 			return NULL;
122*bba2c361STejun Heo 	}
123*bba2c361STejun Heo 
124*bba2c361STejun Heo 	return (void *)kern_va;
125*bba2c361STejun Heo }
126*bba2c361STejun Heo 
127*bba2c361STejun Heo void scx_arena_free(struct scx_sched *sch, void *kern_va, size_t size)
128*bba2c361STejun Heo {
129*bba2c361STejun Heo 	if (sch->arena_pool && kern_va)
130*bba2c361STejun Heo 		gen_pool_free(sch->arena_pool, (unsigned long)kern_va, size);
131*bba2c361STejun Heo }
132