xref: /linux/tools/testing/selftests/bpf/libarena/src/asan.bpf.c (revision b9b23fe1761117f4a0109a25d16d337c900437ad)
1 // SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
3 #include <vmlinux.h>
4 #include <libarena/common.h>
5 #include <libarena/asan.h>
6 
7 
enum {
	/*
	 * Kind of access being validated by check_region_inline() and
	 * described by asan_report(): a load or a store.
	 */
	ASAN_READ	= 0U,
	ASAN_WRITE	= 1U,
};
16 
17 /*
18  * Address sanitizer (ASAN) for arena-based BPF programs, inspired
19  * by KASAN.
20  *
21  * The API
22  * -------
23  *
24  * The implementation includes two kinds of components: Implementation
25  * of ASAN hooks injected by LLVM into the program, and API calls that
26  * allocators use to mark memory as valid or invalid. The full list is:
27  *
28  * LLVM stubs:
29  *
30  * void __asan_{load, store}<size>(intptr_t addr)
31  *	Checks whether an access is valid. All variations covered
32  *	by check_region_inline().
33  *
 * void __asan_{store, load}N(intptr_t addr, ssize_t size)
 *	Variable-size variant of the checks above. Also covered by
 *	check_region_inline().
35  *
36  * void __asan_report_{load, store}<size>(intptr_t addr)
37  *	Report an access violation for the program. Used when LLVM
38  *	uses direct code generation for shadow map checks.
39  *
40  * void *__asan_memcpy(void *d, const void *s, size_t n)
41  * void *__asan_memmove(void *d, const void *s, size_t n)
42  * void *__asan_memset(void *p, int c, size_t n)
43  *	Hooks for ASAN instrumentation of the LLVM mem* builtins.
44  *	Currently unimplemented just like the builtins themselves.
45  *
46  * API methods:
47  *
48  * asan_init()
49  *	Initialize the ASAN map for the arena.
50  *
51  * asan_poison()
52  *	Mark a region of memory as poisoned. Accessing poisoned memory
53  *	causes asan_report() to fire. Invoked during free().
54  *
55  * asan_unpoison()
56  *	Mark a region as unpoisoned after alloc().
57  *
58  * asan_shadow_set()
 *	Check directly whether a byte is poisoned. Returns true if it is.
60  *
61  * The Algorithm In Brief
62  * ----------------------
 * Each group of 8 bytes (a "granule") is mapped to a single byte in the shadow
 * map. This shadow byte describes which bytes of the granule are valid.
65  * Possible values are:
66  *
67  * 0: All bytes are valid. Makes checks in the middle of an allocated region
68  * (most of them) fast.
69  * (0, 7]: How many consecutive bytes are valid, starting from the lowest one.
70  * The tradeoff is that we can't poison individual bytes in the middle of a
71  * valid region.
72  * [0x80, 0xff]: Special poison values, can be used to denote specific error
73  * modes (e.g., recently freed vs uninitialized memory).
74  *
75  * The mapping between a memory location and its shadow is:
76  * shadow_addr = shadow_base + (addr >> 3). We retain the 8:1 data:shadow
77  * ratio of existing ASAN implementations as a compromise between tracking
78  * granularity and space usage/scan overhead.
79  */
80 
81 #ifdef BPF_ARENA_ASAN
82 
83 #pragma clang attribute push(__attribute__((no_sanitize("address"))), \
84 			     apply_to = function)
85 
/*
 * Sentinel returned by first_nonzero_byte() when it found no non-zero
 * byte in the scanned range.
 */
#define SHADOW_ALL_ZEROES ((u64)-1)

/*
 * Canary variable for ASAN violations. Set by asan_report() to the
 * offending address.
 */
volatile u64 asan_violated = 0;

/*
 * Address at which asan_init() places the shadow map. Reset to 0 by
 * asan_init() if the shadow pages cannot be allocated.
 */
volatile u64 __asan_shadow_memory_dynamic_address;

/* Flipped to true by asan_report() the first time a violation is seen. */
volatile u32 asan_reported = false;
/* Set by asan_init() once the shadow map has been allocated. */
volatile bool asan_inited = false;

/*
 * Set during program load. When true, asan_report() stays silent for
 * every violation after the first one.
 */
volatile bool asan_report_once = false;
105 
106 /*
107  * BPF does not currently support the memset/memcpy/memcmp intrinsics.
108  * For large sequential copies, or assignments of large data structures,
109  * the frontend will generate an intrinsic that causes the BPF backend
110  * to exit due to a missing implementation. Provide a simple implementation
111  * just for memset to use it for poisoning/unpoisoning the map.
112  */
113 __weak int asan_memset(s8 __arena *dst, s8 val, size_t size)
114 {
115 	size_t i;
116 
117 	for (i = zero; i < size && can_loop; i++)
118 		dst[i] = val;
119 
120 	return 0;
121 }
122 
123 /* Validate a 1-byte access, always within a single byte. */
124 static __always_inline bool memory_is_poisoned_1(s8 __arena *addr)
125 {
126 	s8 shadow_value = *(s8 __arena *)mem_to_shadow(addr);
127 
128 	/* Byte is 0, access is valid. */
129 	if (likely(!shadow_value))
130 		return false;
131 
132 	/*
133 	 * Byte is non-zero. Access is valid if granule offset in [0, shadow_value),
134 	 * so the memory is poisoned if shadow_value is negative or smaller than
135 	 * the granule's value.
136 	 */
137 
138 	return ASAN_GRANULE(addr) >= shadow_value;
139 }
140 
141 /* Validate a 2- 4-, 8-byte access, shadow spans up to 2 bytes. */
142 static __always_inline bool memory_is_poisoned_2_4_8(s8 __arena *addr, u64 size)
143 {
144 	u64 end = (u64)addr + size - 1;
145 
146 	/*
147 	 * Region fully within a single byte (addition didn't
148 	 * overflow above ASAN_GRANULE).
149 	 */
150 	if (likely(ASAN_GRANULE(end) >= size - 1))
151 		return memory_is_poisoned_1((s8 __arena *)end);
152 
153 	/*
154 	 * Otherwise first byte must be fully unpoisoned, and second byte
155 	 * must be unpoisoned up to the end of the accessed region.
156 	 */
157 
158 	return *(s8 __arena *)mem_to_shadow(addr) || memory_is_poisoned_1((s8 __arena *)end);
159 }
160 
161 __weak bool asan_shadow_set(void __arena *addr)
162 {
163 	return memory_is_poisoned_1(addr);
164 }
165 
166 static __always_inline u64 first_nonzero_byte(u64 addr, size_t size)
167 {
168 	while (size && can_loop) {
169 		if (unlikely(*(s8 __arena *)addr))
170 			return addr;
171 		addr += 1;
172 		size -= 1;
173 	}
174 
175 	return SHADOW_ALL_ZEROES;
176 }
177 
178 static __always_inline bool memory_is_poisoned_n(s8 __arena *addr, u64 size)
179 {
180 	u64 ret;
181 	u64 start;
182 	u64 end;
183 
184 	/* Size of [start, end] is end - start + 1. */
185 	start = (u64)mem_to_shadow(addr);
186 	end = (u64)mem_to_shadow(addr + size - 1);
187 
188 	ret = first_nonzero_byte(start, (end - start) + 1);
189 	if (likely(ret == SHADOW_ALL_ZEROES))
190 		return false;
191 
192 	return unlikely(ret != end || ASAN_GRANULE(addr + size - 1) >= *(s8 __arena *)end);
193 }
194 
195 __weak int asan_report(s8 __arena *addr, size_t sz, u32 flags)
196 {
197 	u32 reported = __sync_val_compare_and_swap(&asan_reported, false, true);
198 
199 	/* Only report the first ASAN violation. */
200 	if (reported && asan_report_once)
201 		return 0;
202 
203 	asan_violated = (u64)addr;
204 
205 	arena_stderr("Memory violation for address %p (0x%lx) for %s of size %ld\n",
206 			addr, (u64)addr,
207 			(flags & ASAN_WRITE) ? "write" : "read",
208 			sz);
209 	bpf_stream_print_stack(BPF_STDERR);
210 
211 	return 0;
212 }
213 
214 static __always_inline bool check_asan_args(s8 __arena *addr, size_t size,
215 					    bool *result)
216 {
217 	bool valid = true;
218 
219 	/* Size 0 accesses are valid even if the address is invalid. */
220 	if (unlikely(size == 0))
221 		goto confirmed_valid;
222 
223 	/*
224 	 * Wraparound is possible for values close to the the edge of the
225 	 * 4GiB boundary of the arena (last valid address is 1UL << 32 - 1).
226 	 *
227 	 *
228 	 * The wraparound detection below works for small sizes. check_asan_args is
229 	 * always called from the builtin ASAN checks, so 1 <= size <= 64. Even
230 	 * for storeN/loadN that we do not expect to encounter the intrinsics will
231 	 * not have a large enough size that:
232 	 *
233 	 * - addr + size  > MAX_U32
234 	 * - (u32)(addr + size) > (u32) addr
235 	 *
236 	 * which would defeat wraparound detection.
237 	 */
238 	if (unlikely((u32)(u64)(addr + size) < (u32)(u64)addr))
239 		goto confirmed_invalid;
240 
241 	return false;
242 
243 confirmed_invalid:
244 	valid = false;
245 
246 	/* FALLTHROUGH */
247 confirmed_valid:
248 	*result = valid;
249 
250 	return true;
251 }
252 
253 static __always_inline bool check_region_inline(intptr_t ptr, size_t size,
254 						u32 flags)
255 {
256 	s8 __arena *addr = (s8 __arena *)(u64)ptr;
257 	bool is_poisoned, is_valid;
258 
259 	if (check_asan_args(addr, size, &is_valid)) {
260 		if (!is_valid)
261 			asan_report(addr, size, flags);
262 		return is_valid;
263 	}
264 
265 	switch (size) {
266 	case 1:
267 		is_poisoned = memory_is_poisoned_1(addr);
268 		break;
269 	case 2:
270 	case 4:
271 	case 8:
272 		is_poisoned = memory_is_poisoned_2_4_8(addr, size);
273 		break;
274 	default:
275 		is_poisoned = memory_is_poisoned_n(addr, size);
276 	}
277 
278 	if (is_poisoned) {
279 		asan_report(addr, size, flags);
280 		return false;
281 	}
282 
283 	return true;
284 }
285 
286 /*
287  * __alias is not supported for BPF so define *__noabort() variants as wrappers.
288  */
289 #define DEFINE_ASAN_LOAD_STORE(size)                                  \
290 	__hidden void __asan_store##size(intptr_t addr)                  \
291 	{                                                             \
292 		check_region_inline(addr, size, ASAN_WRITE);          \
293 	}                                                             \
294 	__hidden void __asan_store##size##_noabort(intptr_t addr)        \
295 	{                                                             \
296 		check_region_inline(addr, size, ASAN_WRITE);          \
297 	}                                                             \
298 	__hidden void __asan_load##size(intptr_t addr)                   \
299 	{                                                             \
300 		check_region_inline(addr, size, ASAN_READ);           \
301 	}                                                             \
302 	__hidden void __asan_load##size##_noabort(intptr_t addr)         \
303 	{                                                             \
304 		check_region_inline(addr, size, ASAN_READ);           \
305 	}                                                             \
306 	__hidden void __asan_report_store##size(intptr_t addr)           \
307 	{                                                             \
308 		asan_report((s8 __arena *)addr, size, ASAN_WRITE);           \
309 	}                                                             \
310 	__hidden void __asan_report_store##size##_noabort(intptr_t addr) \
311 	{                                                             \
312 		asan_report((s8 __arena *)addr, size, ASAN_WRITE);           \
313 	}                                                             \
314 	__hidden void __asan_report_load##size(intptr_t addr)            \
315 	{                                                             \
316 		asan_report((s8 __arena *)addr, size, ASAN_READ);            \
317 	}                                                             \
318 	__hidden void __asan_report_load##size##_noabort(intptr_t addr)  \
319 	{                                                             \
320 		asan_report((s8 __arena *)addr, size, ASAN_READ);            \
321 	}
322 
323 DEFINE_ASAN_LOAD_STORE(1);
324 DEFINE_ASAN_LOAD_STORE(2);
325 DEFINE_ASAN_LOAD_STORE(4);
326 DEFINE_ASAN_LOAD_STORE(8);
327 
328 void __asan_storeN(intptr_t addr, ssize_t size)
329 {
330 	check_region_inline(addr, size, ASAN_WRITE);
331 }
332 
333 void __asan_storeN_noabort(intptr_t addr, ssize_t size)
334 {
335 	check_region_inline(addr, size, ASAN_WRITE);
336 }
337 
338 void __asan_loadN(intptr_t addr, ssize_t size)
339 {
340 	check_region_inline(addr, size, ASAN_READ);
341 }
342 
343 void __asan_loadN_noabort(intptr_t addr, ssize_t size)
344 {
345 	check_region_inline(addr, size, ASAN_READ);
346 }
347 
/*
 * Globals are currently not sanitized, so registering them is a no-op.
 * Both arguments are ignored.
 */
void __asan_register_globals(intptr_t globals, size_t n)
{
	(void)globals;
	(void)n;
}
354 
/*
 * Globals are currently not sanitized, so unregistering them is a no-op.
 * Both arguments are ignored.
 */
void __asan_unregister_globals(intptr_t globals, size_t n)
{
	(void)globals;
	(void)n;
}
358 
/*
 * We do not currently have memcpy/memmove/memset intrinsics
 * in LLVM. Do not implement sanitization.
 *
 * This stub copies nothing: it only logs that it was called unexpectedly
 * and returns NULL. The message is newline-terminated so that it does not
 * run into the next line written to the stream.
 */
void *__asan_memcpy(void *d, const void *s, size_t n)
{
	arena_stderr("ASAN: Unexpected %s call\n", __func__);
	return NULL;
}
368 
/*
 * Unimplemented memmove hook: moves nothing, only logs that it was called
 * unexpectedly and returns NULL. The message is newline-terminated so that
 * it does not run into the next line written to the stream.
 */
void *__asan_memmove(void *d, const void *s, size_t n)
{
	arena_stderr("ASAN: Unexpected %s call\n", __func__);
	return NULL;
}
374 
/*
 * Unimplemented memset hook: writes nothing, only logs that it was called
 * unexpectedly and returns NULL. The message is newline-terminated so that
 * it does not run into the next line written to the stream.
 */
void *__asan_memset(void *p, int c, size_t n)
{
	arena_stderr("ASAN: Unexpected %s call\n", __func__);
	return NULL;
}
380 
381 /*
382  * Poisoning code, used when we add more freed memory to the allocator by:
383  * 	a) pulling memory from the arena segment using bpf_arena_alloc_pages()
384  * 	b) freeing memory from application code
385  */
386 __hidden __noasan int asan_poison(void __arena *addr, s8 val, size_t size)
387 {
388 	s8 __arena *shadow;
389 	size_t len;
390 
391 	/*
392 	 * Poisoning from a non-granule address makes no sense: We can only allocate
393 	 * memory to the application that has a granule-aligned starting address,
394 	 * and bpf_arena_alloc_pages returns page-aligned memory. A non-aligned
395 	 * addr then implies we're freeing a different address than the one we
396 	 * allocated.
397 	 */
398 	if (unlikely((u64)addr & ASAN_GRANULE_MASK))
399 		return -EINVAL;
400 
401 	/*
402 	 * We cannot free an unaligned region because it'd be possible that we
403 	 * cannot describe the resulting poisoning state of the granule in
404 	 * the ASAN encoding.
405 	 *
406 	 * Every granule represents a region of memory that looks like the
407 	 * following (P for poisoned bytes, C for clear):
408 	 *
409 	 * <Clear>  <Poisoned>
410 	 * [ C C C ... P P ]
411 	 *
412 	 * The value of the granule's shadow map is the number of clear bytes in
413 	 * it. We cannot represent granules with the following state:
414 	 *
415 	 * [ P P ... C C ... P P ]
416 	 *
417 	 * That would be possible if we could free unaligned regions, so prevent that.
418 	 */
419 	if (unlikely(size & ASAN_GRANULE_MASK))
420 		return -EINVAL;
421 
422 	shadow = mem_to_shadow(addr);
423 	len = size >> ASAN_SHADOW_SHIFT;
424 
425 	asan_memset(shadow, val, len);
426 
427 	return 0;
428 }
429 
430 /*
431  * Unpoisoning code for marking memory as valid during allocation calls.
432  *
433  * Very similar to asan_poison, except we need to round up instead of
434  * down, then partially poison the last granule if necessary.
435  *
436  * Partial poisoning is useful for keeping the padding poisoned. Allocations
437  * are granule-aligned, so we we're reserving granule-aligned sizes for the
438  * allocation. However, we want to still treat accesses to the padding as
439  * invalid. Partial poisoning takes care of that. Freeing and poisoning the
440  * memory is still done in granule-aligned sizes and repoisons the already
441  * poisoned padding.
442  */
443 __hidden __noasan int asan_unpoison(void __arena *addr, size_t size)
444 {
445 	size_t partial = size & ASAN_GRANULE_MASK;
446 	s8 __arena *shadow;
447 	size_t len;
448 
449 	/*
450 	 * We cannot allocate in the middle of the granule. The ASAN shadow
451 	 * map encoding only describes regions of memory where every granule
452 	 * follows this format (P for poisoned, C for clear):
453 	 *
454 	 * <Clear>  <Poisoned>
455 	 * [ C C C ... P P ]
456 	 *
457 	 * This is so we can use a single number in [0, ASAN_SHADOW_SCALE)
458 	 * to represent the poison state of the granule.
459 	 */
460 	if (unlikely((u64)addr & ASAN_GRANULE_MASK))
461 		return -EINVAL;
462 
463 	shadow = mem_to_shadow(addr);
464 	len = size >> ASAN_SHADOW_SHIFT;
465 
466 	asan_memset(shadow, 0, len);
467 
468 	/*
469 	 * If we are allocating a non-granule aligned region, we need to adjust
470 	 * the last byte of the shadow map to list how many bytes in the granule
471 	 * are unpoisoned. If the region is aligned, then the memset call above
472 	 * was enough.
473 	 */
474 	if (partial)
475 		shadow[len] = partial;
476 
477 	return 0;
478 }
479 
480 /*
481  * Initialize ASAN state when necessary. Triggered from userspace before
482  * allocator startup.
483  */
484 SEC("syscall")
485 __weak __noasan int asan_init(struct asan_init_args *args)
486 {
487 	u64 globals_pages = args->arena_globals_pages;
488 	u64 all_pages = args->arena_all_pages;
489 	u64 shadow_map, shadow_pgoff;
490 	u64 shadow_pages;
491 
492 	if (asan_inited)
493 		return 0;
494 
495 	/*
496 	 * Round up the shadow map size to the nearest page.
497 	 */
498 	shadow_pages = all_pages >> ASAN_SHADOW_SHIFT;
499 	if ((all_pages & ((1 << ASAN_SHADOW_SHIFT) - 1)))
500 		shadow_pages += 1;
501 
502 	if (all_pages > (1ULL << 32) / __PAGE_SIZE) {
503 		arena_stderr("error: arena size %lx too large", all_pages);
504 		return -EINVAL;
505 	}
506 
507 	if (globals_pages > all_pages) {
508 		arena_stderr("error: globals %lx do not fit in arena %lx",
509 				globals_pages, all_pages);
510 		return -EINVAL;
511 	}
512 
513 	if (globals_pages + shadow_pages >= all_pages) {
514 		arena_stderr("error: globals %lx do not leave room for shadow map %lx "
515 				"(arena pages %lx)",
516 				globals_pages, shadow_pages, all_pages);
517 		return -EINVAL;
518 	}
519 
520 	shadow_pgoff = all_pages - shadow_pages - globals_pages;
521 	__asan_shadow_memory_dynamic_address = shadow_pgoff * __PAGE_SIZE;
522 
523 	/*
524 	 * Allocate the last (1/ASAN_SHADOW_SCALE)th of an arena's pages for the map
525 	 * We find the offset and size from the arena map.
526 	 *
527 	 * The allocated map pages are zeroed out, meaning all memory is marked as valid
528 	 * even if it's not allocated already. This is expected: Since the actual memory
529 	 * pages are not allocated, accesses to it will trigger page faults and will be
530 	 * reported through BPF streams. Any pages allocated through bpf_arena_alloc_pages
531 	 * should be poisoned by the allocator right after the call succeeds.
532 	 */
533 	shadow_map = (u64)bpf_arena_alloc_pages(
534 		&arena, (void __arena *)__asan_shadow_memory_dynamic_address,
535 		shadow_pages, NUMA_NO_NODE, 0);
536 	if (!shadow_map) {
537 		arena_stderr("Could not allocate shadow map\n");
538 
539 		__asan_shadow_memory_dynamic_address = 0;
540 
541 		return -ENOMEM;
542 	}
543 
544 	asan_inited = true;
545 
546 	return 0;
547 }
548 
549 #pragma clang attribute pop
550 
551 #endif /* BPF_ARENA_ASAN */
552 
/* License string placed in the "license" section of the object. */
__weak char _license[] SEC("license") = "GPL";
554