1 // SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause 2 /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */ 3 #include <vmlinux.h> 4 #include <libarena/common.h> 5 #include <libarena/asan.h> 6 7 8 enum { 9 /* 10 * Is the access checked by check_region_inline 11 * a read or a write? 12 */ 13 ASAN_READ = 0x0U, 14 ASAN_WRITE = 0x1U, 15 }; 16 17 /* 18 * Address sanitizer (ASAN) for arena-based BPF programs, inspired 19 * by KASAN. 20 * 21 * The API 22 * ------- 23 * 24 * The implementation includes two kinds of components: Implementation 25 * of ASAN hooks injected by LLVM into the program, and API calls that 26 * allocators use to mark memory as valid or invalid. The full list is: 27 * 28 * LLVM stubs: 29 * 30 * void __asan_{load, store}<size>(intptr_t addr) 31 * Checks whether an access is valid. All variations covered 32 * by check_region_inline(). 33 * 34 * void __asan_{store, load}((intptr_t addr, ssize_t size) 35 * 36 * void __asan_report_{load, store}<size>(intptr_t addr) 37 * Report an access violation for the program. Used when LLVM 38 * uses direct code generation for shadow map checks. 39 * 40 * void *__asan_memcpy(void *d, const void *s, size_t n) 41 * void *__asan_memmove(void *d, const void *s, size_t n) 42 * void *__asan_memset(void *p, int c, size_t n) 43 * Hooks for ASAN instrumentation of the LLVM mem* builtins. 44 * Currently unimplemented just like the builtins themselves. 45 * 46 * API methods: 47 * 48 * asan_init() 49 * Initialize the ASAN map for the arena. 50 * 51 * asan_poison() 52 * Mark a region of memory as poisoned. Accessing poisoned memory 53 * causes asan_report() to fire. Invoked during free(). 54 * 55 * asan_unpoison() 56 * Mark a region as unpoisoned after alloc(). 57 * 58 * asan_shadow_set() 59 * Check a byte's validity directly. 60 * 61 * The Algorithm In Brief 62 * ---------------------- 63 * Each group of 8 bytes is mapped to a "granule" in the shadow map. This 64 * granule is the size of the byte and describes which bytes are valid. 65 * Possible values are: 66 * 67 * 0: All bytes are valid. Makes checks in the middle of an allocated region 68 * (most of them) fast. 69 * (0, 7]: How many consecutive bytes are valid, starting from the lowest one. 70 * The tradeoff is that we can't poison individual bytes in the middle of a 71 * valid region. 72 * [0x80, 0xff]: Special poison values, can be used to denote specific error 73 * modes (e.g., recently freed vs uninitialized memory). 74 * 75 * The mapping between a memory location and its shadow is: 76 * shadow_addr = shadow_base + (addr >> 3). We retain the 8:1 data:shadow 77 * ratio of existing ASAN implementations as a compromise between tracking 78 * granularity and space usage/scan overhead. 79 */ 80 81 #ifdef BPF_ARENA_ASAN 82 83 #pragma clang attribute push(__attribute__((no_sanitize("address"))), \ 84 apply_to = function) 85 86 #define SHADOW_ALL_ZEROES ((u64)-1) 87 88 /* 89 * Canary variable for ASAN violations. Set to the offending address. 90 */ 91 volatile u64 asan_violated = 0; 92 93 /* 94 * Shadow map occupancy map. 95 */ 96 volatile u64 __asan_shadow_memory_dynamic_address; 97 98 volatile u32 asan_reported = false; 99 volatile bool asan_inited = false; 100 101 /* 102 * Set during program load. 103 */ 104 volatile bool asan_report_once = false; 105 106 /* 107 * BPF does not currently support the memset/memcpy/memcmp intrinsics. 108 * For large sequential copies, or assignments of large data structures, 109 * the frontend will generate an intrinsic that causes the BPF backend 110 * to exit due to a missing implementation. Provide a simple implementation 111 * just for memset to use it for poisoning/unpoisoning the map. 112 */ 113 __weak int asan_memset(s8 __arena *dst, s8 val, size_t size) 114 { 115 size_t i; 116 117 for (i = zero; i < size && can_loop; i++) 118 dst[i] = val; 119 120 return 0; 121 } 122 123 /* Validate a 1-byte access, always within a single byte. */ 124 static __always_inline bool memory_is_poisoned_1(s8 __arena *addr) 125 { 126 s8 shadow_value = *(s8 __arena *)mem_to_shadow(addr); 127 128 /* Byte is 0, access is valid. */ 129 if (likely(!shadow_value)) 130 return false; 131 132 /* 133 * Byte is non-zero. Access is valid if granule offset in [0, shadow_value), 134 * so the memory is poisoned if shadow_value is negative or smaller than 135 * the granule's value. 136 */ 137 138 return ASAN_GRANULE(addr) >= shadow_value; 139 } 140 141 /* Validate a 2- 4-, 8-byte access, shadow spans up to 2 bytes. */ 142 static __always_inline bool memory_is_poisoned_2_4_8(s8 __arena *addr, u64 size) 143 { 144 u64 end = (u64)addr + size - 1; 145 146 /* 147 * Region fully within a single byte (addition didn't 148 * overflow above ASAN_GRANULE). 149 */ 150 if (likely(ASAN_GRANULE(end) >= size - 1)) 151 return memory_is_poisoned_1((s8 __arena *)end); 152 153 /* 154 * Otherwise first byte must be fully unpoisoned, and second byte 155 * must be unpoisoned up to the end of the accessed region. 156 */ 157 158 return *(s8 __arena *)mem_to_shadow(addr) || memory_is_poisoned_1((s8 __arena *)end); 159 } 160 161 __weak bool asan_shadow_set(void __arena *addr) 162 { 163 return memory_is_poisoned_1(addr); 164 } 165 166 static __always_inline u64 first_nonzero_byte(u64 addr, size_t size) 167 { 168 while (size && can_loop) { 169 if (unlikely(*(s8 __arena *)addr)) 170 return addr; 171 addr += 1; 172 size -= 1; 173 } 174 175 return SHADOW_ALL_ZEROES; 176 } 177 178 static __always_inline bool memory_is_poisoned_n(s8 __arena *addr, u64 size) 179 { 180 u64 ret; 181 u64 start; 182 u64 end; 183 184 /* Size of [start, end] is end - start + 1. */ 185 start = (u64)mem_to_shadow(addr); 186 end = (u64)mem_to_shadow(addr + size - 1); 187 188 ret = first_nonzero_byte(start, (end - start) + 1); 189 if (likely(ret == SHADOW_ALL_ZEROES)) 190 return false; 191 192 return unlikely(ret != end || ASAN_GRANULE(addr + size - 1) >= *(s8 __arena *)end); 193 } 194 195 __weak int asan_report(s8 __arena *addr, size_t sz, u32 flags) 196 { 197 u32 reported = __sync_val_compare_and_swap(&asan_reported, false, true); 198 199 /* Only report the first ASAN violation. */ 200 if (reported && asan_report_once) 201 return 0; 202 203 asan_violated = (u64)addr; 204 205 arena_stderr("Memory violation for address %p (0x%lx) for %s of size %ld\n", 206 addr, (u64)addr, 207 (flags & ASAN_WRITE) ? "write" : "read", 208 sz); 209 bpf_stream_print_stack(BPF_STDERR); 210 211 return 0; 212 } 213 214 static __always_inline bool check_asan_args(s8 __arena *addr, size_t size, 215 bool *result) 216 { 217 bool valid = true; 218 219 /* Size 0 accesses are valid even if the address is invalid. */ 220 if (unlikely(size == 0)) 221 goto confirmed_valid; 222 223 /* 224 * Wraparound is possible for values close to the the edge of the 225 * 4GiB boundary of the arena (last valid address is 1UL << 32 - 1). 226 * 227 * 228 * The wraparound detection below works for small sizes. check_asan_args is 229 * always called from the builtin ASAN checks, so 1 <= size <= 64. Even 230 * for storeN/loadN that we do not expect to encounter the intrinsics will 231 * not have a large enough size that: 232 * 233 * - addr + size > MAX_U32 234 * - (u32)(addr + size) > (u32) addr 235 * 236 * which would defeat wraparound detection. 237 */ 238 if (unlikely((u32)(u64)(addr + size) < (u32)(u64)addr)) 239 goto confirmed_invalid; 240 241 return false; 242 243 confirmed_invalid: 244 valid = false; 245 246 /* FALLTHROUGH */ 247 confirmed_valid: 248 *result = valid; 249 250 return true; 251 } 252 253 static __always_inline bool check_region_inline(intptr_t ptr, size_t size, 254 u32 flags) 255 { 256 s8 __arena *addr = (s8 __arena *)(u64)ptr; 257 bool is_poisoned, is_valid; 258 259 if (check_asan_args(addr, size, &is_valid)) { 260 if (!is_valid) 261 asan_report(addr, size, flags); 262 return is_valid; 263 } 264 265 switch (size) { 266 case 1: 267 is_poisoned = memory_is_poisoned_1(addr); 268 break; 269 case 2: 270 case 4: 271 case 8: 272 is_poisoned = memory_is_poisoned_2_4_8(addr, size); 273 break; 274 default: 275 is_poisoned = memory_is_poisoned_n(addr, size); 276 } 277 278 if (is_poisoned) { 279 asan_report(addr, size, flags); 280 return false; 281 } 282 283 return true; 284 } 285 286 /* 287 * __alias is not supported for BPF so define *__noabort() variants as wrappers. 288 */ 289 #define DEFINE_ASAN_LOAD_STORE(size) \ 290 __hidden void __asan_store##size(intptr_t addr) \ 291 { \ 292 check_region_inline(addr, size, ASAN_WRITE); \ 293 } \ 294 __hidden void __asan_store##size##_noabort(intptr_t addr) \ 295 { \ 296 check_region_inline(addr, size, ASAN_WRITE); \ 297 } \ 298 __hidden void __asan_load##size(intptr_t addr) \ 299 { \ 300 check_region_inline(addr, size, ASAN_READ); \ 301 } \ 302 __hidden void __asan_load##size##_noabort(intptr_t addr) \ 303 { \ 304 check_region_inline(addr, size, ASAN_READ); \ 305 } \ 306 __hidden void __asan_report_store##size(intptr_t addr) \ 307 { \ 308 asan_report((s8 __arena *)addr, size, ASAN_WRITE); \ 309 } \ 310 __hidden void __asan_report_store##size##_noabort(intptr_t addr) \ 311 { \ 312 asan_report((s8 __arena *)addr, size, ASAN_WRITE); \ 313 } \ 314 __hidden void __asan_report_load##size(intptr_t addr) \ 315 { \ 316 asan_report((s8 __arena *)addr, size, ASAN_READ); \ 317 } \ 318 __hidden void __asan_report_load##size##_noabort(intptr_t addr) \ 319 { \ 320 asan_report((s8 __arena *)addr, size, ASAN_READ); \ 321 } 322 323 DEFINE_ASAN_LOAD_STORE(1); 324 DEFINE_ASAN_LOAD_STORE(2); 325 DEFINE_ASAN_LOAD_STORE(4); 326 DEFINE_ASAN_LOAD_STORE(8); 327 328 void __asan_storeN(intptr_t addr, ssize_t size) 329 { 330 check_region_inline(addr, size, ASAN_WRITE); 331 } 332 333 void __asan_storeN_noabort(intptr_t addr, ssize_t size) 334 { 335 check_region_inline(addr, size, ASAN_WRITE); 336 } 337 338 void __asan_loadN(intptr_t addr, ssize_t size) 339 { 340 check_region_inline(addr, size, ASAN_READ); 341 } 342 343 void __asan_loadN_noabort(intptr_t addr, ssize_t size) 344 { 345 check_region_inline(addr, size, ASAN_READ); 346 } 347 348 /* 349 * We currently do not sanitize globals. 350 */ 351 void __asan_register_globals(intptr_t globals, size_t n) 352 { 353 } 354 355 void __asan_unregister_globals(intptr_t globals, size_t n) 356 { 357 } 358 359 /* 360 * We do not currently have memcpy/memmove/memset intrinsics 361 * in LLVM. Do not implement sanitization. 362 */ 363 void *__asan_memcpy(void *d, const void *s, size_t n) 364 { 365 arena_stderr("ASAN: Unexpected %s call", __func__); 366 return NULL; 367 } 368 369 void *__asan_memmove(void *d, const void *s, size_t n) 370 { 371 arena_stderr("ASAN: Unexpected %s call", __func__); 372 return NULL; 373 } 374 375 void *__asan_memset(void *p, int c, size_t n) 376 { 377 arena_stderr("ASAN: Unexpected %s call", __func__); 378 return NULL; 379 } 380 381 /* 382 * Poisoning code, used when we add more freed memory to the allocator by: 383 * a) pulling memory from the arena segment using bpf_arena_alloc_pages() 384 * b) freeing memory from application code 385 */ 386 __hidden __noasan int asan_poison(void __arena *addr, s8 val, size_t size) 387 { 388 s8 __arena *shadow; 389 size_t len; 390 391 /* 392 * Poisoning from a non-granule address makes no sense: We can only allocate 393 * memory to the application that has a granule-aligned starting address, 394 * and bpf_arena_alloc_pages returns page-aligned memory. A non-aligned 395 * addr then implies we're freeing a different address than the one we 396 * allocated. 397 */ 398 if (unlikely((u64)addr & ASAN_GRANULE_MASK)) 399 return -EINVAL; 400 401 /* 402 * We cannot free an unaligned region because it'd be possible that we 403 * cannot describe the resulting poisoning state of the granule in 404 * the ASAN encoding. 405 * 406 * Every granule represents a region of memory that looks like the 407 * following (P for poisoned bytes, C for clear): 408 * 409 * <Clear> <Poisoned> 410 * [ C C C ... P P ] 411 * 412 * The value of the granule's shadow map is the number of clear bytes in 413 * it. We cannot represent granules with the following state: 414 * 415 * [ P P ... C C ... P P ] 416 * 417 * That would be possible if we could free unaligned regions, so prevent that. 418 */ 419 if (unlikely(size & ASAN_GRANULE_MASK)) 420 return -EINVAL; 421 422 shadow = mem_to_shadow(addr); 423 len = size >> ASAN_SHADOW_SHIFT; 424 425 asan_memset(shadow, val, len); 426 427 return 0; 428 } 429 430 /* 431 * Unpoisoning code for marking memory as valid during allocation calls. 432 * 433 * Very similar to asan_poison, except we need to round up instead of 434 * down, then partially poison the last granule if necessary. 435 * 436 * Partial poisoning is useful for keeping the padding poisoned. Allocations 437 * are granule-aligned, so we we're reserving granule-aligned sizes for the 438 * allocation. However, we want to still treat accesses to the padding as 439 * invalid. Partial poisoning takes care of that. Freeing and poisoning the 440 * memory is still done in granule-aligned sizes and repoisons the already 441 * poisoned padding. 442 */ 443 __hidden __noasan int asan_unpoison(void __arena *addr, size_t size) 444 { 445 size_t partial = size & ASAN_GRANULE_MASK; 446 s8 __arena *shadow; 447 size_t len; 448 449 /* 450 * We cannot allocate in the middle of the granule. The ASAN shadow 451 * map encoding only describes regions of memory where every granule 452 * follows this format (P for poisoned, C for clear): 453 * 454 * <Clear> <Poisoned> 455 * [ C C C ... P P ] 456 * 457 * This is so we can use a single number in [0, ASAN_SHADOW_SCALE) 458 * to represent the poison state of the granule. 459 */ 460 if (unlikely((u64)addr & ASAN_GRANULE_MASK)) 461 return -EINVAL; 462 463 shadow = mem_to_shadow(addr); 464 len = size >> ASAN_SHADOW_SHIFT; 465 466 asan_memset(shadow, 0, len); 467 468 /* 469 * If we are allocating a non-granule aligned region, we need to adjust 470 * the last byte of the shadow map to list how many bytes in the granule 471 * are unpoisoned. If the region is aligned, then the memset call above 472 * was enough. 473 */ 474 if (partial) 475 shadow[len] = partial; 476 477 return 0; 478 } 479 480 /* 481 * Initialize ASAN state when necessary. Triggered from userspace before 482 * allocator startup. 483 */ 484 SEC("syscall") 485 __weak __noasan int asan_init(struct asan_init_args *args) 486 { 487 u64 globals_pages = args->arena_globals_pages; 488 u64 all_pages = args->arena_all_pages; 489 u64 shadow_map, shadow_pgoff; 490 u64 shadow_pages; 491 492 if (asan_inited) 493 return 0; 494 495 /* 496 * Round up the shadow map size to the nearest page. 497 */ 498 shadow_pages = all_pages >> ASAN_SHADOW_SHIFT; 499 if ((all_pages & ((1 << ASAN_SHADOW_SHIFT) - 1))) 500 shadow_pages += 1; 501 502 if (all_pages > (1ULL << 32) / __PAGE_SIZE) { 503 arena_stderr("error: arena size %lx too large", all_pages); 504 return -EINVAL; 505 } 506 507 if (globals_pages > all_pages) { 508 arena_stderr("error: globals %lx do not fit in arena %lx", 509 globals_pages, all_pages); 510 return -EINVAL; 511 } 512 513 if (globals_pages + shadow_pages >= all_pages) { 514 arena_stderr("error: globals %lx do not leave room for shadow map %lx " 515 "(arena pages %lx)", 516 globals_pages, shadow_pages, all_pages); 517 return -EINVAL; 518 } 519 520 shadow_pgoff = all_pages - shadow_pages - globals_pages; 521 __asan_shadow_memory_dynamic_address = shadow_pgoff * __PAGE_SIZE; 522 523 /* 524 * Allocate the last (1/ASAN_SHADOW_SCALE)th of an arena's pages for the map 525 * We find the offset and size from the arena map. 526 * 527 * The allocated map pages are zeroed out, meaning all memory is marked as valid 528 * even if it's not allocated already. This is expected: Since the actual memory 529 * pages are not allocated, accesses to it will trigger page faults and will be 530 * reported through BPF streams. Any pages allocated through bpf_arena_alloc_pages 531 * should be poisoned by the allocator right after the call succeeds. 532 */ 533 shadow_map = (u64)bpf_arena_alloc_pages( 534 &arena, (void __arena *)__asan_shadow_memory_dynamic_address, 535 shadow_pages, NUMA_NO_NODE, 0); 536 if (!shadow_map) { 537 arena_stderr("Could not allocate shadow map\n"); 538 539 __asan_shadow_memory_dynamic_address = 0; 540 541 return -ENOMEM; 542 } 543 544 asan_inited = true; 545 546 return 0; 547 } 548 549 #pragma clang attribute pop 550 551 #endif /* BPF_ARENA_ASAN */ 552 553 __weak char _license[] SEC("license") = "GPL"; 554