// SPDX-License-Identifier: GPL-2.0-only
#include <linux/alloc_tag.h>
#include <linux/execmem.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/page_ext.h>
#include <linux/pgalloc_tag.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/string_choices.h>
#include <linux/vmalloc.h>
#include <linux/kmemleak.h>

#define ALLOCINFO_FILE_NAME		"allocinfo"
#define MODULE_ALLOC_TAG_VMAP_SIZE	(100000UL * sizeof(struct alloc_tag))
#define SECTION_START(NAME)	(CODETAG_SECTION_START_PREFIX NAME)
#define SECTION_STOP(NAME)	(CODETAG_SECTION_STOP_PREFIX NAME)

#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
static bool mem_profiling_support = true;
#else
static bool mem_profiling_support;
#endif

static struct codetag_type *alloc_tag_cttype;

#ifdef CONFIG_ARCH_MODULE_NEEDS_WEAK_PER_CPU
DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
EXPORT_SYMBOL(_shared_alloc_tag);
#endif

DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
			mem_alloc_profiling_key);
EXPORT_SYMBOL(mem_alloc_profiling_key);

DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed);

struct alloc_tag_kernel_section kernel_tags = { NULL, 0 };
unsigned long alloc_tag_ref_mask;
int alloc_tag_ref_offs;

struct allocinfo_private {
	struct codetag_iterator iter;
	bool print_header;
};

static void *allocinfo_start(struct seq_file *m, loff_t *pos)
{
	struct allocinfo_private *priv;
	loff_t node = *pos;

	priv = (struct allocinfo_private *)m->private;
	codetag_lock_module_list(alloc_tag_cttype, true);
	if (node == 0) {
		priv->print_header = true;
		priv->iter = codetag_get_ct_iter(alloc_tag_cttype);
		codetag_next_ct(&priv->iter);
	}
	return priv->iter.ct ? priv : NULL;
}

static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
	struct codetag *ct = codetag_next_ct(&priv->iter);

	(*pos)++;
	if (!ct)
		return NULL;

	return priv;
}

static void allocinfo_stop(struct seq_file *m, void *arg)
{
	codetag_lock_module_list(alloc_tag_cttype, false);
}
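
/*
 * Formatting for /proc/allocinfo: a versioned header followed by one line
 * per allocation tag. An illustrative (not verbatim) excerpt:
 *
 *	allocinfo - version: 2.0
 *	#     <size>  <calls> <tag info>
 *	        4096        1 mm/somefile.c:123 func:some_func
 */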
static void print_allocinfo_header(struct seq_buf *buf)
{
	/* Output format version, so we can change it. */
	seq_buf_printf(buf, "allocinfo - version: 2.0\n");
	seq_buf_printf(buf, "#     <size>  <calls> <tag info>\n");
}

static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
{
	struct alloc_tag *tag = ct_to_alloc_tag(ct);
	struct alloc_tag_counters counter = alloc_tag_read(tag);
	s64 bytes = counter.bytes;

	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
	codetag_to_text(out, ct);
	if (unlikely(alloc_tag_is_inaccurate(tag)))
		seq_buf_printf(out, " accurate:no");
	seq_buf_putc(out, ' ');
	seq_buf_putc(out, '\n');
}

static int allocinfo_show(struct seq_file *m, void *arg)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
	char *bufp;
	size_t n = seq_get_buf(m, &bufp);
	struct seq_buf buf;

	seq_buf_init(&buf, bufp, n);
	if (priv->print_header) {
		print_allocinfo_header(&buf);
		priv->print_header = false;
	}
	alloc_tag_to_text(&buf, priv->iter.ct);
	seq_commit(m, seq_buf_used(&buf));
	return 0;
}

static const struct seq_operations allocinfo_seq_op = {
	.start	= allocinfo_start,
	.next	= allocinfo_next,
	.stop	= allocinfo_stop,
	.show	= allocinfo_show,
};

size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep)
{
	struct codetag_iterator iter;
	struct codetag *ct;
	struct codetag_bytes n;
	unsigned int i, nr = 0;

	if (IS_ERR_OR_NULL(alloc_tag_cttype))
		return 0;

	if (can_sleep)
		codetag_lock_module_list(alloc_tag_cttype, true);
	else if (!codetag_trylock_module_list(alloc_tag_cttype))
		return 0;

	iter = codetag_get_ct_iter(alloc_tag_cttype);
	while ((ct = codetag_next_ct(&iter))) {
		struct alloc_tag_counters counter = alloc_tag_read(ct_to_alloc_tag(ct));

		n.ct	= ct;
		n.bytes = counter.bytes;

		for (i = 0; i < nr; i++)
			if (n.bytes > tags[i].bytes)
				break;

		if (i < count) {
			nr -= nr == count;
			memmove(&tags[i + 1],
				&tags[i],
				sizeof(tags[0]) * (nr - i));
			nr++;
			tags[i] = n;
		}
	}

	codetag_lock_module_list(alloc_tag_cttype, false);

	return nr;
}

void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
{
	int i;
	struct alloc_tag *tag;
	unsigned int nr_pages = 1 << new_order;

	if (!mem_alloc_profiling_enabled())
		return;

	tag = __pgalloc_tag_get(&folio->page);
	if (!tag)
		return;

	for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
		union pgtag_ref_handle handle;
		union codetag_ref ref;

		if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) {
			/* Set new reference to point to the original tag */
			alloc_tag_ref_set(&ref, tag);
			update_page_tag_ref(handle, &ref);
			put_page_tag_ref(handle);
		}
	}
}
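
/*
 * Exchange the tag references of two folios so each tag keeps accounting
 * the allocation it originally tracked. Both references are cleared first
 * because __alloc_tag_ref_set() warns when handed a non-empty reference.
 */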
void pgalloc_tag_swap(struct folio *new, struct folio *old)
{
	union pgtag_ref_handle handle_old, handle_new;
	union codetag_ref ref_old, ref_new;
	struct alloc_tag *tag_old, *tag_new;

	if (!mem_alloc_profiling_enabled())
		return;

	tag_old = __pgalloc_tag_get(&old->page);
	if (!tag_old)
		return;
	tag_new = __pgalloc_tag_get(&new->page);
	if (!tag_new)
		return;

	if (!get_page_tag_ref(&old->page, &ref_old, &handle_old))
		return;
	if (!get_page_tag_ref(&new->page, &ref_new, &handle_new)) {
		put_page_tag_ref(handle_old);
		return;
	}

	/*
	 * Clear tag references to avoid debug warning when using
	 * __alloc_tag_ref_set() with non-empty reference.
	 */
	set_codetag_empty(&ref_old);
	set_codetag_empty(&ref_new);

	/* swap tags */
	__alloc_tag_ref_set(&ref_old, tag_new);
	update_page_tag_ref(handle_old, &ref_old);
	__alloc_tag_ref_set(&ref_new, tag_old);
	update_page_tag_ref(handle_new, &ref_new);

	put_page_tag_ref(handle_old);
	put_page_tag_ref(handle_new);
}

static void shutdown_mem_profiling(bool remove_file)
{
	if (mem_alloc_profiling_enabled())
		static_branch_disable(&mem_alloc_profiling_key);

	if (!mem_profiling_support)
		return;

	if (remove_file)
		remove_proc_entry(ALLOCINFO_FILE_NAME, NULL);
	mem_profiling_support = false;
}

void __init alloc_tag_sec_init(void)
{
	struct alloc_tag *last_codetag;

	if (!mem_profiling_support)
		return;

	if (!static_key_enabled(&mem_profiling_compressed))
		return;

	kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name(
					SECTION_START(ALLOC_TAG_SECTION_NAME));
	last_codetag = (struct alloc_tag *)kallsyms_lookup_name(
					SECTION_STOP(ALLOC_TAG_SECTION_NAME));
	kernel_tags.count = last_codetag - kernel_tags.first_tag;

	/* Check if kernel tags fit into page flags */
	if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) {
		shutdown_mem_profiling(false); /* allocinfo file does not exist yet */
		pr_err("%lu allocation tags cannot be referenced using %d available page flag bits. Memory allocation profiling is disabled!\n",
			kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS);
		return;
	}

	alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS);
	alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1);
	pr_debug("Memory allocation profiling compression is using %d page flag bits!\n",
		 NR_UNUSED_PAGEFLAG_BITS);
}

#ifdef CONFIG_MODULES

static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE);
static struct vm_struct *vm_module_tags;
/* A dummy object used to indicate an unloaded module */
static struct module unloaded_mod;
/* A dummy object used to indicate a module prepended area */
static struct module prepend_mod;

struct alloc_tag_module_section module_tags;

static inline unsigned long alloc_tag_align(unsigned long val)
{
	if (!static_key_enabled(&mem_profiling_compressed)) {
		/* No alignment requirements when we are not indexing the tags */
		return val;
	}

	if (val % sizeof(struct alloc_tag) == 0)
		return val;
	return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag);
}
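
/*
 * With compressed references a tag is identified by its index within the
 * tag section, so every tag must sit at a multiple of
 * sizeof(struct alloc_tag). Reject section alignments incompatible with
 * that, and round any prepended area up to a whole number of tags.
 */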
static bool ensure_alignment(unsigned long align, unsigned int *prepend)
{
	if (!static_key_enabled(&mem_profiling_compressed)) {
		/* No alignment requirements when we are not indexing the tags */
		return true;
	}

	/*
	 * If alloc_tag size is not a multiple of required alignment, tag
	 * indexing does not work.
	 */
	if (!IS_ALIGNED(sizeof(struct alloc_tag), align))
		return false;

	/* Ensure prepend consumes multiple of alloc_tag-sized blocks */
	if (*prepend)
		*prepend = alloc_tag_align(*prepend);

	return true;
}

static inline bool tags_addressable(void)
{
	unsigned long tag_idx_count;

	if (!static_key_enabled(&mem_profiling_compressed))
		return true; /* with page_ext tags are always addressable */

	tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count +
			module_tags.size / sizeof(struct alloc_tag);

	return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS);
}

static bool needs_section_mem(struct module *mod, unsigned long size)
{
	if (!mem_profiling_support)
		return false;

	return size >= sizeof(struct alloc_tag);
}

static bool clean_unused_counters(struct alloc_tag *start_tag,
				  struct alloc_tag *end_tag)
{
	struct alloc_tag *tag;
	bool ret = true;

	for (tag = start_tag; tag <= end_tag; tag++) {
		struct alloc_tag_counters counter;

		if (!tag->counters)
			continue;

		counter = alloc_tag_read(tag);
		if (!counter.bytes) {
			free_percpu(tag->counters);
			tag->counters = NULL;
		} else {
			ret = false;
		}
	}

	return ret;
}

/* Called with mod_area_mt locked */
static void clean_unused_module_areas_locked(void)
{
	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
	struct module *val;

	mas_for_each(&mas, val, module_tags.size) {
		struct alloc_tag *start_tag;
		struct alloc_tag *end_tag;

		if (val != &unloaded_mod)
			continue;

		/* Release area if all tags are unused */
		start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
		end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
		if (clean_unused_counters(start_tag, end_tag))
			mas_erase(&mas);
	}
}

/* Called with mod_area_mt locked */
static bool find_aligned_area(struct ma_state *mas, unsigned long section_size,
			      unsigned long size, unsigned int prepend,
			      unsigned long align)
{
	bool cleanup_done = false;

repeat:
	/* Try finding exact size and hope the start is aligned */
	if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) {
		if (IS_ALIGNED(mas->index + prepend, align))
			return true;

		/* Try finding larger area to align later */
		mas_reset(mas);
		if (!mas_empty_area(mas, 0, section_size - 1,
				    size + prepend + align - 1))
			return true;
	}

	/* No free area; try cleaning up stale data and repeat the search once */
	if (!cleanup_done) {
		clean_unused_module_areas_locked();
		cleanup_done = true;
		mas_reset(mas);
		goto repeat;
	}

	return false;
}
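
/*
 * Back the module tag area with real pages up to the current
 * module_tags.size: bulk-allocate the missing pages, map them into the
 * reserved vmalloc range and extend the KASAN shadow to cover them.
 */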
static int vm_module_tags_populate(void)
{
	unsigned long phys_end = ALIGN_DOWN(module_tags.start_addr, PAGE_SIZE) +
				 (vm_module_tags->nr_pages << PAGE_SHIFT);
	unsigned long new_end = module_tags.start_addr + module_tags.size;

	if (phys_end < new_end) {
		struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
		unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN);
		unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN);
		unsigned long more_pages;
		unsigned long nr = 0;

		more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT;
		while (nr < more_pages) {
			unsigned long allocated;

			allocated = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN,
							  NUMA_NO_NODE, more_pages - nr,
							  next_page + nr);
			if (!allocated)
				break;
			nr += allocated;
		}

		if (nr < more_pages ||
		    vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL,
				     next_page, PAGE_SHIFT) < 0) {
			release_pages_arg arg = { .pages = next_page };

			/* Clean up and error out */
			release_pages(arg, nr);
			return -ENOMEM;
		}

		vm_module_tags->nr_pages += nr;

		/*
		 * Kasan allocates 1 byte of shadow for every 8 bytes of data.
		 * When kasan_alloc_module_shadow allocates shadow memory,
		 * its unit of allocation is a page.
		 * Therefore, here we need to align to MODULE_ALIGN.
		 */
		if (old_shadow_end < new_shadow_end)
			kasan_alloc_module_shadow((void *)old_shadow_end,
						  new_shadow_end - old_shadow_end,
						  GFP_KERNEL);
	}

	/*
	 * Mark the pages as accessible, now that they are mapped.
	 * With hardware tag-based KASAN, marking is skipped for
	 * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc().
	 */
	kasan_unpoison_vmalloc((void *)module_tags.start_addr,
			       new_end - module_tags.start_addr,
			       KASAN_VMALLOC_PROT_NORMAL);

	return 0;
}
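
/*
 * Reserve a range for @mod's tags inside the module tag section. Occupied
 * ranges are tracked in mod_area_mt; padding added to satisfy @align is
 * stored as &prepend_mod so release_module_tags() can reclaim it together
 * with the module's own range.
 */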
static void *reserve_module_tags(struct module *mod, unsigned long size,
				 unsigned int prepend, unsigned long align)
{
	unsigned long section_size = module_tags.end_addr - module_tags.start_addr;
	MA_STATE(mas, &mod_area_mt, 0, section_size - 1);
	unsigned long offset;
	void *ret = NULL;

	/* If no tags, return error */
	if (size < sizeof(struct alloc_tag))
		return ERR_PTR(-EINVAL);

	/*
	 * align is always a power of 2, so we can use IS_ALIGNED and ALIGN.
	 * align 0 or 1 means no alignment; to simplify, set it to 1.
	 */
	if (!align)
		align = 1;

	if (!ensure_alignment(align, &prepend)) {
		shutdown_mem_profiling(true);
		pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n",
			mod->name, align);
		return ERR_PTR(-EINVAL);
	}

	mas_lock(&mas);
	if (!find_aligned_area(&mas, section_size, size, prepend, align)) {
		ret = ERR_PTR(-ENOMEM);
		goto unlock;
	}

	/* Mark found area as reserved */
	offset = mas.index;
	offset += prepend;
	offset = ALIGN(offset, align);
	if (offset != mas.index) {
		unsigned long pad_start = mas.index;

		mas.last = offset - 1;
		mas_store(&mas, &prepend_mod);
		if (mas_is_err(&mas)) {
			ret = ERR_PTR(xa_err(mas.node));
			goto unlock;
		}
		mas.index = offset;
		mas.last = offset + size - 1;
		mas_store(&mas, mod);
		if (mas_is_err(&mas)) {
			mas.index = pad_start;
			mas_erase(&mas);
			ret = ERR_PTR(xa_err(mas.node));
		}
	} else {
		mas.last = offset + size - 1;
		mas_store(&mas, mod);
		if (mas_is_err(&mas))
			ret = ERR_PTR(xa_err(mas.node));
	}
unlock:
	mas_unlock(&mas);

	if (IS_ERR(ret))
		return ret;

	if (module_tags.size < offset + size) {
		int grow_res;

		module_tags.size = offset + size;
		if (mem_alloc_profiling_enabled() && !tags_addressable()) {
			shutdown_mem_profiling(true);
			pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n",
				mod->name, NR_UNUSED_PAGEFLAG_BITS);
		}

		grow_res = vm_module_tags_populate();
		if (grow_res) {
			shutdown_mem_profiling(true);
			pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n",
			       mod->name);
			return ERR_PTR(grow_res);
		}
	}

	return (struct alloc_tag *)(module_tags.start_addr + offset);
}

static void release_module_tags(struct module *mod, bool used)
{
	MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size);
	struct alloc_tag *start_tag;
	struct alloc_tag *end_tag;
	struct module *val;

	mas_lock(&mas);
	mas_for_each_rev(&mas, val, 0)
		if (val == mod)
			break;

	if (!val) /* module not found */
		goto out;

	if (!used)
		goto release_area;

	start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
	end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
	if (!clean_unused_counters(start_tag, end_tag)) {
		struct alloc_tag *tag;

		for (tag = start_tag; tag <= end_tag; tag++) {
			struct alloc_tag_counters counter;

			if (!tag->counters)
				continue;

			counter = alloc_tag_read(tag);
			pr_info("%s:%u module %s func:%s has %llu bytes allocated at module unload\n",
				tag->ct.filename, tag->ct.lineno, tag->ct.modname,
				tag->ct.function, counter.bytes);
		}
	} else {
		used = false;
	}
release_area:
	mas_store(&mas, used ? &unloaded_mod : NULL);
	val = mas_prev_range(&mas, 0);
	if (val == &prepend_mod)
		mas_store(&mas, NULL);
out:
	mas_unlock(&mas);
}
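
/*
 * Allocate per-CPU counters for each tag the module brings in. On failure,
 * roll back the counters allocated so far and fail the module load.
 */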
static int load_module(struct module *mod, struct codetag *start, struct codetag *stop)
{
	/* Allocate module alloc_tag percpu counters */
	struct alloc_tag *start_tag;
	struct alloc_tag *stop_tag;
	struct alloc_tag *tag;

	/* percpu counters for core allocations are already statically allocated */
	if (!mod)
		return 0;

	start_tag = ct_to_alloc_tag(start);
	stop_tag = ct_to_alloc_tag(stop);
	for (tag = start_tag; tag < stop_tag; tag++) {
		WARN_ON(tag->counters);
		tag->counters = alloc_percpu(struct alloc_tag_counters);
		if (!tag->counters) {
			while (--tag >= start_tag) {
				free_percpu(tag->counters);
				tag->counters = NULL;
			}
			pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s\n",
			       mod->name);
			return -ENOMEM;
		}

		/*
		 * Avoid a kmemleak false positive. The pointer to the counters is stored
		 * in the alloc_tag section of the module and cannot be directly accessed.
		 */
		kmemleak_ignore_percpu(tag->counters);
	}
	return 0;
}

static void replace_module(struct module *mod, struct module *new_mod)
{
	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
	struct module *val;

	mas_lock(&mas);
	mas_for_each(&mas, val, module_tags.size) {
		if (val != mod)
			continue;

		mas_store_gfp(&mas, new_mod, GFP_KERNEL);
		break;
	}
	mas_unlock(&mas);
}

static int __init alloc_mod_tags_mem(void)
{
	/* Map space to copy allocation tags */
	vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE);
	if (!vm_module_tags) {
		pr_err("Failed to map %lu bytes for module allocation tags\n",
			MODULE_ALLOC_TAG_VMAP_SIZE);
		module_tags.start_addr = 0;
		return -ENOMEM;
	}

	vm_module_tags->pages = kmalloc_objs(struct page *,
					get_vm_area_size(vm_module_tags) >> PAGE_SHIFT,
					GFP_KERNEL | __GFP_ZERO);
	if (!vm_module_tags->pages) {
		free_vm_area(vm_module_tags);
		return -ENOMEM;
	}

	module_tags.start_addr = (unsigned long)vm_module_tags->addr;
	module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE;
	/* Ensure the base is alloc_tag aligned when required for indexing */
	module_tags.start_addr = alloc_tag_align(module_tags.start_addr);

	return 0;
}

static void __init free_mod_tags_mem(void)
{
	release_pages_arg arg = { .pages = vm_module_tags->pages };

	module_tags.start_addr = 0;
	release_pages(arg, vm_module_tags->nr_pages);
	kfree(vm_module_tags->pages);
	free_vm_area(vm_module_tags);
}

#else /* CONFIG_MODULES */

static inline int alloc_mod_tags_mem(void) { return 0; }
static inline void free_mod_tags_mem(void) {}

#endif /* CONFIG_MODULES */

/* See: Documentation/mm/allocation-profiling.rst */
static int __init setup_early_mem_profiling(char *str)
{
	bool compressed = false;
	bool enable;

	if (!str || !str[0])
		return -EINVAL;

	if (!strncmp(str, "never", 5)) {
		enable = false;
		mem_profiling_support = false;
		pr_info("Memory allocation profiling is disabled!\n");
	} else {
		char *token = strsep(&str, ",");

		if (kstrtobool(token, &enable))
			return -EINVAL;

		if (str) {
			if (strcmp(str, "compressed"))
				return -EINVAL;

			compressed = true;
		}
		mem_profiling_support = true;
		pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n",
			compressed ? "with" : "without", str_on_off(enable));
	}

	if (enable != mem_alloc_profiling_enabled()) {
		if (enable)
			static_branch_enable(&mem_alloc_profiling_key);
		else
			static_branch_disable(&mem_alloc_profiling_key);
	}
	if (compressed != static_key_enabled(&mem_profiling_compressed)) {
		if (compressed)
			static_branch_enable(&mem_profiling_compressed);
		else
			static_branch_disable(&mem_profiling_compressed);
	}

	return 0;
}
early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling);
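
/*
 * page_ext storage for tag references is needed only when compressed
 * references are not in use (they live in unused page flag bits instead).
 */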
static __init bool need_page_alloc_tagging(void)
{
	if (static_key_enabled(&mem_profiling_compressed))
		return false;

	return mem_profiling_support;
}

#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
/*
 * Track page allocations before page_ext is initialized.
 * Some pages are allocated before page_ext becomes available, leaving
 * their codetag uninitialized. Track these early PFNs so we can clear
 * their codetag refs later to avoid warnings when they are freed.
 *
 * Early allocations include:
 * - Base allocations independent of CPU count
 * - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
 *   such as trace ring buffers, scheduler per-cpu data)
 *
 * For simplicity, we fix the size to 8192.
 * If insufficient, a warning will be triggered to alert the user.
 *
 * TODO: Replace fixed-size array with dynamic allocation using
 * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
 */
#define EARLY_ALLOC_PFN_MAX	8192

static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);

static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
{
	int old_idx, new_idx;

	do {
		old_idx = atomic_read(&early_pfn_count);
		if (old_idx >= EARLY_ALLOC_PFN_MAX) {
			pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
				     EARLY_ALLOC_PFN_MAX);
			return;
		}
		new_idx = old_idx + 1;
	} while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));

	early_pfns[old_idx] = pfn;
}

typedef void alloc_tag_add_func(unsigned long pfn);
static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
	RCU_INITIALIZER(__alloc_tag_add_early_pfn);

void alloc_tag_add_early_pfn(unsigned long pfn)
{
	alloc_tag_add_func *alloc_tag_add;

	if (static_key_enabled(&mem_profiling_compressed))
		return;

	rcu_read_lock();
	alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
	if (alloc_tag_add)
		alloc_tag_add(pfn);
	rcu_read_unlock();
}
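
/*
 * Called once page_ext is ready: detach __alloc_tag_add_early_pfn() (it is
 * __init code) and mark the recorded early pages' references empty, unless
 * a page was already freed and reallocated with a valid tag.
 */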
static void __init clear_early_alloc_pfn_tag_refs(void)
{
	unsigned int i;

	if (static_key_enabled(&mem_profiling_compressed))
		return;

	rcu_assign_pointer(alloc_tag_add_early_pfn_ptr, NULL);
	/* Make sure we are not racing with __alloc_tag_add_early_pfn() */
	synchronize_rcu();

	for (i = 0; i < atomic_read(&early_pfn_count); i++) {
		unsigned long pfn = early_pfns[i];

		if (pfn_valid(pfn)) {
			struct page *page = pfn_to_page(pfn);
			union pgtag_ref_handle handle;
			union codetag_ref ref;

			if (get_page_tag_ref(page, &ref, &handle)) {
				/*
				 * An early-allocated page could be freed and reallocated
				 * after its page_ext is initialized but before we clear it.
				 * In that case, it already has a valid tag set.
				 * We should not overwrite that valid tag with CODETAG_EMPTY.
				 *
				 * Note: there is still a small race window between checking
				 * ref.ct and calling set_codetag_empty(). We accept this
				 * race as it's unlikely and the extra complexity of atomic
				 * cmpxchg is not worth it for this debug-only code path.
				 */
				if (ref.ct) {
					put_page_tag_ref(handle);
					continue;
				}

				set_codetag_empty(&ref);
				update_page_tag_ref(handle, &ref);
				put_page_tag_ref(handle);
			}
		}
	}
}
#else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
static inline void __init clear_early_alloc_pfn_tag_refs(void) {}
#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */

static __init void init_page_alloc_tagging(void)
{
	clear_early_alloc_pfn_tag_refs();
}

struct page_ext_operations page_alloc_tagging_ops = {
	.size = sizeof(union codetag_ref),
	.need = need_page_alloc_tagging,
	.init = init_page_alloc_tagging,
};
EXPORT_SYMBOL(page_alloc_tagging_ops);

#ifdef CONFIG_SYSCTL
/*
 * Not using proc_do_static_key() directly to prevent enabling profiling
 * after it was shut down.
 */
static int proc_mem_profiling_handler(const struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	if (write) {
		/*
		 * A write from do_sysctl_args() is a no-op because the same
		 * value was already set by setup_early_mem_profiling().
		 * Return success to avoid warnings from do_sysctl_args().
		 */
		if (!current->mm)
			return 0;

#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
		/* User can't toggle profiling while debugging */
		return -EACCES;
#endif
		if (!mem_profiling_support)
			return -EINVAL;
	}

	return proc_do_static_key(table, write, buffer, lenp, ppos);
}

static const struct ctl_table memory_allocation_profiling_sysctls[] = {
	{
		.procname	= "mem_profiling",
		.data		= &mem_alloc_profiling_key,
		.mode		= 0644,
		.proc_handler	= proc_mem_profiling_handler,
	},
};

static void __init sysctl_init(void)
{
	register_sysctl_init("vm", memory_allocation_profiling_sysctls);
}
#else /* CONFIG_SYSCTL */
static inline void sysctl_init(void) {}
#endif /* CONFIG_SYSCTL */

static int __init alloc_tag_init(void)
{
	const struct codetag_type_desc desc = {
		.section		= ALLOC_TAG_SECTION_NAME,
		.tag_size		= sizeof(struct alloc_tag),
#ifdef CONFIG_MODULES
		.needs_section_mem	= needs_section_mem,
		.alloc_section_mem	= reserve_module_tags,
		.free_section_mem	= release_module_tags,
		.module_load		= load_module,
		.module_replaced	= replace_module,
#endif
	};
	int res;

	sysctl_init();

	if (!mem_profiling_support) {
		pr_info("Memory allocation profiling is not supported!\n");
		return 0;
	}

	if (!proc_create_seq_private(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op,
				     sizeof(struct allocinfo_private), NULL)) {
		pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME);
		shutdown_mem_profiling(false);
		return -ENOMEM;
	}

	res = alloc_mod_tags_mem();
	if (res) {
		pr_err("Failed to reserve address space for module tags, errno = %d\n", res);
		shutdown_mem_profiling(true);
		return res;
	}

	alloc_tag_cttype = codetag_register_type(&desc);
	if (IS_ERR(alloc_tag_cttype)) {
		pr_err("Allocation tags registration failed, errno = %pe\n", alloc_tag_cttype);
		free_mod_tags_mem();
		shutdown_mem_profiling(true);
		return PTR_ERR(alloc_tag_cttype);
	}

	return 0;
}
module_init(alloc_tag_init);