1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * mm/kmemleak.c 4 * 5 * Copyright (C) 2008 ARM Limited 6 * Written by Catalin Marinas <catalin.marinas@arm.com> 7 * 8 * For more information on the algorithm and kmemleak usage, please see 9 * Documentation/dev-tools/kmemleak.rst. 10 * 11 * Notes on locking 12 * ---------------- 13 * 14 * The following locks and mutexes are used by kmemleak: 15 * 16 * - kmemleak_lock (raw_spinlock_t): protects the object_list as well as 17 * del_state modifications and accesses to the object trees 18 * (object_tree_root, object_phys_tree_root, object_percpu_tree_root). The 19 * object_list is the main list holding the metadata (struct 20 * kmemleak_object) for the allocated memory blocks. The object trees are 21 * red black trees used to look-up metadata based on a pointer to the 22 * corresponding memory block. The kmemleak_object structures are added to 23 * the object_list and the object tree root in the create_object() function 24 * called from the kmemleak_alloc{,_phys,_percpu}() callback and removed in 25 * delete_object() called from the kmemleak_free{,_phys,_percpu}() callback 26 * - kmemleak_object.lock (raw_spinlock_t): protects a kmemleak_object. 27 * Accesses to the metadata (e.g. count) are protected by this lock. Note 28 * that some members of this structure may be protected by other means 29 * (atomic or kmemleak_lock). This lock is also held when scanning the 30 * corresponding memory block to avoid the kernel freeing it via the 31 * kmemleak_free() callback. This is less heavyweight than holding a global 32 * lock like kmemleak_lock during scanning. 33 * - scan_mutex (mutex): ensures that only one thread may scan the memory for 34 * unreferenced objects at a time. The gray_list contains the objects which 35 * are already referenced or marked as false positives and need to be 36 * scanned. This list is only modified during a scanning episode when the 37 * scan_mutex is held. 
At the end of a scan, the gray_list is always empty. 38 * Note that the kmemleak_object.use_count is incremented when an object is 39 * added to the gray_list and therefore cannot be freed. This mutex also 40 * prevents multiple users of the "kmemleak" debugfs file together with 41 * modifications to the memory scanning parameters including the scan_thread 42 * pointer 43 * 44 * Locks and mutexes are acquired/nested in the following order: 45 * 46 * scan_mutex [-> object->lock] -> kmemleak_lock -> other_object->lock (SINGLE_DEPTH_NESTING) 47 * 48 * No kmemleak_lock and object->lock nesting is allowed outside scan_mutex 49 * regions. 50 * 51 * The kmemleak_object structures have a use_count incremented or decremented 52 * using the get_object()/put_object() functions. When the use_count becomes 53 * 0, this count can no longer be incremented and put_object() schedules the 54 * kmemleak_object freeing via an RCU callback. All calls to the get_object() 55 * function must be protected by rcu_read_lock() to avoid accessing a freed 56 * structure. 
57 */ 58 59 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 60 61 #include <linux/init.h> 62 #include <linux/kernel.h> 63 #include <linux/list.h> 64 #include <linux/sched/signal.h> 65 #include <linux/sched/task.h> 66 #include <linux/sched/task_stack.h> 67 #include <linux/jiffies.h> 68 #include <linux/delay.h> 69 #include <linux/export.h> 70 #include <linux/kthread.h> 71 #include <linux/rbtree.h> 72 #include <linux/fs.h> 73 #include <linux/debugfs.h> 74 #include <linux/seq_file.h> 75 #include <linux/cpumask.h> 76 #include <linux/spinlock.h> 77 #include <linux/module.h> 78 #include <linux/mutex.h> 79 #include <linux/rcupdate.h> 80 #include <linux/stacktrace.h> 81 #include <linux/stackdepot.h> 82 #include <linux/cache.h> 83 #include <linux/percpu.h> 84 #include <linux/memblock.h> 85 #include <linux/pfn.h> 86 #include <linux/mmzone.h> 87 #include <linux/slab.h> 88 #include <linux/thread_info.h> 89 #include <linux/err.h> 90 #include <linux/uaccess.h> 91 #include <linux/string.h> 92 #include <linux/nodemask.h> 93 #include <linux/mm.h> 94 #include <linux/workqueue.h> 95 #include <linux/xarray.h> 96 #include <linux/crc32.h> 97 98 #include <asm/sections.h> 99 #include <asm/processor.h> 100 #include <linux/atomic.h> 101 102 #include <linux/kasan.h> 103 #include <linux/kfence.h> 104 #include <linux/kmemleak.h> 105 #include <linux/memory_hotplug.h> 106 107 /* 108 * Kmemleak configuration and common defines. 
*/
#define MAX_TRACE		16	/* stack trace length */
#define MSECS_MIN_AGE		5000	/* minimum object age for reporting */
#define SECS_FIRST_SCAN		60	/* delay before the first scan */
#define SECS_SCAN_WAIT		600	/* subsequent auto scanning delay */
#define MAX_SCAN_SIZE		4096	/* maximum size of a scanned block */

/* size in bytes of one pointer-sized word */
#define BYTES_PER_POINTER	sizeof(void *)

/*
 * Scanning area inside a memory block. Instances are linked through 'node'
 * into the owning object's area_list by add_scan_area(), which also fills in
 * 'start' (the address passed by the caller) and 'size' (length in bytes).
 */
struct kmemleak_scan_area {
	struct hlist_node node;
	unsigned long start;
	size_t size;
};

/*
 * Colour values stored in kmemleak_object.min_count by the painting helpers:
 * gray is a min_count of 0, black is a min_count of -1.
 */
#define KMEMLEAK_GREY	0
#define KMEMLEAK_BLACK	-1

/*
 * Structure holding the metadata for each allocated memory block.
 * Modifications to such objects should be made while holding the
 * object->lock. Insertions or deletions from object_list, gray_list or
 * rb_node are already protected by the corresponding locks or mutex (see
 * the notes on locking above). These objects are reference-counted
 * (use_count) and freed using the RCU mechanism.
*/
struct kmemleak_object {
	/* protects this object's metadata (see the locking notes above) */
	raw_spinlock_t lock;
	unsigned int flags;		/* object status flags */
	/*
	 * Entry in the global object_list; reused as the mem_pool_free_list
	 * entry while a pool-backed object is free.
	 */
	struct list_head object_list;
	/* list entry used for the gray_list */
	struct list_head gray_list;
	/* node in the search tree selected by object_tree(flags) */
	struct rb_node rb_node;
	struct rcu_head rcu;		/* object_list lockless traversal */
	/* object usage count; object freed when use_count == 0 */
	atomic_t use_count;
	unsigned int del_state;		/* deletion state (DELSTATE_* bits) */
	/* start address of the tracked block, as passed at creation time */
	unsigned long pointer;
	/* size of the tracked block in bytes */
	size_t size;
	/* pass surplus references to this pointer */
	unsigned long excess_ref;
	/* minimum number of pointers found before it is considered a leak */
	int min_count;
	/* the total number of pointers found pointing to this object */
	int count;
	/* checksum for detecting modified objects */
	u32 checksum;
	/* stack depot handle of the allocation backtrace (0 if not recorded) */
	depot_stack_handle_t trace_handle;
	/* memory ranges to be scanned inside an object (empty for all) */
	struct hlist_head area_list;
	unsigned long jiffies;		/* creation timestamp */
	pid_t pid;			/* pid of the current task */
	/* per-scan dedup count, valid only while in scan-local dedup xarray */
	unsigned int dup_count;
	char comm[TASK_COMM_LEN];	/* executable name */
};

/* flag representing the memory block allocation status */
#define OBJECT_ALLOCATED	(1 << 0)
/* flag set after the first reporting of an unreferenced object */
#define OBJECT_REPORTED		(1 << 1)
/* flag set to not scan the object */
#define OBJECT_NO_SCAN		(1 << 2)
/* flag set to fully scan the object when scan_area allocation failed */
#define OBJECT_FULL_SCAN	(1 << 3)
/* flag set for object allocated with physical address */
#define OBJECT_PHYS		(1 << 4)
/* flag set for per-CPU pointers */
#define OBJECT_PERCPU		(1 << 5)

/* set when __remove_object() called */
#define DELSTATE_REMOVED	(1 << 0)
/* set to temporarily prevent deletion from object_list */
#define DELSTATE_NO_DELETE	(1 << 1)

/* prefix string passed to the hex dump helpers for every dumped row */
#define HEX_PREFIX		" "
/* number of bytes to
print per line; must be 16 or 32 */ 186 #define HEX_ROW_SIZE 16 187 /* number of bytes to print at a time (1, 2, 4, 8) */ 188 #define HEX_GROUP_SIZE 1 189 /* include ASCII after the hex output */ 190 #define HEX_ASCII 1 191 /* max number of lines to be printed */ 192 #define HEX_MAX_LINES 2 193 194 /* the list of all allocated objects */ 195 static LIST_HEAD(object_list); 196 /* the list of gray-colored objects (see color_gray comment below) */ 197 static LIST_HEAD(gray_list); 198 /* memory pool allocation */ 199 static struct kmemleak_object mem_pool[CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE]; 200 static int mem_pool_free_count = ARRAY_SIZE(mem_pool); 201 static LIST_HEAD(mem_pool_free_list); 202 /* search tree for object boundaries */ 203 static struct rb_root object_tree_root = RB_ROOT; 204 /* search tree for object (with OBJECT_PHYS flag) boundaries */ 205 static struct rb_root object_phys_tree_root = RB_ROOT; 206 /* search tree for object (with OBJECT_PERCPU flag) boundaries */ 207 static struct rb_root object_percpu_tree_root = RB_ROOT; 208 /* protecting the access to object_list, object_tree_root (or object_phys_tree_root) */ 209 static DEFINE_RAW_SPINLOCK(kmemleak_lock); 210 211 /* allocation caches for kmemleak internal data */ 212 static struct kmem_cache *object_cache; 213 static struct kmem_cache *scan_area_cache; 214 215 /* set if tracing memory operations is enabled */ 216 static int kmemleak_enabled __read_mostly = 1; 217 /* same as above but only for the kmemleak_free() callback */ 218 static int kmemleak_free_enabled __read_mostly = 1; 219 /* set in the late_initcall if there were no errors */ 220 static int kmemleak_late_initialized; 221 /* set if a fatal kmemleak error has occurred */ 222 static int kmemleak_error; 223 224 /* minimum and maximum address that may be valid pointers */ 225 static unsigned long min_addr = ULONG_MAX; 226 static unsigned long max_addr; 227 228 /* minimum and maximum address that may be valid per-CPU pointers */ 229 static 
unsigned long min_percpu_addr = ULONG_MAX; 230 static unsigned long max_percpu_addr; 231 232 static struct task_struct *scan_thread; 233 /* used to avoid reporting of recently allocated objects */ 234 static unsigned long jiffies_min_age; 235 static unsigned long jiffies_last_scan; 236 /* delay between automatic memory scannings */ 237 static unsigned long jiffies_scan_wait; 238 /* enables or disables the task stacks scanning */ 239 static int kmemleak_stack_scan = 1; 240 /* protects the memory scanning, parameters and debug/kmemleak file access */ 241 static DEFINE_MUTEX(scan_mutex); 242 /* setting kmemleak=on, will set this var, skipping the disable */ 243 static int kmemleak_skip_disable; 244 /* If there are leaks that can be reported */ 245 static bool kmemleak_found_leaks; 246 247 static bool kmemleak_verbose = IS_ENABLED(CONFIG_DEBUG_KMEMLEAK_VERBOSE); 248 module_param_named(verbose, kmemleak_verbose, bool, 0600); 249 250 static void kmemleak_disable(void); 251 252 /* 253 * Print a warning and dump the stack trace. 254 */ 255 #define kmemleak_warn(x...) do { \ 256 pr_warn(x); \ 257 dump_stack(); \ 258 } while (0) 259 260 /* 261 * Macro invoked when a serious kmemleak condition occurred and cannot be 262 * recovered from. Kmemleak will be disabled and further allocation/freeing 263 * tracing no longer available. 264 */ 265 #define kmemleak_stop(x...) do { \ 266 kmemleak_warn(x); \ 267 kmemleak_disable(); \ 268 } while (0) 269 270 #define warn_or_seq_printf(seq, fmt, ...) 
do { \ 271 if (seq) \ 272 seq_printf(seq, fmt, ##__VA_ARGS__); \ 273 else \ 274 pr_warn(fmt, ##__VA_ARGS__); \ 275 } while (0) 276 277 static void warn_or_seq_hex_dump(struct seq_file *seq, int prefix_type, 278 int rowsize, int groupsize, const void *buf, 279 size_t len, bool ascii) 280 { 281 if (seq) 282 seq_hex_dump(seq, HEX_PREFIX, prefix_type, rowsize, groupsize, 283 buf, len, ascii); 284 else 285 print_hex_dump(KERN_WARNING, pr_fmt(HEX_PREFIX), prefix_type, 286 rowsize, groupsize, buf, len, ascii); 287 } 288 289 /* 290 * Printing of the objects hex dump to the seq file. The number of lines to be 291 * printed is limited to HEX_MAX_LINES to prevent seq file spamming. The 292 * actual number of printed bytes depends on HEX_ROW_SIZE. It must be called 293 * with the object->lock held. 294 */ 295 static void hex_dump_object(struct seq_file *seq, 296 struct kmemleak_object *object) 297 { 298 const u8 *ptr = (const u8 *)object->pointer; 299 size_t len; 300 301 if (WARN_ON_ONCE(object->flags & OBJECT_PHYS)) 302 return; 303 304 if (object->flags & OBJECT_PERCPU) 305 ptr = (const u8 *)this_cpu_ptr((void __percpu *)object->pointer); 306 307 /* limit the number of lines to HEX_MAX_LINES */ 308 len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE); 309 310 if (object->flags & OBJECT_PERCPU) 311 warn_or_seq_printf(seq, " hex dump (first %zu bytes on cpu %d):\n", 312 len, raw_smp_processor_id()); 313 else 314 warn_or_seq_printf(seq, " hex dump (first %zu bytes):\n", len); 315 kasan_disable_current(); 316 warn_or_seq_hex_dump(seq, DUMP_PREFIX_NONE, HEX_ROW_SIZE, 317 HEX_GROUP_SIZE, kasan_reset_tag((void *)ptr), len, HEX_ASCII); 318 kasan_enable_current(); 319 } 320 321 /* 322 * Object colors, encoded with count and min_count: 323 * - white - orphan object, not enough references to it (count < min_count) 324 * - gray - not orphan, not marked as false positive (min_count == 0) or 325 * sufficient references to it (count >= min_count) 326 * - black - ignore, it 
doesn't contain references (e.g. text section) 327 * (min_count == -1). No function defined for this color. 328 */ 329 static bool color_white(const struct kmemleak_object *object) 330 { 331 return object->count != KMEMLEAK_BLACK && 332 object->count < object->min_count; 333 } 334 335 static bool color_gray(const struct kmemleak_object *object) 336 { 337 return object->min_count != KMEMLEAK_BLACK && 338 object->count >= object->min_count; 339 } 340 341 /* 342 * Objects are considered unreferenced only if their color is white, they have 343 * not be deleted and have a minimum age to avoid false positives caused by 344 * pointers temporarily stored in CPU registers. 345 */ 346 static bool unreferenced_object(struct kmemleak_object *object) 347 { 348 return (color_white(object) && object->flags & OBJECT_ALLOCATED) && 349 time_before_eq(object->jiffies + jiffies_min_age, 350 jiffies_last_scan); 351 } 352 353 static const char *__object_type_str(struct kmemleak_object *object) 354 { 355 if (object->flags & OBJECT_PHYS) 356 return " (phys)"; 357 if (object->flags & OBJECT_PERCPU) 358 return " (percpu)"; 359 return ""; 360 } 361 362 /* 363 * Printing of the unreferenced objects information to the seq file. The 364 * print_unreferenced function must be called with the object->lock held. 
365 */ 366 static void __print_unreferenced(struct seq_file *seq, 367 struct kmemleak_object *object, 368 bool hex_dump) 369 { 370 int i; 371 unsigned long *entries; 372 unsigned int nr_entries; 373 374 nr_entries = stack_depot_fetch(object->trace_handle, &entries); 375 warn_or_seq_printf(seq, "unreferenced object%s 0x%08lx (size %zu):\n", 376 __object_type_str(object), 377 object->pointer, object->size); 378 warn_or_seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu\n", 379 object->comm, object->pid, object->jiffies); 380 if (hex_dump) 381 hex_dump_object(seq, object); 382 warn_or_seq_printf(seq, " backtrace (crc %x):\n", object->checksum); 383 384 for (i = 0; i < nr_entries; i++) { 385 void *ptr = (void *)entries[i]; 386 warn_or_seq_printf(seq, " %pS\n", ptr); 387 } 388 } 389 390 static void print_unreferenced(struct seq_file *seq, 391 struct kmemleak_object *object) 392 { 393 __print_unreferenced(seq, object, true); 394 } 395 396 /* 397 * Print the kmemleak_object information. This function is used mainly for 398 * debugging special cases when kmemleak operations. It must be called with 399 * the object->lock held. 
400 */ 401 static void dump_object_info(struct kmemleak_object *object) 402 { 403 pr_notice("Object%s 0x%08lx (size %zu):\n", 404 __object_type_str(object), object->pointer, object->size); 405 pr_notice(" comm \"%s\", pid %d, jiffies %lu\n", 406 object->comm, object->pid, object->jiffies); 407 pr_notice(" min_count = %d\n", object->min_count); 408 pr_notice(" count = %d\n", object->count); 409 pr_notice(" flags = 0x%x\n", object->flags); 410 pr_notice(" checksum = %u\n", object->checksum); 411 pr_notice(" backtrace:\n"); 412 if (object->trace_handle) 413 stack_depot_print(object->trace_handle); 414 } 415 416 static struct rb_root *object_tree(unsigned long objflags) 417 { 418 if (objflags & OBJECT_PHYS) 419 return &object_phys_tree_root; 420 if (objflags & OBJECT_PERCPU) 421 return &object_percpu_tree_root; 422 return &object_tree_root; 423 } 424 425 /* 426 * Look-up a memory block metadata (kmemleak_object) in the object search 427 * tree based on a pointer value. If alias is 0, only values pointing to the 428 * beginning of the memory block are allowed. The kmemleak_lock must be held 429 * when calling this function. 430 */ 431 static struct kmemleak_object *__lookup_object(unsigned long ptr, int alias, 432 unsigned int objflags) 433 { 434 struct rb_node *rb = object_tree(objflags)->rb_node; 435 unsigned long untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); 436 437 while (rb) { 438 struct kmemleak_object *object; 439 unsigned long untagged_objp; 440 441 object = rb_entry(rb, struct kmemleak_object, rb_node); 442 untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer); 443 444 if (untagged_ptr < untagged_objp) 445 rb = object->rb_node.rb_left; 446 else if (untagged_objp + object->size <= untagged_ptr) 447 rb = object->rb_node.rb_right; 448 else if (untagged_objp == untagged_ptr || alias) 449 return object; 450 else { 451 /* 452 * Printk deferring due to the kmemleak_lock held. 453 * This is done to avoid deadlock. 
454 */ 455 printk_deferred_enter(); 456 kmemleak_warn("Found object by alias at 0x%08lx\n", 457 ptr); 458 dump_object_info(object); 459 printk_deferred_exit(); 460 break; 461 } 462 } 463 return NULL; 464 } 465 466 /* Look-up a kmemleak object which allocated with virtual address. */ 467 static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) 468 { 469 return __lookup_object(ptr, alias, 0); 470 } 471 472 /* 473 * Increment the object use_count. Return 1 if successful or 0 otherwise. Note 474 * that once an object's use_count reached 0, the RCU freeing was already 475 * registered and the object should no longer be used. This function must be 476 * called under the protection of rcu_read_lock(). 477 */ 478 static int get_object(struct kmemleak_object *object) 479 { 480 return atomic_inc_not_zero(&object->use_count); 481 } 482 483 /* 484 * Memory pool allocation and freeing. kmemleak_lock must not be held. 485 */ 486 static struct kmemleak_object *mem_pool_alloc(gfp_t gfp) 487 { 488 unsigned long flags; 489 struct kmemleak_object *object; 490 bool warn = false; 491 492 /* try the slab allocator first */ 493 if (object_cache) { 494 object = kmem_cache_alloc_noprof(object_cache, 495 gfp_nested_mask(gfp)); 496 if (object) 497 return object; 498 } 499 500 /* slab allocation failed, try the memory pool */ 501 raw_spin_lock_irqsave(&kmemleak_lock, flags); 502 object = list_first_entry_or_null(&mem_pool_free_list, 503 typeof(*object), object_list); 504 if (object) 505 list_del(&object->object_list); 506 else if (mem_pool_free_count) 507 object = &mem_pool[--mem_pool_free_count]; 508 else 509 warn = true; 510 raw_spin_unlock_irqrestore(&kmemleak_lock, flags); 511 if (warn) 512 pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n"); 513 514 return object; 515 } 516 517 /* 518 * Return the object to either the slab allocator or the memory pool. 
519 */ 520 static void mem_pool_free(struct kmemleak_object *object) 521 { 522 unsigned long flags; 523 524 if (object < mem_pool || object >= ARRAY_END(mem_pool)) { 525 kmem_cache_free(object_cache, object); 526 return; 527 } 528 529 /* add the object to the memory pool free list */ 530 raw_spin_lock_irqsave(&kmemleak_lock, flags); 531 list_add(&object->object_list, &mem_pool_free_list); 532 raw_spin_unlock_irqrestore(&kmemleak_lock, flags); 533 } 534 535 /* 536 * RCU callback to free a kmemleak_object. 537 */ 538 static void free_object_rcu(struct rcu_head *rcu) 539 { 540 struct hlist_node *tmp; 541 struct kmemleak_scan_area *area; 542 struct kmemleak_object *object = 543 container_of(rcu, struct kmemleak_object, rcu); 544 545 /* 546 * Once use_count is 0 (guaranteed by put_object), there is no other 547 * code accessing this object, hence no need for locking. 548 */ 549 hlist_for_each_entry_safe(area, tmp, &object->area_list, node) { 550 hlist_del(&area->node); 551 kmem_cache_free(scan_area_cache, area); 552 } 553 mem_pool_free(object); 554 } 555 556 /* 557 * Decrement the object use_count. Once the count is 0, free the object using 558 * an RCU callback. Since put_object() may be called via the kmemleak_free() -> 559 * delete_object() path, the delayed RCU freeing ensures that there is no 560 * recursive call to the kernel allocator. Lock-less RCU object_list traversal 561 * is also possible. 562 */ 563 static void put_object(struct kmemleak_object *object) 564 { 565 if (!atomic_dec_and_test(&object->use_count)) 566 return; 567 568 /* should only get here after delete_object was called */ 569 WARN_ON(object->flags & OBJECT_ALLOCATED); 570 571 /* 572 * It may be too early for the RCU callbacks, however, there is no 573 * concurrent object_list traversal when !object_cache and all objects 574 * came from the memory pool. Free the object directly. 
575 */ 576 if (object_cache) 577 call_rcu(&object->rcu, free_object_rcu); 578 else 579 free_object_rcu(&object->rcu); 580 } 581 582 /* 583 * Look up an object in the object search tree and increase its use_count. 584 */ 585 static struct kmemleak_object *__find_and_get_object(unsigned long ptr, int alias, 586 unsigned int objflags) 587 { 588 unsigned long flags; 589 struct kmemleak_object *object; 590 591 rcu_read_lock(); 592 raw_spin_lock_irqsave(&kmemleak_lock, flags); 593 object = __lookup_object(ptr, alias, objflags); 594 raw_spin_unlock_irqrestore(&kmemleak_lock, flags); 595 596 /* check whether the object is still available */ 597 if (object && !get_object(object)) 598 object = NULL; 599 rcu_read_unlock(); 600 601 return object; 602 } 603 604 /* Look up and get an object which allocated with virtual address. */ 605 static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias) 606 { 607 return __find_and_get_object(ptr, alias, 0); 608 } 609 610 /* 611 * Remove an object from its object tree and object_list. Must be called with 612 * the kmemleak_lock held _if_ kmemleak is still enabled. 613 */ 614 static void __remove_object(struct kmemleak_object *object) 615 { 616 rb_erase(&object->rb_node, object_tree(object->flags)); 617 if (!(object->del_state & DELSTATE_NO_DELETE)) 618 list_del_rcu(&object->object_list); 619 object->del_state |= DELSTATE_REMOVED; 620 } 621 622 static struct kmemleak_object *__find_and_remove_object(unsigned long ptr, 623 int alias, 624 unsigned int objflags) 625 { 626 struct kmemleak_object *object; 627 628 object = __lookup_object(ptr, alias, objflags); 629 if (object) 630 __remove_object(object); 631 632 return object; 633 } 634 635 /* 636 * Look up an object in the object search tree and remove it from both object 637 * tree root and object_list. The returned object's use_count should be at 638 * least 1, as initially set by create_object(). 
639 */ 640 static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int alias, 641 unsigned int objflags) 642 { 643 unsigned long flags; 644 struct kmemleak_object *object; 645 646 raw_spin_lock_irqsave(&kmemleak_lock, flags); 647 object = __find_and_remove_object(ptr, alias, objflags); 648 raw_spin_unlock_irqrestore(&kmemleak_lock, flags); 649 650 return object; 651 } 652 653 static noinline depot_stack_handle_t set_track_prepare(void) 654 { 655 depot_stack_handle_t trace_handle; 656 unsigned long entries[MAX_TRACE]; 657 unsigned int nr_entries; 658 659 /* 660 * Use object_cache to determine whether kmemleak_init() has 661 * been invoked. stack_depot_early_init() is called before 662 * kmemleak_init() in mm_core_init(). 663 */ 664 if (!object_cache) 665 return 0; 666 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3); 667 trace_handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT); 668 669 return trace_handle; 670 } 671 672 static struct kmemleak_object *__alloc_object(gfp_t gfp) 673 { 674 struct kmemleak_object *object; 675 676 object = mem_pool_alloc(gfp); 677 if (!object) { 678 pr_warn("Cannot allocate a kmemleak_object structure\n"); 679 kmemleak_disable(); 680 return NULL; 681 } 682 683 INIT_LIST_HEAD(&object->object_list); 684 INIT_LIST_HEAD(&object->gray_list); 685 INIT_HLIST_HEAD(&object->area_list); 686 raw_spin_lock_init(&object->lock); 687 atomic_set(&object->use_count, 1); 688 object->excess_ref = 0; 689 object->count = 0; /* white color initially */ 690 object->checksum = 0; 691 object->del_state = 0; 692 693 /* task information */ 694 if (in_hardirq()) { 695 object->pid = 0; 696 strscpy(object->comm, "hardirq"); 697 } else if (in_serving_softirq()) { 698 object->pid = 0; 699 strscpy(object->comm, "softirq"); 700 } else { 701 object->pid = current->pid; 702 /* 703 * There is a small chance of a race with set_task_comm(), 704 * however using get_task_comm() here may cause locking 705 * dependency issues with 
current->alloc_lock. In the worst 706 * case, the command line is not correct. 707 */ 708 strscpy(object->comm, current->comm); 709 } 710 711 /* kernel backtrace */ 712 object->trace_handle = set_track_prepare(); 713 714 return object; 715 } 716 717 static int __link_object(struct kmemleak_object *object, unsigned long ptr, 718 size_t size, int min_count, unsigned int objflags) 719 { 720 721 struct kmemleak_object *parent; 722 struct rb_node **link, *rb_parent; 723 unsigned long untagged_ptr; 724 unsigned long untagged_objp; 725 726 object->flags = OBJECT_ALLOCATED | objflags; 727 object->pointer = ptr; 728 object->size = kfence_ksize((void *)ptr) ?: size; 729 object->min_count = min_count; 730 object->jiffies = jiffies; 731 732 untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); 733 /* 734 * Only update min_addr and max_addr with object storing virtual 735 * address. And update min_percpu_addr max_percpu_addr for per-CPU 736 * objects. 737 */ 738 if (objflags & OBJECT_PERCPU) { 739 min_percpu_addr = min(min_percpu_addr, untagged_ptr); 740 max_percpu_addr = max(max_percpu_addr, untagged_ptr + size); 741 } else if (!(objflags & OBJECT_PHYS)) { 742 min_addr = min(min_addr, untagged_ptr); 743 max_addr = max(max_addr, untagged_ptr + size); 744 } 745 link = &object_tree(objflags)->rb_node; 746 rb_parent = NULL; 747 while (*link) { 748 rb_parent = *link; 749 parent = rb_entry(rb_parent, struct kmemleak_object, rb_node); 750 untagged_objp = (unsigned long)kasan_reset_tag((void *)parent->pointer); 751 if (untagged_ptr + size <= untagged_objp) 752 link = &parent->rb_node.rb_left; 753 else if (untagged_objp + parent->size <= untagged_ptr) 754 link = &parent->rb_node.rb_right; 755 else { 756 /* 757 * Printk deferring due to the kmemleak_lock held. 758 * This is done to avoid deadlock. 
759 */ 760 printk_deferred_enter(); 761 kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n", 762 ptr); 763 /* 764 * No need for parent->lock here since "parent" cannot 765 * be freed while the kmemleak_lock is held. 766 */ 767 dump_object_info(parent); 768 printk_deferred_exit(); 769 return -EEXIST; 770 } 771 } 772 rb_link_node(&object->rb_node, rb_parent, link); 773 rb_insert_color(&object->rb_node, object_tree(objflags)); 774 list_add_tail_rcu(&object->object_list, &object_list); 775 776 return 0; 777 } 778 779 /* 780 * Create the metadata (struct kmemleak_object) corresponding to an allocated 781 * memory block and add it to the object_list and object tree. 782 */ 783 static void __create_object(unsigned long ptr, size_t size, 784 int min_count, gfp_t gfp, unsigned int objflags) 785 { 786 struct kmemleak_object *object; 787 unsigned long flags; 788 int ret; 789 790 object = __alloc_object(gfp); 791 if (!object) 792 return; 793 794 raw_spin_lock_irqsave(&kmemleak_lock, flags); 795 ret = __link_object(object, ptr, size, min_count, objflags); 796 raw_spin_unlock_irqrestore(&kmemleak_lock, flags); 797 if (ret) 798 mem_pool_free(object); 799 } 800 801 /* Create kmemleak object which allocated with virtual address. */ 802 static void create_object(unsigned long ptr, size_t size, 803 int min_count, gfp_t gfp) 804 { 805 __create_object(ptr, size, min_count, gfp, 0); 806 } 807 808 /* Create kmemleak object which allocated with physical address. */ 809 static void create_object_phys(unsigned long ptr, size_t size, 810 int min_count, gfp_t gfp) 811 { 812 __create_object(ptr, size, min_count, gfp, OBJECT_PHYS); 813 } 814 815 /* Create kmemleak object corresponding to a per-CPU allocation. 
*/ 816 static void create_object_percpu(unsigned long ptr, size_t size, 817 int min_count, gfp_t gfp) 818 { 819 __create_object(ptr, size, min_count, gfp, OBJECT_PERCPU); 820 } 821 822 /* 823 * Mark the object as not allocated and schedule RCU freeing via put_object(). 824 */ 825 static void __delete_object(struct kmemleak_object *object) 826 { 827 unsigned long flags; 828 829 WARN_ON(!(object->flags & OBJECT_ALLOCATED)); 830 WARN_ON(atomic_read(&object->use_count) < 1); 831 832 /* 833 * Locking here also ensures that the corresponding memory block 834 * cannot be freed when it is being scanned. 835 */ 836 raw_spin_lock_irqsave(&object->lock, flags); 837 object->flags &= ~OBJECT_ALLOCATED; 838 raw_spin_unlock_irqrestore(&object->lock, flags); 839 put_object(object); 840 } 841 842 /* 843 * Look up the metadata (struct kmemleak_object) corresponding to ptr and 844 * delete it. 845 */ 846 static void delete_object_full(unsigned long ptr, unsigned int objflags) 847 { 848 struct kmemleak_object *object; 849 850 object = find_and_remove_object(ptr, 0, objflags); 851 if (!object) 852 /* 853 * kmalloc_nolock() -> kfree() calls kmemleak_free() 854 * without kmemleak_alloc(). 855 */ 856 return; 857 __delete_object(object); 858 } 859 860 /* 861 * Look up the metadata (struct kmemleak_object) corresponding to ptr and 862 * delete it. If the memory block is partially freed, the function may create 863 * additional metadata for the remaining parts of the block. 
864 */ 865 static void delete_object_part(unsigned long ptr, size_t size, 866 unsigned int objflags) 867 { 868 struct kmemleak_object *object, *object_l, *object_r; 869 unsigned long start, end, flags; 870 871 object_l = __alloc_object(GFP_KERNEL); 872 if (!object_l) 873 return; 874 875 object_r = __alloc_object(GFP_KERNEL); 876 if (!object_r) 877 goto out; 878 879 raw_spin_lock_irqsave(&kmemleak_lock, flags); 880 object = __find_and_remove_object(ptr, 1, objflags); 881 if (!object) 882 goto unlock; 883 884 /* 885 * Create one or two objects that may result from the memory block 886 * split. Note that partial freeing is only done by free_bootmem() and 887 * this happens before kmemleak_init() is called. 888 */ 889 start = object->pointer; 890 end = object->pointer + object->size; 891 if ((ptr > start) && 892 !__link_object(object_l, start, ptr - start, 893 object->min_count, objflags)) 894 object_l = NULL; 895 if ((ptr + size < end) && 896 !__link_object(object_r, ptr + size, end - ptr - size, 897 object->min_count, objflags)) 898 object_r = NULL; 899 900 unlock: 901 raw_spin_unlock_irqrestore(&kmemleak_lock, flags); 902 if (object) { 903 __delete_object(object); 904 } else { 905 #ifdef DEBUG 906 kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n", 907 ptr, size); 908 #endif 909 } 910 911 out: 912 if (object_l) 913 mem_pool_free(object_l); 914 if (object_r) 915 mem_pool_free(object_r); 916 } 917 918 static void __paint_it(struct kmemleak_object *object, int color) 919 { 920 object->min_count = color; 921 if (color == KMEMLEAK_BLACK) 922 object->flags |= OBJECT_NO_SCAN; 923 } 924 925 static void paint_it(struct kmemleak_object *object, int color) 926 { 927 unsigned long flags; 928 929 raw_spin_lock_irqsave(&object->lock, flags); 930 __paint_it(object, color); 931 raw_spin_unlock_irqrestore(&object->lock, flags); 932 } 933 934 static void paint_ptr(unsigned long ptr, int color, unsigned int objflags) 935 { 936 struct kmemleak_object *object; 937 
938 object = __find_and_get_object(ptr, 0, objflags); 939 if (!object) 940 /* 941 * kmalloc_nolock() -> kfree_rcu() calls kmemleak_ignore() 942 * without kmemleak_alloc(). 943 */ 944 return; 945 paint_it(object, color); 946 put_object(object); 947 } 948 949 /* 950 * Mark an object permanently as gray-colored so that it can no longer be 951 * reported as a leak. This is used in general to mark a false positive. 952 */ 953 static void make_gray_object(unsigned long ptr) 954 { 955 paint_ptr(ptr, KMEMLEAK_GREY, 0); 956 } 957 958 /* 959 * Mark the object as black-colored so that it is ignored from scans and 960 * reporting. 961 */ 962 static void make_black_object(unsigned long ptr, unsigned int objflags) 963 { 964 paint_ptr(ptr, KMEMLEAK_BLACK, objflags); 965 } 966 967 /* 968 * Reset the checksum of an object. The immediate effect is that it will not 969 * be reported as a leak during the next scan until its checksum is updated. 970 */ 971 static void reset_checksum(unsigned long ptr) 972 { 973 unsigned long flags; 974 struct kmemleak_object *object; 975 976 object = find_and_get_object(ptr, 0); 977 if (!object) { 978 kmemleak_warn("Not resetting the checksum of an unknown object at 0x%08lx\n", 979 ptr); 980 return; 981 } 982 983 raw_spin_lock_irqsave(&object->lock, flags); 984 object->checksum = 0; 985 raw_spin_unlock_irqrestore(&object->lock, flags); 986 put_object(object); 987 } 988 989 /* 990 * Add a scanning area to the object. If at least one such area is added, 991 * kmemleak will only scan these ranges rather than the whole memory block. 
992 */ 993 static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) 994 { 995 unsigned long flags; 996 struct kmemleak_object *object; 997 struct kmemleak_scan_area *area = NULL; 998 unsigned long untagged_ptr; 999 unsigned long untagged_objp; 1000 1001 object = find_and_get_object(ptr, 1); 1002 if (!object) { 1003 kmemleak_warn("Adding scan area to unknown object at 0x%08lx\n", 1004 ptr); 1005 return; 1006 } 1007 1008 untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); 1009 untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer); 1010 1011 if (scan_area_cache) 1012 area = kmem_cache_alloc_noprof(scan_area_cache, 1013 gfp_nested_mask(gfp)); 1014 1015 raw_spin_lock_irqsave(&object->lock, flags); 1016 if (!area) { 1017 pr_warn_once("Cannot allocate a scan area, scanning the full object\n"); 1018 /* mark the object for full scan to avoid false positives */ 1019 object->flags |= OBJECT_FULL_SCAN; 1020 goto out_unlock; 1021 } 1022 if (size == SIZE_MAX) { 1023 size = untagged_objp + object->size - untagged_ptr; 1024 } else if (untagged_ptr + size > untagged_objp + object->size) { 1025 kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr); 1026 dump_object_info(object); 1027 kmem_cache_free(scan_area_cache, area); 1028 goto out_unlock; 1029 } 1030 1031 INIT_HLIST_NODE(&area->node); 1032 area->start = ptr; 1033 area->size = size; 1034 1035 hlist_add_head(&area->node, &object->area_list); 1036 out_unlock: 1037 raw_spin_unlock_irqrestore(&object->lock, flags); 1038 put_object(object); 1039 } 1040 1041 /* 1042 * Any surplus references (object already gray) to 'ptr' are passed to 1043 * 'excess_ref'. This is used in the vmalloc() case where a pointer to 1044 * vm_struct may be used as an alternative reference to the vmalloc'ed object 1045 * (see free_thread_stack()). 
1046 */ 1047 static void object_set_excess_ref(unsigned long ptr, unsigned long excess_ref) 1048 { 1049 unsigned long flags; 1050 struct kmemleak_object *object; 1051 1052 object = find_and_get_object(ptr, 0); 1053 if (!object) { 1054 kmemleak_warn("Setting excess_ref on unknown object at 0x%08lx\n", 1055 ptr); 1056 return; 1057 } 1058 1059 raw_spin_lock_irqsave(&object->lock, flags); 1060 object->excess_ref = excess_ref; 1061 raw_spin_unlock_irqrestore(&object->lock, flags); 1062 put_object(object); 1063 } 1064 1065 /* 1066 * Set the OBJECT_NO_SCAN flag for the object corresponding to the given 1067 * pointer. Such object will not be scanned by kmemleak but references to it 1068 * are searched. 1069 */ 1070 static void object_no_scan(unsigned long ptr) 1071 { 1072 unsigned long flags; 1073 struct kmemleak_object *object; 1074 1075 object = find_and_get_object(ptr, 0); 1076 if (!object) { 1077 kmemleak_warn("Not scanning unknown object at 0x%08lx\n", ptr); 1078 return; 1079 } 1080 1081 raw_spin_lock_irqsave(&object->lock, flags); 1082 object->flags |= OBJECT_NO_SCAN; 1083 raw_spin_unlock_irqrestore(&object->lock, flags); 1084 put_object(object); 1085 } 1086 1087 /** 1088 * kmemleak_alloc - register a newly allocated object 1089 * @ptr: pointer to beginning of the object 1090 * @size: size of the object 1091 * @min_count: minimum number of references to this object. If during memory 1092 * scanning a number of references less than @min_count is found, 1093 * the object is reported as a memory leak. If @min_count is 0, 1094 * the object is never reported as a leak. If @min_count is -1, 1095 * the object is ignored (not scanned and not reported as a leak) 1096 * @gfp: kmalloc() flags used for kmemleak internal memory allocations 1097 * 1098 * This function is called from the kernel allocators when a new object 1099 * (memory block) is allocated (kmem_cache_alloc, kmalloc etc.). 
1100 */ 1101 void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count, 1102 gfp_t gfp) 1103 { 1104 pr_debug("%s(0x%px, %zu, %d)\n", __func__, ptr, size, min_count); 1105 1106 if (kmemleak_enabled && ptr && !IS_ERR(ptr)) 1107 create_object((unsigned long)ptr, size, min_count, gfp); 1108 } 1109 EXPORT_SYMBOL_GPL(kmemleak_alloc); 1110 1111 /** 1112 * kmemleak_alloc_percpu - register a newly allocated __percpu object 1113 * @ptr: __percpu pointer to beginning of the object 1114 * @size: size of the object 1115 * @gfp: flags used for kmemleak internal memory allocations 1116 * 1117 * This function is called from the kernel percpu allocator when a new object 1118 * (memory block) is allocated (alloc_percpu). 1119 */ 1120 void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, 1121 gfp_t gfp) 1122 { 1123 pr_debug("%s(0x%px, %zu)\n", __func__, ptr, size); 1124 1125 if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr)) 1126 create_object_percpu((__force unsigned long)ptr, size, 1, gfp); 1127 } 1128 EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu); 1129 1130 /** 1131 * kmemleak_vmalloc - register a newly vmalloc'ed object 1132 * @area: pointer to vm_struct 1133 * @size: size of the object 1134 * @gfp: __vmalloc() flags used for kmemleak internal memory allocations 1135 * 1136 * This function is called from the vmalloc() kernel allocator when a new 1137 * object (memory block) is allocated. 1138 */ 1139 void __ref kmemleak_vmalloc(const struct vm_struct *area, size_t size, gfp_t gfp) 1140 { 1141 pr_debug("%s(0x%px, %zu)\n", __func__, area, size); 1142 1143 /* 1144 * A min_count = 2 is needed because vm_struct contains a reference to 1145 * the virtual address of the vmalloc'ed block. 
1146 */ 1147 if (kmemleak_enabled) { 1148 create_object((unsigned long)area->addr, size, 2, gfp); 1149 object_set_excess_ref((unsigned long)area, 1150 (unsigned long)area->addr); 1151 } 1152 } 1153 EXPORT_SYMBOL_GPL(kmemleak_vmalloc); 1154 1155 /** 1156 * kmemleak_free - unregister a previously registered object 1157 * @ptr: pointer to beginning of the object 1158 * 1159 * This function is called from the kernel allocators when an object (memory 1160 * block) is freed (kmem_cache_free, kfree, vfree etc.). 1161 */ 1162 void __ref kmemleak_free(const void *ptr) 1163 { 1164 pr_debug("%s(0x%px)\n", __func__, ptr); 1165 1166 if (kmemleak_free_enabled && ptr && !IS_ERR(ptr)) 1167 delete_object_full((unsigned long)ptr, 0); 1168 } 1169 EXPORT_SYMBOL_GPL(kmemleak_free); 1170 1171 /** 1172 * kmemleak_free_part - partially unregister a previously registered object 1173 * @ptr: pointer to the beginning or inside the object. This also 1174 * represents the start of the range to be freed 1175 * @size: size to be unregistered 1176 * 1177 * This function is called when only a part of a memory block is freed 1178 * (usually from the bootmem allocator). 1179 */ 1180 void __ref kmemleak_free_part(const void *ptr, size_t size) 1181 { 1182 pr_debug("%s(0x%px)\n", __func__, ptr); 1183 1184 if (kmemleak_enabled && ptr && !IS_ERR(ptr)) 1185 delete_object_part((unsigned long)ptr, size, 0); 1186 } 1187 EXPORT_SYMBOL_GPL(kmemleak_free_part); 1188 1189 /** 1190 * kmemleak_free_percpu - unregister a previously registered __percpu object 1191 * @ptr: __percpu pointer to beginning of the object 1192 * 1193 * This function is called from the kernel percpu allocator when an object 1194 * (memory block) is freed (free_percpu). 
1195 */ 1196 void __ref kmemleak_free_percpu(const void __percpu *ptr) 1197 { 1198 pr_debug("%s(0x%px)\n", __func__, ptr); 1199 1200 if (kmemleak_free_enabled && ptr && !IS_ERR_PCPU(ptr)) 1201 delete_object_full((__force unsigned long)ptr, OBJECT_PERCPU); 1202 } 1203 EXPORT_SYMBOL_GPL(kmemleak_free_percpu); 1204 1205 /** 1206 * kmemleak_update_trace - update object allocation stack trace 1207 * @ptr: pointer to beginning of the object 1208 * 1209 * Override the object allocation stack trace for cases where the actual 1210 * allocation place is not always useful. 1211 */ 1212 void __ref kmemleak_update_trace(const void *ptr) 1213 { 1214 struct kmemleak_object *object; 1215 depot_stack_handle_t trace_handle; 1216 unsigned long flags; 1217 1218 pr_debug("%s(0x%px)\n", __func__, ptr); 1219 1220 if (!kmemleak_enabled || IS_ERR_OR_NULL(ptr)) 1221 return; 1222 1223 object = find_and_get_object((unsigned long)ptr, 1); 1224 if (!object) { 1225 #ifdef DEBUG 1226 kmemleak_warn("Updating stack trace for unknown object at %p\n", 1227 ptr); 1228 #endif 1229 return; 1230 } 1231 1232 trace_handle = set_track_prepare(); 1233 raw_spin_lock_irqsave(&object->lock, flags); 1234 object->trace_handle = trace_handle; 1235 raw_spin_unlock_irqrestore(&object->lock, flags); 1236 1237 put_object(object); 1238 } 1239 EXPORT_SYMBOL(kmemleak_update_trace); 1240 1241 /** 1242 * kmemleak_not_leak - mark an allocated object as false positive 1243 * @ptr: pointer to beginning of the object 1244 * 1245 * Calling this function on an object will cause the memory block to no longer 1246 * be reported as leak and always be scanned. 
1247 */ 1248 void __ref kmemleak_not_leak(const void *ptr) 1249 { 1250 pr_debug("%s(0x%px)\n", __func__, ptr); 1251 1252 if (kmemleak_enabled && ptr && !IS_ERR(ptr)) 1253 make_gray_object((unsigned long)ptr); 1254 } 1255 EXPORT_SYMBOL(kmemleak_not_leak); 1256 1257 /** 1258 * kmemleak_transient_leak - mark an allocated object as transient false positive 1259 * @ptr: pointer to beginning of the object 1260 * 1261 * Calling this function on an object will cause the memory block to not be 1262 * reported as a leak temporarily. This may happen, for example, if the object 1263 * is part of a singly linked list and the ->next reference to it is changed. 1264 */ 1265 void __ref kmemleak_transient_leak(const void *ptr) 1266 { 1267 pr_debug("%s(0x%px)\n", __func__, ptr); 1268 1269 if (kmemleak_enabled && ptr && !IS_ERR(ptr)) 1270 reset_checksum((unsigned long)ptr); 1271 } 1272 EXPORT_SYMBOL(kmemleak_transient_leak); 1273 1274 /** 1275 * kmemleak_ignore_percpu - similar to kmemleak_ignore but taking a percpu 1276 * address argument 1277 * @ptr: percpu address of the object 1278 */ 1279 void __ref kmemleak_ignore_percpu(const void __percpu *ptr) 1280 { 1281 pr_debug("%s(0x%px)\n", __func__, ptr); 1282 1283 if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr)) 1284 make_black_object((unsigned long)ptr, OBJECT_PERCPU); 1285 } 1286 EXPORT_SYMBOL_GPL(kmemleak_ignore_percpu); 1287 1288 /** 1289 * kmemleak_ignore - ignore an allocated object 1290 * @ptr: pointer to beginning of the object 1291 * 1292 * Calling this function on an object will cause the memory block to be 1293 * ignored (not scanned and not reported as a leak). This is usually done when 1294 * it is known that the corresponding block is not a leak and does not contain 1295 * any references to other allocated memory blocks. 
1296 */ 1297 void __ref kmemleak_ignore(const void *ptr) 1298 { 1299 pr_debug("%s(0x%px)\n", __func__, ptr); 1300 1301 if (kmemleak_enabled && ptr && !IS_ERR(ptr)) 1302 make_black_object((unsigned long)ptr, 0); 1303 } 1304 EXPORT_SYMBOL(kmemleak_ignore); 1305 1306 /** 1307 * kmemleak_scan_area - limit the range to be scanned in an allocated object 1308 * @ptr: pointer to beginning or inside the object. This also 1309 * represents the start of the scan area 1310 * @size: size of the scan area 1311 * @gfp: kmalloc() flags used for kmemleak internal memory allocations 1312 * 1313 * This function is used when it is known that only certain parts of an object 1314 * contain references to other objects. Kmemleak will only scan these areas 1315 * reducing the number false negatives. 1316 */ 1317 void __ref kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) 1318 { 1319 pr_debug("%s(0x%px)\n", __func__, ptr); 1320 1321 if (kmemleak_enabled && ptr && size && !IS_ERR(ptr)) 1322 add_scan_area((unsigned long)ptr, size, gfp); 1323 } 1324 EXPORT_SYMBOL(kmemleak_scan_area); 1325 1326 /** 1327 * kmemleak_no_scan - do not scan an allocated object 1328 * @ptr: pointer to beginning of the object 1329 * 1330 * This function notifies kmemleak not to scan the given memory block. Useful 1331 * in situations where it is known that the given object does not contain any 1332 * references to other objects. Kmemleak will not scan such objects reducing 1333 * the number of false negatives. 
1334 */ 1335 void __ref kmemleak_no_scan(const void *ptr) 1336 { 1337 pr_debug("%s(0x%px)\n", __func__, ptr); 1338 1339 if (kmemleak_enabled && ptr && !IS_ERR(ptr)) 1340 object_no_scan((unsigned long)ptr); 1341 } 1342 EXPORT_SYMBOL(kmemleak_no_scan); 1343 1344 /** 1345 * kmemleak_alloc_phys - similar to kmemleak_alloc but taking a physical 1346 * address argument 1347 * @phys: physical address of the object 1348 * @size: size of the object 1349 * @gfp: kmalloc() flags used for kmemleak internal memory allocations 1350 */ 1351 void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, gfp_t gfp) 1352 { 1353 pr_debug("%s(0x%px, %zu)\n", __func__, &phys, size); 1354 1355 if (kmemleak_enabled) 1356 /* 1357 * Create object with OBJECT_PHYS flag and 1358 * assume min_count 0. 1359 */ 1360 create_object_phys((unsigned long)phys, size, 0, gfp); 1361 } 1362 EXPORT_SYMBOL(kmemleak_alloc_phys); 1363 1364 /** 1365 * kmemleak_free_part_phys - similar to kmemleak_free_part but taking a 1366 * physical address argument 1367 * @phys: physical address if the beginning or inside an object. This 1368 * also represents the start of the range to be freed 1369 * @size: size to be unregistered 1370 */ 1371 void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) 1372 { 1373 pr_debug("%s(0x%px)\n", __func__, &phys); 1374 1375 if (kmemleak_enabled) 1376 delete_object_part((unsigned long)phys, size, OBJECT_PHYS); 1377 } 1378 EXPORT_SYMBOL(kmemleak_free_part_phys); 1379 1380 /** 1381 * kmemleak_ignore_phys - similar to kmemleak_ignore but taking a physical 1382 * address argument 1383 * @phys: physical address of the object 1384 */ 1385 void __ref kmemleak_ignore_phys(phys_addr_t phys) 1386 { 1387 pr_debug("%s(0x%px)\n", __func__, &phys); 1388 1389 if (kmemleak_enabled) 1390 make_black_object((unsigned long)phys, OBJECT_PHYS); 1391 } 1392 EXPORT_SYMBOL(kmemleak_ignore_phys); 1393 1394 /* 1395 * Update an object's checksum and return true if it was modified. 
 */
static bool update_checksum(struct kmemleak_object *object)
{
	u32 old_csum = object->checksum;

	/* Physical-address objects are not expected here: warn once, no change. */
	if (WARN_ON_ONCE(object->flags & OBJECT_PHYS))
		return false;

	/* KASAN/KCSAN checks are disabled while the block contents are read. */
	kasan_disable_current();
	kcsan_disable_current();
	if (object->flags & OBJECT_PERCPU) {
		unsigned int cpu;

		/* Per-CPU object: XOR together the CRC32 of every possible CPU's copy. */
		object->checksum = 0;
		for_each_possible_cpu(cpu) {
			void *ptr = per_cpu_ptr((void __percpu *)object->pointer, cpu);

			object->checksum ^= crc32(0, kasan_reset_tag((void *)ptr), object->size);
		}
	} else {
		object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size);
	}
	kasan_enable_current();
	kcsan_enable_current();

	return object->checksum != old_csum;
}

/*
 * Update an object's references. object->lock must be held by the caller.
 */
static void update_refs(struct kmemleak_object *object)
{
	if (!color_white(object)) {
		/* non-orphan, ignored or new */
		return;
	}

	/*
	 * Increase the object's reference count (number of pointers to the
	 * memory block). If this count reaches the required minimum, the
	 * object's color will become gray and it will be added to the
	 * gray_list.
	 */
	object->count++;
	if (color_gray(object)) {
		/* put_object() called when removing from gray_list */
		WARN_ON(!get_object(object));
		list_add_tail(&object->gray_list, &gray_list);
	}
}

/*
 * Treat 'pointer' (a word read while scanning) as a possible reference and,
 * if it falls inside a tracked object of the kind selected by 'objflags',
 * account for that reference. 'scanned' is the object whose memory is being
 * scanned (NULL when scan_block() is called on memory that is not a tracked
 * object); references from an object to itself are ignored. Called from
 * scan_block() with kmemleak_lock held.
 */
static void pointer_update_refs(struct kmemleak_object *scanned,
				unsigned long pointer, unsigned int objflags)
{
	struct kmemleak_object *object;
	unsigned long untagged_ptr;
	unsigned long excess_ref;

	/* Cheap range check first: skip values outside the tracked address window. */
	untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer);
	if (objflags & OBJECT_PERCPU) {
		if (untagged_ptr < min_percpu_addr || untagged_ptr >= max_percpu_addr)
			return;
	} else {
		if (untagged_ptr < min_addr || untagged_ptr >= max_addr)
			return;
	}

	/*
	 * No need for get_object() here since we hold kmemleak_lock.
	 * object->use_count cannot be dropped to 0 while the object
	 * is still present in object_tree_root and object_list
	 * (with updates protected by kmemleak_lock).
	 */
	object = __lookup_object(pointer, 1, objflags);
	if (!object)
		return;
	if (object == scanned)
		/* self referenced, ignore */
		return;

	/*
	 * Avoid the lockdep recursive warning on object->lock being
	 * previously acquired in scan_object(). These locks are
	 * enclosed by scan_mutex.
	 */
	raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
	/* only pass surplus references (object already gray) */
	if (color_gray(object)) {
		excess_ref = object->excess_ref;
		/* no need for update_refs() if object already gray */
	} else {
		excess_ref = 0;
		update_refs(object);
	}
	raw_spin_unlock(&object->lock);

	/* Forward the surplus reference to the object recorded as excess_ref. */
	if (excess_ref) {
		object = lookup_object(excess_ref, 0);
		if (!object)
			return;
		if (object == scanned)
			/* circular reference, ignore */
			return;
		raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
		update_refs(object);
		raw_spin_unlock(&object->lock);
	}
}

/*
 * Memory scanning is a long process and it needs to be interruptible. This
 * function checks whether such interrupt condition occurred.
 */
static int scan_should_stop(void)
{
	if (!kmemleak_enabled)
		return 1;

	/*
	 * This function may be called from either process or kthread context,
	 * hence the need to check for both stop conditions.
	 */
	if (current->flags & PF_KTHREAD)
		return kthread_should_stop();

	return signal_pending(current);
}

/*
 * Scan a memory block (exclusive range) for valid pointers and add those
 * found to the gray list.
 */
static void scan_block(void *_start, void *_end,
		       struct kmemleak_object *scanned)
{
	unsigned long *ptr;
	unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER);
	/* Pull the end back so only whole pointer-sized words before _end are read. */
	unsigned long *end = _end - (BYTES_PER_POINTER - 1);
	unsigned long flags;

	/* kmemleak_lock is held for the whole block; see pointer_update_refs(). */
	raw_spin_lock_irqsave(&kmemleak_lock, flags);
	for (ptr = start; ptr < end; ptr++) {
		unsigned long pointer;

		if (scan_should_stop())
			break;

		kasan_disable_current();
		pointer = *(unsigned long *)kasan_reset_tag((void *)ptr);
		kasan_enable_current();

		/* Try the word both as a regular address and as a per-CPU address. */
		pointer_update_refs(scanned, pointer, 0);
		pointer_update_refs(scanned, pointer, OBJECT_PERCPU);
	}
	raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
}

/*
 * Scan a large memory block in MAX_SCAN_SIZE chunks to reduce the latency.
 */
#ifdef CONFIG_SMP
static void scan_large_block(void *start, void *end)
{
	void *next;

	while (start < end) {
		next = min(start + MAX_SCAN_SIZE, end);
		scan_block(start, next, NULL);
		start = next;
		/* Give the scheduler a chance between chunks. */
		cond_resched();
	}
}
#endif

/*
 * Scan a memory block corresponding to a kmemleak_object. A condition is
 * that object->use_count >= 1.
 */
static void scan_object(struct kmemleak_object *object)
{
	struct kmemleak_scan_area *area;
	unsigned long flags;

	/*
	 * Once the object->lock is acquired, the corresponding memory block
	 * cannot be freed (the same lock is acquired in delete_object).
	 */
	raw_spin_lock_irqsave(&object->lock, flags);
	if (object->flags & OBJECT_NO_SCAN)
		goto out;
	if (!(object->flags & OBJECT_ALLOCATED))
		/* already freed object */
		goto out;

	if (object->flags & OBJECT_PERCPU) {
		unsigned int cpu;

		/* Per-CPU object: scan each possible CPU's copy in turn. */
		for_each_possible_cpu(cpu) {
			void *start = per_cpu_ptr((void __percpu *)object->pointer, cpu);
			void *end = start + object->size;

			scan_block(start, end, object);

			/*
			 * Drop the lock to allow rescheduling; the object may
			 * have been freed meanwhile, so re-check
			 * OBJECT_ALLOCATED before touching the next copy.
			 */
			raw_spin_unlock_irqrestore(&object->lock, flags);
			cond_resched();
			raw_spin_lock_irqsave(&object->lock, flags);
			if (!(object->flags & OBJECT_ALLOCATED))
				break;
		}
	} else if (hlist_empty(&object->area_list) ||
	    object->flags & OBJECT_FULL_SCAN) {
		/*
		 * No scan areas registered, or a full scan was requested:
		 * scan the whole block. Physical-address objects are accessed
		 * through their __va() mapping.
		 */
		void *start = object->flags & OBJECT_PHYS ?
				__va((phys_addr_t)object->pointer) :
				(void *)object->pointer;
		void *end = start + object->size;
		void *next;

		/* Scan in MAX_SCAN_SIZE chunks, releasing the lock in between. */
		do {
			next = min(start + MAX_SCAN_SIZE, end);
			scan_block(start, next, object);

			start = next;
			if (start >= end)
				break;

			raw_spin_unlock_irqrestore(&object->lock, flags);
			cond_resched();
			raw_spin_lock_irqsave(&object->lock, flags);
		} while (object->flags & OBJECT_ALLOCATED);
	} else {
		/* Only the explicitly registered scan areas are scanned. */
		hlist_for_each_entry(area, &object->area_list, node)
			scan_block((void *)area->start,
				   (void *)(area->start + area->size),
				   object);
	}
out:
	raw_spin_unlock_irqrestore(&object->lock, flags);
}

/*
 * Scan the objects already referenced (gray objects). More objects will be
 * referenced and, if there are no memory leaks, all the objects are scanned.
 */
static void scan_gray_list(void)
{
	struct kmemleak_object *object, *tmp;

	/*
	 * The list traversal is safe for both tail additions and removals
	 * from inside the loop. The kmemleak objects cannot be freed from
	 * outside the loop because their use_count was incremented.
	 */
	object = list_entry(gray_list.next, typeof(*object), gray_list);
	while (&object->gray_list != &gray_list) {
		cond_resched();

		/* may add new objects to the list */
		if (!scan_should_stop())
			scan_object(object);

		/* Fetch the successor only after scanning, so new tail entries are seen. */
		tmp = list_entry(object->gray_list.next, typeof(*object),
				 gray_list);

		/* remove the object from the list and release it */
		list_del(&object->gray_list);
		put_object(object);

		object = tmp;
	}
	WARN_ON(!list_empty(&gray_list));
}

/*
 * Conditionally call resched() in an object iteration loop while making sure
 * that the given object won't go away without RCU read lock by performing a
 * get_object() if necessary.
 */
static void kmemleak_cond_resched(struct kmemleak_object *object)
{
	if (!get_object(object))
		return;	/* Try next object */

	raw_spin_lock_irq(&kmemleak_lock);
	if (object->del_state & DELSTATE_REMOVED)
		goto unlock_put;	/* Object removed */
	/*
	 * NOTE(review): DELSTATE_NO_DELETE presumably asks the removal path to
	 * leave the object on object_list while we sleep - confirm against the
	 * code that sets DELSTATE_REMOVED.
	 */
	object->del_state |= DELSTATE_NO_DELETE;
	raw_spin_unlock_irq(&kmemleak_lock);

	/* The RCU read-side section is left only around the reschedule point. */
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();

	raw_spin_lock_irq(&kmemleak_lock);
	/* If the object was marked removed meanwhile, unlink it from the list here. */
	if (object->del_state & DELSTATE_REMOVED)
		list_del_rcu(&object->object_list);
	object->del_state &= ~DELSTATE_NO_DELETE;
unlock_put:
	raw_spin_unlock_irq(&kmemleak_lock);
	put_object(object);
}

/*
 * Print one leak inline. The hex dump is gated on OBJECT_ALLOCATED so it
 * does not touch user memory that was freed concurrently; the rest of the
 * report (backtrace, comm, pid) is always emitted since the kmemleak_object
 * metadata is pinned by the caller.
 */
static void print_leak_locked(struct kmemleak_object *object, bool hex_dump)
{
	raw_spin_lock_irq(&object->lock);
	__print_unreferenced(NULL, object,
			     hex_dump && (object->flags & OBJECT_ALLOCATED));
	raw_spin_unlock_irq(&object->lock);
}

/*
 * Per-scan dedup table for verbose leak printing. The xarray is keyed by
 * stackdepot trace_handle and stores a pointer to the representative
 * kmemleak_object. The per-scan repeat count lives in object->dup_count.
 *
 * dedup_record() must run outside object->lock: xa_store() may take
 * mutexes (xa_node slab allocation) which lockdep would flag against the
 * raw spinlock object->lock.
 */
static void dedup_record(struct xarray *dedup, struct kmemleak_object *object,
			 depot_stack_handle_t trace_handle)
{
	struct kmemleak_object *rep;
	void *old;

	/*
	 * No stack trace to dedup against: early-boot allocation tracked
	 * before kmemleak_init() set up object_cache, or stack_depot_save()
	 * failure under memory pressure.
	 */
	if (!trace_handle) {
		print_leak_locked(object, true);
		return;
	}

	/* stack is available, now we can de-dup */
	rep = xa_load(dedup, trace_handle);
	if (rep) {
		/* Backtrace already has a representative: just count this one. */
		rep->dup_count++;
		return;
	}

	/*
	 * Object is being torn down (use_count already hit zero); the
	 * tracked memory at object->pointer is unsafe to read, so skip.
	 */
	if (!get_object(object))
		return;

	/* First object seen with this backtrace: it becomes the representative. */
	object->dup_count = 1;
	old = xa_store(dedup, trace_handle, object, GFP_ATOMIC);
	if (xa_is_err(old)) {
		/* xa_node allocation failed; fall back to inline print. */
		print_leak_locked(object, true);
		put_object(object);
		return;
	}
	/*
	 * scan_mutex serialises all writers to the dedup xarray, so xa_store()
	 * after a NULL xa_load() must always overwrite an empty slot.
	 */
	WARN_ON_ONCE(old);
}

/*
 * Drain the dedup table. Re-acquires object->lock and re-checks
 * OBJECT_ALLOCATED before printing: while get_object() pins the
 * kmemleak_object metadata, the underlying tracked allocation may have
 * been freed since the scan walked it (kmemleak_free clears
 * OBJECT_ALLOCATED under object->lock before the user memory goes away).
 * The hex dump is skipped for coalesced entries since the bytes would
 * differ across objects anyway.
 */
static void dedup_flush(struct xarray *dedup)
{
	struct kmemleak_object *object;
	unsigned long idx;
	unsigned int dup;
	bool coalesced;

	xa_for_each(dedup, idx, object) {
		dup = object->dup_count;
		coalesced = dup > 1;

		print_leak_locked(object, !coalesced);
		if (coalesced)
			pr_warn("  ... and %u more object(s) with the same backtrace\n",
				dup - 1);
		/* Drop the reference taken in dedup_record() and empty the slot. */
		put_object(object);
		xa_erase(dedup, idx);
	}
}

/*
 * Scan data sections and all the referenced memory blocks allocated via the
 * kernel's standard allocators. This function must be called with the
 * scan_mutex held.
 */
static void kmemleak_scan(void)
{
	struct kmemleak_object *object;
	struct zone *zone;
	int __maybe_unused i;
	struct xarray dedup;
	int new_leaks = 0;

	jiffies_last_scan = jiffies;

	/* prepare the kmemleak_object's */
	rcu_read_lock();
	list_for_each_entry_rcu(object, &object_list, object_list) {
		raw_spin_lock_irq(&object->lock);
#ifdef DEBUG
		/*
		 * With a few exceptions there should be a maximum of
		 * 1 reference to any object at this point.
		 */
		if (atomic_read(&object->use_count) > 1) {
			pr_debug("object->use_count = %d\n",
				 atomic_read(&object->use_count));
			dump_object_info(object);
		}
#endif

		/* ignore objects outside lowmem (paint them black) */
		if ((object->flags & OBJECT_PHYS) &&
		   !(object->flags & OBJECT_NO_SCAN)) {
			unsigned long phys = object->pointer;

			if (PHYS_PFN(phys) < min_low_pfn ||
			    PHYS_PFN(phys + object->size) > max_low_pfn)
				__paint_it(object, KMEMLEAK_BLACK);
		}

		/* reset the reference count (whiten the object) */
		object->count = 0;
		/* Objects that are gray even with a zero count seed the gray_list. */
		if (color_gray(object) && get_object(object))
			list_add_tail(&object->gray_list, &gray_list);

		raw_spin_unlock_irq(&object->lock);

		if (need_resched())
			kmemleak_cond_resched(object);
	}
	rcu_read_unlock();

#ifdef CONFIG_SMP
	/* per-cpu sections scanning */
	for_each_possible_cpu(i)
		scan_large_block(__per_cpu_start + per_cpu_offset(i),
				 __per_cpu_end + per_cpu_offset(i));
#endif

	/*
	 * Struct page scanning for each node.
	 */
	get_online_mems();
	for_each_populated_zone(zone) {
		unsigned long start_pfn = zone->zone_start_pfn;
		unsigned long end_pfn = zone_end_pfn(zone);
		unsigned long pfn;

		for (pfn = start_pfn; pfn < end_pfn; pfn++) {
			struct page *page = pfn_to_online_page(pfn);

			/* Reschedule point once every 64 pfns. */
			if (!(pfn & 63))
				cond_resched();

			if (!page)
				continue;

			/* only scan pages belonging to this zone */
			if (page_zone(page) != zone)
				continue;
			/* only scan if page is in use */
			if (page_count(page) == 0)
				continue;
			/* The struct page itself is scanned, not the page's contents. */
			scan_block(page, page + 1, NULL);
		}
	}
	put_online_mems();

	/*
	 * Scanning the task stacks (may introduce false negatives).
	 */
	if (kmemleak_stack_scan) {
		struct task_struct *p, *g;

		rcu_read_lock();
		for_each_process_thread(g, p) {
			void *stack = try_get_task_stack(p);
			if (stack) {
				scan_block(stack, stack + THREAD_SIZE, NULL);
				put_task_stack(p);
			}
		}
		rcu_read_unlock();
	}

	/*
	 * Scan the objects already referenced from the sections scanned
	 * above.
	 */
	scan_gray_list();

	/*
	 * Check for new or unreferenced objects modified since the previous
	 * scan and color them gray until the next scan.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(object, &object_list, object_list) {
		if (need_resched())
			kmemleak_cond_resched(object);

		/*
		 * This is racy but we can save the overhead of lock/unlock
		 * calls. The missed objects, if any, should be caught in
		 * the next scan.
		 */
		if (!color_white(object))
			continue;
		raw_spin_lock_irq(&object->lock);
		/* Re-check under the lock; only objects whose contents changed qualify. */
		if (color_white(object) && (object->flags & OBJECT_ALLOCATED)
		    && update_checksum(object) && get_object(object)) {
			/* color it gray temporarily */
			object->count = object->min_count;
			list_add_tail(&object->gray_list, &gray_list);
		}
		raw_spin_unlock_irq(&object->lock);
	}
	rcu_read_unlock();

	/*
	 * Re-scan the gray list for modified unreferenced objects.
	 */
	scan_gray_list();

	/*
	 * If scanning was stopped do not report any new unreferenced objects.
	 */
	if (scan_should_stop())
		return;

	/*
	 * Scanning result reporting. When verbose printing is enabled, dedupe
	 * by stackdepot trace_handle so each unique backtrace is logged once
	 * per scan, annotated with the number of objects that share it. The
	 * per-leak count below still reflects every object, and
	 * /sys/kernel/debug/kmemleak still lists them individually.
	 */
	xa_init(&dedup);
	rcu_read_lock();
	list_for_each_entry_rcu(object, &object_list, object_list) {
		depot_stack_handle_t trace_handle;
		bool dedup_print;

		if (need_resched())
			kmemleak_cond_resched(object);

		/*
		 * This is racy but we can save the overhead of lock/unlock
		 * calls. The missed objects, if any, should be caught in
		 * the next scan.
		 */
		if (!color_white(object))
			continue;
		raw_spin_lock_irq(&object->lock);
		trace_handle = 0;
		dedup_print = false;
		/* OBJECT_REPORTED ensures each leak is counted only once. */
		if (unreferenced_object(object) &&
		    !(object->flags & OBJECT_REPORTED)) {
			object->flags |= OBJECT_REPORTED;
			if (kmemleak_verbose) {
				trace_handle = object->trace_handle;
				dedup_print = true;
			}
			new_leaks++;
		}
		raw_spin_unlock_irq(&object->lock);

		/*
		 * Defer the verbose print outside object->lock: xa_store()
		 * may take xa_node slab locks at a higher wait-context level
		 * which lockdep would flag against the raw_spinlock_t
		 * object->lock. rcu_read_lock() keeps the kmemleak_object
		 * alive across the call.
		 */
		if (dedup_print)
			dedup_record(&dedup, object, trace_handle);
	}
	rcu_read_unlock();
	/* Flush'em all */
	dedup_flush(&dedup);
	xa_destroy(&dedup);

	if (new_leaks) {
		kmemleak_found_leaks = true;

		pr_info("%d new suspected memory leaks (see /sys/kernel/debug/kmemleak)\n",
			new_leaks);
	}

}

/*
 * Thread function performing automatic memory scanning. Unreferenced objects
 * at the end of a memory scan are reported but only the first time.
 */
static int kmemleak_scan_thread(void *arg)
{
	/* Static: the initial delay applies only to the first thread start. */
	static int first_run = IS_ENABLED(CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN);

	pr_info("Automatic memory scanning thread started\n");
	set_user_nice(current, 10);

	/*
	 * Wait before the first scan to allow the system to fully initialize.
	 */
	if (first_run) {
		signed long timeout = secs_to_jiffies(SECS_FIRST_SCAN);
		first_run = 0;
		while (timeout && !kthread_should_stop())
			timeout = schedule_timeout_interruptible(timeout);
	}

	while (!kthread_should_stop()) {
		/* The wait interval is sampled once per iteration, before the scan. */
		signed long timeout = READ_ONCE(jiffies_scan_wait);

		mutex_lock(&scan_mutex);
		kmemleak_scan();
		mutex_unlock(&scan_mutex);

		/* wait before the next scan */
		while (timeout && !kthread_should_stop())
			timeout = schedule_timeout_interruptible(timeout);
	}

	pr_info("Automatic memory scanning thread ended\n");

	return 0;
}

/*
 * Start the automatic memory scanning thread. This function must be called
 * with the scan_mutex held.
 */
static void start_scan_thread(void)
{
	/* Already running: nothing to do. */
	if (scan_thread)
		return;
	scan_thread = kthread_run(kmemleak_scan_thread, NULL, "kmemleak");
	if (IS_ERR(scan_thread)) {
		pr_warn("Failed to create the scan thread\n");
		/* Reset to NULL so a later call can try again. */
		scan_thread = NULL;
	}
}

/*
 * Stop the automatic memory scanning thread.
 */
static void stop_scan_thread(void)
{
	if (scan_thread) {
		kthread_stop(scan_thread);
		scan_thread = NULL;
	}
}

/*
 * Iterate over the object_list and return the first valid object at or after
 * the required position with its use_count incremented. The function triggers
 * a memory scanning when the pos argument points to the first position.
2074 */ 2075 static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos) 2076 { 2077 struct kmemleak_object *object; 2078 loff_t n = *pos; 2079 int err; 2080 2081 err = mutex_lock_interruptible(&scan_mutex); 2082 if (err < 0) 2083 return ERR_PTR(err); 2084 2085 rcu_read_lock(); 2086 list_for_each_entry_rcu(object, &object_list, object_list) { 2087 if (n-- > 0) 2088 continue; 2089 if (get_object(object)) 2090 goto out; 2091 } 2092 object = NULL; 2093 out: 2094 return object; 2095 } 2096 2097 /* 2098 * Return the next object in the object_list. The function decrements the 2099 * use_count of the previous object and increases that of the next one. 2100 */ 2101 static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2102 { 2103 struct kmemleak_object *prev_obj = v; 2104 struct kmemleak_object *next_obj = NULL; 2105 struct kmemleak_object *obj = prev_obj; 2106 2107 ++(*pos); 2108 2109 list_for_each_entry_continue_rcu(obj, &object_list, object_list) { 2110 if (get_object(obj)) { 2111 next_obj = obj; 2112 break; 2113 } 2114 } 2115 2116 put_object(prev_obj); 2117 return next_obj; 2118 } 2119 2120 /* 2121 * Decrement the use_count of the last object required, if any. 2122 */ 2123 static void kmemleak_seq_stop(struct seq_file *seq, void *v) 2124 { 2125 if (!IS_ERR(v)) { 2126 /* 2127 * kmemleak_seq_start may return ERR_PTR if the scan_mutex 2128 * waiting was interrupted, so only release it if !IS_ERR. 2129 */ 2130 rcu_read_unlock(); 2131 mutex_unlock(&scan_mutex); 2132 if (v) 2133 put_object(v); 2134 } 2135 } 2136 2137 /* 2138 * Print the information for an unreferenced object to the seq file. 
2139 */ 2140 static int kmemleak_seq_show(struct seq_file *seq, void *v) 2141 { 2142 struct kmemleak_object *object = v; 2143 unsigned long flags; 2144 2145 raw_spin_lock_irqsave(&object->lock, flags); 2146 if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object)) 2147 print_unreferenced(seq, object); 2148 raw_spin_unlock_irqrestore(&object->lock, flags); 2149 return 0; 2150 } 2151 2152 static const struct seq_operations kmemleak_seq_ops = { 2153 .start = kmemleak_seq_start, 2154 .next = kmemleak_seq_next, 2155 .stop = kmemleak_seq_stop, 2156 .show = kmemleak_seq_show, 2157 }; 2158 2159 static int kmemleak_open(struct inode *inode, struct file *file) 2160 { 2161 return seq_open(file, &kmemleak_seq_ops); 2162 } 2163 2164 static bool __dump_str_object_info(unsigned long addr, unsigned int objflags) 2165 { 2166 unsigned long flags; 2167 struct kmemleak_object *object; 2168 2169 object = __find_and_get_object(addr, 1, objflags); 2170 if (!object) 2171 return false; 2172 2173 raw_spin_lock_irqsave(&object->lock, flags); 2174 dump_object_info(object); 2175 raw_spin_unlock_irqrestore(&object->lock, flags); 2176 2177 put_object(object); 2178 2179 return true; 2180 } 2181 2182 static int dump_str_object_info(const char *str) 2183 { 2184 unsigned long addr; 2185 bool found = false; 2186 2187 if (kstrtoul(str, 0, &addr)) 2188 return -EINVAL; 2189 2190 found |= __dump_str_object_info(addr, 0); 2191 found |= __dump_str_object_info(addr, OBJECT_PHYS); 2192 found |= __dump_str_object_info(addr, OBJECT_PERCPU); 2193 2194 if (!found) { 2195 pr_info("Unknown object at 0x%08lx\n", addr); 2196 return -EINVAL; 2197 } 2198 2199 return 0; 2200 } 2201 2202 /* 2203 * We use grey instead of black to ensure we can do future scans on the same 2204 * objects. If we did not do future scans these black objects could 2205 * potentially contain references to newly allocated objects in the future and 2206 * we'd end up with false positives. 
2207 */ 2208 static void kmemleak_clear(void) 2209 { 2210 struct kmemleak_object *object; 2211 2212 rcu_read_lock(); 2213 list_for_each_entry_rcu(object, &object_list, object_list) { 2214 raw_spin_lock_irq(&object->lock); 2215 if ((object->flags & OBJECT_REPORTED) && 2216 unreferenced_object(object)) 2217 __paint_it(object, KMEMLEAK_GREY); 2218 raw_spin_unlock_irq(&object->lock); 2219 } 2220 rcu_read_unlock(); 2221 2222 kmemleak_found_leaks = false; 2223 } 2224 2225 static void __kmemleak_do_cleanup(void); 2226 2227 /* 2228 * File write operation to configure kmemleak at run-time. The following 2229 * commands can be written to the /sys/kernel/debug/kmemleak file: 2230 * off - disable kmemleak (irreversible) 2231 * stack=on - enable the task stacks scanning 2232 * stack=off - disable the tasks stacks scanning 2233 * scan=on - start the automatic memory scanning thread 2234 * scan=off - stop the automatic memory scanning thread 2235 * scan=... - set the automatic memory scanning period in seconds (0 to 2236 * disable it) 2237 * scan - trigger a memory scan 2238 * clear - mark all current reported unreferenced kmemleak objects as 2239 * grey to ignore printing them, or free all kmemleak objects 2240 * if kmemleak has been disabled. 2241 * dump=... 
- dump information about the object found at the given address 2242 */ 2243 static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, 2244 size_t size, loff_t *ppos) 2245 { 2246 char buf[64]; 2247 int buf_size; 2248 int ret; 2249 2250 buf_size = min(size, (sizeof(buf) - 1)); 2251 if (strncpy_from_user(buf, user_buf, buf_size) < 0) 2252 return -EFAULT; 2253 buf[buf_size] = 0; 2254 2255 ret = mutex_lock_interruptible(&scan_mutex); 2256 if (ret < 0) 2257 return ret; 2258 2259 if (strncmp(buf, "clear", 5) == 0) { 2260 if (kmemleak_enabled) 2261 kmemleak_clear(); 2262 else 2263 __kmemleak_do_cleanup(); 2264 goto out; 2265 } 2266 2267 if (!kmemleak_enabled) { 2268 ret = -EPERM; 2269 goto out; 2270 } 2271 2272 if (strncmp(buf, "off", 3) == 0) 2273 kmemleak_disable(); 2274 else if (strncmp(buf, "stack=on", 8) == 0) 2275 kmemleak_stack_scan = 1; 2276 else if (strncmp(buf, "stack=off", 9) == 0) 2277 kmemleak_stack_scan = 0; 2278 else if (strncmp(buf, "scan=on", 7) == 0) 2279 start_scan_thread(); 2280 else if (strncmp(buf, "scan=off", 8) == 0) 2281 stop_scan_thread(); 2282 else if (strncmp(buf, "scan=", 5) == 0) { 2283 unsigned secs; 2284 unsigned long msecs; 2285 2286 ret = kstrtouint(buf + 5, 0, &secs); 2287 if (ret < 0) 2288 goto out; 2289 2290 msecs = secs * MSEC_PER_SEC; 2291 if (msecs > UINT_MAX) 2292 msecs = UINT_MAX; 2293 2294 stop_scan_thread(); 2295 if (msecs) { 2296 WRITE_ONCE(jiffies_scan_wait, msecs_to_jiffies(msecs)); 2297 start_scan_thread(); 2298 } 2299 } else if (strncmp(buf, "scan", 4) == 0) 2300 kmemleak_scan(); 2301 else if (strncmp(buf, "dump=", 5) == 0) 2302 ret = dump_str_object_info(buf + 5); 2303 else 2304 ret = -EINVAL; 2305 2306 out: 2307 mutex_unlock(&scan_mutex); 2308 if (ret < 0) 2309 return ret; 2310 2311 /* ignore the rest of the buffer, only one command at a time */ 2312 *ppos += size; 2313 return size; 2314 } 2315 2316 static const struct file_operations kmemleak_fops = { 2317 .owner = THIS_MODULE, 2318 .open = 
kmemleak_open, 2319 .read = seq_read, 2320 .write = kmemleak_write, 2321 .llseek = seq_lseek, 2322 .release = seq_release, 2323 }; 2324 2325 static void __kmemleak_do_cleanup(void) 2326 { 2327 struct kmemleak_object *object, *tmp; 2328 unsigned int cnt = 0; 2329 2330 /* 2331 * Kmemleak has already been disabled, no need for RCU list traversal 2332 * or kmemleak_lock held. 2333 */ 2334 list_for_each_entry_safe(object, tmp, &object_list, object_list) { 2335 __remove_object(object); 2336 __delete_object(object); 2337 2338 /* Call cond_resched() once per 64 iterations to avoid soft lockup */ 2339 if (!(++cnt & 0x3f)) 2340 cond_resched(); 2341 } 2342 } 2343 2344 /* 2345 * Stop the memory scanning thread and free the kmemleak internal objects if 2346 * no previous scan thread (otherwise, kmemleak may still have some useful 2347 * information on memory leaks). 2348 */ 2349 static void kmemleak_do_cleanup(struct work_struct *work) 2350 { 2351 stop_scan_thread(); 2352 2353 mutex_lock(&scan_mutex); 2354 /* 2355 * Once it is made sure that kmemleak_scan has stopped, it is safe to no 2356 * longer track object freeing. Ordering of the scan thread stopping and 2357 * the memory accesses below is guaranteed by the kthread_stop() 2358 * function. 2359 */ 2360 kmemleak_free_enabled = 0; 2361 mutex_unlock(&scan_mutex); 2362 2363 if (!kmemleak_found_leaks) 2364 __kmemleak_do_cleanup(); 2365 else 2366 pr_info("Kmemleak disabled without freeing internal data. Reclaim the memory with \"echo clear > /sys/kernel/debug/kmemleak\".\n"); 2367 } 2368 2369 static DECLARE_WORK(cleanup_work, kmemleak_do_cleanup); 2370 2371 /* 2372 * Disable kmemleak. No memory allocation/freeing will be traced once this 2373 * function is called. Disabling kmemleak is an irreversible operation. 
2374 */ 2375 static void kmemleak_disable(void) 2376 { 2377 /* atomically check whether it was already invoked */ 2378 if (cmpxchg(&kmemleak_error, 0, 1)) 2379 return; 2380 2381 /* stop any memory operation tracing */ 2382 kmemleak_enabled = 0; 2383 2384 /* check whether it is too early for a kernel thread */ 2385 if (kmemleak_late_initialized) 2386 schedule_work(&cleanup_work); 2387 else 2388 kmemleak_free_enabled = 0; 2389 2390 pr_info("Kernel memory leak detector disabled\n"); 2391 } 2392 2393 /* 2394 * Allow boot-time kmemleak disabling (enabled by default). 2395 */ 2396 static int __init kmemleak_boot_config(char *str) 2397 { 2398 if (!str) 2399 return -EINVAL; 2400 if (strcmp(str, "off") == 0) 2401 kmemleak_disable(); 2402 else if (strcmp(str, "on") == 0) { 2403 kmemleak_skip_disable = 1; 2404 stack_depot_request_early_init(); 2405 } 2406 else 2407 return -EINVAL; 2408 return 0; 2409 } 2410 early_param("kmemleak", kmemleak_boot_config); 2411 2412 /* 2413 * Kmemleak initialization. 
2414 */ 2415 void __init kmemleak_init(void) 2416 { 2417 #ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF 2418 if (!kmemleak_skip_disable) { 2419 kmemleak_disable(); 2420 return; 2421 } 2422 #endif 2423 2424 if (kmemleak_error) 2425 return; 2426 2427 jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); 2428 jiffies_scan_wait = secs_to_jiffies(SECS_SCAN_WAIT); 2429 2430 object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE); 2431 scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE); 2432 2433 /* register the data/bss sections */ 2434 create_object((unsigned long)_sdata, _edata - _sdata, 2435 KMEMLEAK_GREY, GFP_ATOMIC); 2436 create_object((unsigned long)__bss_start, __bss_stop - __bss_start, 2437 KMEMLEAK_GREY, GFP_ATOMIC); 2438 /* only register .data..ro_after_init if not within .data */ 2439 if (&__start_ro_after_init < &_sdata || &__end_ro_after_init > &_edata) 2440 create_object((unsigned long)__start_ro_after_init, 2441 __end_ro_after_init - __start_ro_after_init, 2442 KMEMLEAK_GREY, GFP_ATOMIC); 2443 } 2444 2445 /* 2446 * Late initialization function. 2447 */ 2448 static int __init kmemleak_late_init(void) 2449 { 2450 kmemleak_late_initialized = 1; 2451 2452 debugfs_create_file("kmemleak", 0644, NULL, NULL, &kmemleak_fops); 2453 2454 if (kmemleak_error) { 2455 /* 2456 * Some error occurred and kmemleak was disabled. There is a 2457 * small chance that kmemleak_disable() was called immediately 2458 * after setting kmemleak_late_initialized and we may end up with 2459 * two clean-up threads but serialized by scan_mutex. 2460 */ 2461 schedule_work(&cleanup_work); 2462 return -ENOMEM; 2463 } 2464 2465 if (IS_ENABLED(CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN)) { 2466 mutex_lock(&scan_mutex); 2467 start_scan_thread(); 2468 mutex_unlock(&scan_mutex); 2469 } 2470 2471 pr_info("Kernel memory leak detector initialized (mem pool available: %d)\n", 2472 mem_pool_free_count); 2473 2474 return 0; 2475 } 2476 late_initcall(kmemleak_late_init); 2477