1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Stack depot - a stack trace storage that avoids duplication. 4 * 5 * Internally, stack depot maintains a hash table of unique stacktraces. The 6 * stack traces themselves are stored contiguously one after another in a set 7 * of separate page allocations. 8 * 9 * Author: Alexander Potapenko <glider@google.com> 10 * Copyright (C) 2016 Google, Inc. 11 * 12 * Based on the code by Dmitry Chernenkov. 13 */ 14 15 #define pr_fmt(fmt) "stackdepot: " fmt 16 17 #include <linux/debugfs.h> 18 #include <linux/gfp.h> 19 #include <linux/jhash.h> 20 #include <linux/kernel.h> 21 #include <linux/kmsan.h> 22 #include <linux/list.h> 23 #include <linux/mm.h> 24 #include <linux/mutex.h> 25 #include <linux/poison.h> 26 #include <linux/printk.h> 27 #include <linux/rculist.h> 28 #include <linux/rcupdate.h> 29 #include <linux/refcount.h> 30 #include <linux/slab.h> 31 #include <linux/spinlock.h> 32 #include <linux/stacktrace.h> 33 #include <linux/stackdepot.h> 34 #include <linux/string.h> 35 #include <linux/types.h> 36 #include <linux/memblock.h> 37 #include <linux/kasan-enabled.h> 38 39 #define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) 40 41 #define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ 42 #define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER)) 43 #define DEPOT_STACK_ALIGN 4 44 #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) 45 #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ 46 STACK_DEPOT_EXTRA_BITS) 47 #define DEPOT_POOLS_CAP 8192 48 #define DEPOT_MAX_POOLS \ 49 (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ 50 (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP) 51 52 /* Compact structure that stores a reference to a stack. */ 53 union handle_parts { 54 depot_stack_handle_t handle; 55 struct { 56 u32 pool_index : DEPOT_POOL_INDEX_BITS; 57 u32 offset : DEPOT_OFFSET_BITS; 58 u32 extra : STACK_DEPOT_EXTRA_BITS; 59 }; 60 }; 61 62 struct stack_record { 63 struct list_head hash_list; /* Links in the hash table */ 64 u32 hash; /* Hash in hash table */ 65 u32 size; /* Number of stored frames */ 66 union handle_parts handle; /* Constant after initialization */ 67 refcount_t count; 68 union { 69 unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ 70 struct { 71 /* 72 * An important invariant of the implementation is to 73 * only place a stack record onto the freelist iff its 74 * refcount is zero. Because stack records with a zero 75 * refcount are never considered as valid, it is safe to 76 * union @entries and freelist management state below. 77 * Conversely, as soon as an entry is off the freelist 78 * and its refcount becomes non-zero, the below must not 79 * be accessed until being placed back on the freelist. 80 */ 81 struct list_head free_list; /* Links in the freelist */ 82 unsigned long rcu_state; /* RCU cookie */ 83 }; 84 }; 85 }; 86 87 static bool stack_depot_disabled; 88 static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); 89 static bool __stack_depot_early_init_passed __initdata; 90 91 /* Use one hash table bucket per 16 KB of memory. */ 92 #define STACK_HASH_TABLE_SCALE 14 93 /* Limit the number of buckets between 4K and 1M. */ 94 #define STACK_BUCKET_NUMBER_ORDER_MIN 12 95 #define STACK_BUCKET_NUMBER_ORDER_MAX 20 96 /* Initial seed for jhash2. */ 97 #define STACK_HASH_SEED 0x9747b28c 98 99 /* Hash table of stored stack records. */ 100 static struct list_head *stack_table; 101 /* Fixed order of the number of table buckets. Used when KASAN is enabled. */ 102 static unsigned int stack_bucket_number_order; 103 /* Hash mask for indexing the table. */ 104 static unsigned int stack_hash_mask; 105 106 /* Array of memory regions that store stack records. */ 107 static void *stack_pools[DEPOT_MAX_POOLS]; 108 /* Newly allocated pool that is not yet added to stack_pools. */ 109 static void *new_pool; 110 /* Number of pools in stack_pools. */ 111 static int pools_num; 112 /* Offset to the unused space in the currently used pool. */ 113 static size_t pool_offset = DEPOT_POOL_SIZE; 114 /* Freelist of stack records within stack_pools. */ 115 static LIST_HEAD(free_stacks); 116 /* The lock must be held when performing pool or freelist modifications. */ 117 static DEFINE_RAW_SPINLOCK(pool_lock); 118 119 /* Statistics counters for debugfs. */ 120 enum depot_counter_id { 121 DEPOT_COUNTER_REFD_ALLOCS, 122 DEPOT_COUNTER_REFD_FREES, 123 DEPOT_COUNTER_REFD_INUSE, 124 DEPOT_COUNTER_FREELIST_SIZE, 125 DEPOT_COUNTER_PERSIST_COUNT, 126 DEPOT_COUNTER_PERSIST_BYTES, 127 DEPOT_COUNTER_COUNT, 128 }; 129 static long counters[DEPOT_COUNTER_COUNT]; 130 static const char *const counter_names[] = { 131 [DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations", 132 [DEPOT_COUNTER_REFD_FREES] = "refcounted_frees", 133 [DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use", 134 [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size", 135 [DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count", 136 [DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes", 137 }; 138 static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT); 139 140 static int __init disable_stack_depot(char *str) 141 { 142 return kstrtobool(str, &stack_depot_disabled); 143 } 144 early_param("stack_depot_disable", disable_stack_depot); 145 146 void __init stack_depot_request_early_init(void) 147 { 148 /* Too late to request early init now. */ 149 WARN_ON(__stack_depot_early_init_passed); 150 151 __stack_depot_early_init_requested = true; 152 } 153 154 /* Initialize list_head's within the hash table. */ 155 static void init_stack_table(unsigned long entries) 156 { 157 unsigned long i; 158 159 for (i = 0; i < entries; i++) 160 INIT_LIST_HEAD(&stack_table[i]); 161 } 162 163 /* Allocates a hash table via memblock. Can only be used during early boot. */ 164 int __init stack_depot_early_init(void) 165 { 166 unsigned long entries = 0; 167 168 /* This function must be called only once, from mm_init(). */ 169 if (WARN_ON(__stack_depot_early_init_passed)) 170 return 0; 171 __stack_depot_early_init_passed = true; 172 173 /* 174 * Print disabled message even if early init has not been requested: 175 * stack_depot_init() will not print one. 176 */ 177 if (stack_depot_disabled) { 178 pr_info("disabled\n"); 179 return 0; 180 } 181 182 /* 183 * If KASAN is enabled, use the maximum order: KASAN is frequently used 184 * in fuzzing scenarios, which leads to a large number of different 185 * stack traces being stored in stack depot. 186 */ 187 if (kasan_enabled() && !stack_bucket_number_order) 188 stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX; 189 190 /* 191 * Check if early init has been requested after setting 192 * stack_bucket_number_order: stack_depot_init() uses its value. 193 */ 194 if (!__stack_depot_early_init_requested) 195 return 0; 196 197 /* 198 * If stack_bucket_number_order is not set, leave entries as 0 to rely 199 * on the automatic calculations performed by alloc_large_system_hash(). 200 */ 201 if (stack_bucket_number_order) 202 entries = 1UL << stack_bucket_number_order; 203 pr_info("allocating hash table via alloc_large_system_hash\n"); 204 stack_table = alloc_large_system_hash("stackdepot", 205 sizeof(struct list_head), 206 entries, 207 STACK_HASH_TABLE_SCALE, 208 HASH_EARLY, 209 NULL, 210 &stack_hash_mask, 211 1UL << STACK_BUCKET_NUMBER_ORDER_MIN, 212 1UL << STACK_BUCKET_NUMBER_ORDER_MAX); 213 if (!stack_table) { 214 pr_err("hash table allocation failed, disabling\n"); 215 stack_depot_disabled = true; 216 return -ENOMEM; 217 } 218 if (!entries) { 219 /* 220 * Obtain the number of entries that was calculated by 221 * alloc_large_system_hash(). 222 */ 223 entries = stack_hash_mask + 1; 224 } 225 init_stack_table(entries); 226 227 return 0; 228 } 229 230 /* Allocates a hash table via kvcalloc. Can be used after boot. */ 231 int stack_depot_init(void) 232 { 233 static DEFINE_MUTEX(stack_depot_init_mutex); 234 unsigned long entries; 235 int ret = 0; 236 237 mutex_lock(&stack_depot_init_mutex); 238 239 if (stack_depot_disabled || stack_table) 240 goto out_unlock; 241 242 /* 243 * Similarly to stack_depot_early_init, use stack_bucket_number_order 244 * if assigned, and rely on automatic scaling otherwise. 245 */ 246 if (stack_bucket_number_order) { 247 entries = 1UL << stack_bucket_number_order; 248 } else { 249 int scale = STACK_HASH_TABLE_SCALE; 250 251 entries = nr_free_buffer_pages(); 252 entries = roundup_pow_of_two(entries); 253 254 if (scale > PAGE_SHIFT) 255 entries >>= (scale - PAGE_SHIFT); 256 else 257 entries <<= (PAGE_SHIFT - scale); 258 } 259 260 if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN) 261 entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN; 262 if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX) 263 entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX; 264 265 pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); 266 stack_table = kvcalloc(entries, sizeof(struct list_head), GFP_KERNEL); 267 if (!stack_table) { 268 pr_err("hash table allocation failed, disabling\n"); 269 stack_depot_disabled = true; 270 ret = -ENOMEM; 271 goto out_unlock; 272 } 273 stack_hash_mask = entries - 1; 274 init_stack_table(entries); 275 276 out_unlock: 277 mutex_unlock(&stack_depot_init_mutex); 278 279 return ret; 280 } 281 EXPORT_SYMBOL_GPL(stack_depot_init); 282 283 /* 284 * Initializes new stack pool, and updates the list of pools. 285 */ 286 static bool depot_init_pool(void **prealloc) 287 { 288 lockdep_assert_held(&pool_lock); 289 290 if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { 291 /* Bail out if we reached the pool limit. */ 292 WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */ 293 WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */ 294 WARN_ONCE(1, "Stack depot reached limit capacity"); 295 return false; 296 } 297 298 if (!new_pool && *prealloc) { 299 /* We have preallocated memory, use it. */ 300 WRITE_ONCE(new_pool, *prealloc); 301 *prealloc = NULL; 302 } 303 304 if (!new_pool) 305 return false; /* new_pool and *prealloc are NULL */ 306 307 /* Save reference to the pool to be used by depot_fetch_stack(). */ 308 stack_pools[pools_num] = new_pool; 309 310 /* 311 * Stack depot tries to keep an extra pool allocated even before it runs 312 * out of space in the currently used pool. 313 * 314 * To indicate that a new preallocation is needed new_pool is reset to 315 * NULL; do not reset to NULL if we have reached the maximum number of 316 * pools. 317 */ 318 if (pools_num < DEPOT_MAX_POOLS) 319 WRITE_ONCE(new_pool, NULL); 320 else 321 WRITE_ONCE(new_pool, STACK_DEPOT_POISON); 322 323 /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */ 324 WRITE_ONCE(pools_num, pools_num + 1); 325 ASSERT_EXCLUSIVE_WRITER(pools_num); 326 327 pool_offset = 0; 328 329 return true; 330 } 331 332 /* Keeps the preallocated memory to be used for a new stack depot pool. */ 333 static void depot_keep_new_pool(void **prealloc) 334 { 335 lockdep_assert_held(&pool_lock); 336 337 /* 338 * If a new pool is already saved or the maximum number of 339 * pools is reached, do not use the preallocated memory. 340 */ 341 if (new_pool) 342 return; 343 344 WRITE_ONCE(new_pool, *prealloc); 345 *prealloc = NULL; 346 } 347 348 /* 349 * Try to initialize a new stack record from the current pool, a cached pool, or 350 * the current pre-allocation. 351 */ 352 static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size) 353 { 354 struct stack_record *stack; 355 void *current_pool; 356 u32 pool_index; 357 358 lockdep_assert_held(&pool_lock); 359 360 if (pool_offset + size > DEPOT_POOL_SIZE) { 361 if (!depot_init_pool(prealloc)) 362 return NULL; 363 } 364 365 if (WARN_ON_ONCE(pools_num < 1)) 366 return NULL; 367 pool_index = pools_num - 1; 368 current_pool = stack_pools[pool_index]; 369 if (WARN_ON_ONCE(!current_pool)) 370 return NULL; 371 372 stack = current_pool + pool_offset; 373 374 /* Pre-initialize handle once. */ 375 stack->handle.pool_index = pool_index; 376 stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN; 377 stack->handle.extra = 0; 378 INIT_LIST_HEAD(&stack->hash_list); 379 380 pool_offset += size; 381 382 return stack; 383 } 384 385 /* Try to find next free usable entry from the freelist. */ 386 static struct stack_record *depot_pop_free(void) 387 { 388 struct stack_record *stack; 389 390 lockdep_assert_held(&pool_lock); 391 392 if (list_empty(&free_stacks)) 393 return NULL; 394 395 /* 396 * We maintain the invariant that the elements in front are least 397 * recently used, and are therefore more likely to be associated with an 398 * RCU grace period in the past. Consequently it is sufficient to only 399 * check the first entry. 400 */ 401 stack = list_first_entry(&free_stacks, struct stack_record, free_list); 402 if (!poll_state_synchronize_rcu(stack->rcu_state)) 403 return NULL; 404 405 list_del(&stack->free_list); 406 counters[DEPOT_COUNTER_FREELIST_SIZE]--; 407 408 return stack; 409 } 410 411 static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries) 412 { 413 const size_t used = flex_array_size(s, entries, nr_entries); 414 const size_t unused = sizeof(s->entries) - used; 415 416 WARN_ON_ONCE(sizeof(s->entries) < used); 417 418 return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN); 419 } 420 421 /* Allocates a new stack in a stack depot pool. */ 422 static struct stack_record * 423 depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc) 424 { 425 struct stack_record *stack = NULL; 426 size_t record_size; 427 428 lockdep_assert_held(&pool_lock); 429 430 /* This should already be checked by public API entry points. */ 431 if (WARN_ON_ONCE(!nr_entries)) 432 return NULL; 433 434 /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ 435 if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES) 436 nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES; 437 438 if (flags & STACK_DEPOT_FLAG_GET) { 439 /* 440 * Evictable entries have to allocate the max. size so they may 441 * safely be re-used by differently sized allocations. 442 */ 443 record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES); 444 stack = depot_pop_free(); 445 } else { 446 record_size = depot_stack_record_size(stack, nr_entries); 447 } 448 449 if (!stack) { 450 stack = depot_pop_free_pool(prealloc, record_size); 451 if (!stack) 452 return NULL; 453 } 454 455 /* Save the stack trace. */ 456 stack->hash = hash; 457 stack->size = nr_entries; 458 /* stack->handle is already filled in by depot_pop_free_pool(). */ 459 memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries)); 460 461 if (flags & STACK_DEPOT_FLAG_GET) { 462 refcount_set(&stack->count, 1); 463 counters[DEPOT_COUNTER_REFD_ALLOCS]++; 464 counters[DEPOT_COUNTER_REFD_INUSE]++; 465 } else { 466 /* Warn on attempts to switch to refcounting this entry. */ 467 refcount_set(&stack->count, REFCOUNT_SATURATED); 468 counters[DEPOT_COUNTER_PERSIST_COUNT]++; 469 counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size; 470 } 471 472 /* 473 * Let KMSAN know the stored stack record is initialized. This shall 474 * prevent false positive reports if instrumented code accesses it. 475 */ 476 kmsan_unpoison_memory(stack, record_size); 477 478 return stack; 479 } 480 481 static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) 482 { 483 const int pools_num_cached = READ_ONCE(pools_num); 484 union handle_parts parts = { .handle = handle }; 485 void *pool; 486 size_t offset = parts.offset << DEPOT_STACK_ALIGN; 487 struct stack_record *stack; 488 489 lockdep_assert_not_held(&pool_lock); 490 491 if (parts.pool_index > pools_num_cached) { 492 WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", 493 parts.pool_index, pools_num_cached, handle); 494 return NULL; 495 } 496 497 pool = stack_pools[parts.pool_index]; 498 if (WARN_ON(!pool)) 499 return NULL; 500 501 stack = pool + offset; 502 if (WARN_ON(!refcount_read(&stack->count))) 503 return NULL; 504 505 return stack; 506 } 507 508 /* Links stack into the freelist. */ 509 static void depot_free_stack(struct stack_record *stack) 510 { 511 unsigned long flags; 512 513 lockdep_assert_not_held(&pool_lock); 514 515 raw_spin_lock_irqsave(&pool_lock, flags); 516 printk_deferred_enter(); 517 518 /* 519 * Remove the entry from the hash list. Concurrent list traversal may 520 * still observe the entry, but since the refcount is zero, this entry 521 * will no longer be considered as valid. 522 */ 523 list_del_rcu(&stack->hash_list); 524 525 /* 526 * Due to being used from constrained contexts such as the allocators, 527 * NMI, or even RCU itself, stack depot cannot rely on primitives that 528 * would sleep (such as synchronize_rcu()) or recursively call into 529 * stack depot again (such as call_rcu()). 530 * 531 * Instead, get an RCU cookie, so that we can ensure this entry isn't 532 * moved onto another list until the next grace period, and concurrent 533 * RCU list traversal remains safe. 534 */ 535 stack->rcu_state = get_state_synchronize_rcu(); 536 537 /* 538 * Add the entry to the freelist tail, so that older entries are 539 * considered first - their RCU cookie is more likely to no longer be 540 * associated with the current grace period. 541 */ 542 list_add_tail(&stack->free_list, &free_stacks); 543 544 counters[DEPOT_COUNTER_FREELIST_SIZE]++; 545 counters[DEPOT_COUNTER_REFD_FREES]++; 546 counters[DEPOT_COUNTER_REFD_INUSE]--; 547 548 printk_deferred_exit(); 549 raw_spin_unlock_irqrestore(&pool_lock, flags); 550 } 551 552 /* Calculates the hash for a stack. */ 553 static inline u32 hash_stack(unsigned long *entries, unsigned int size) 554 { 555 return jhash2((u32 *)entries, 556 array_size(size, sizeof(*entries)) / sizeof(u32), 557 STACK_HASH_SEED); 558 } 559 560 /* 561 * Non-instrumented version of memcmp(). 562 * Does not check the lexicographical order, only the equality. 563 */ 564 static inline 565 int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, 566 unsigned int n) 567 { 568 for ( ; n-- ; u1++, u2++) { 569 if (*u1 != *u2) 570 return 1; 571 } 572 return 0; 573 } 574 575 /* Finds a stack in a bucket of the hash table. */ 576 static inline struct stack_record *find_stack(struct list_head *bucket, 577 unsigned long *entries, int size, 578 u32 hash, depot_flags_t flags) 579 { 580 struct stack_record *stack, *ret = NULL; 581 582 /* 583 * Stack depot may be used from instrumentation that instruments RCU or 584 * tracing itself; use variant that does not call into RCU and cannot be 585 * traced. 586 * 587 * Note: Such use cases must take care when using refcounting to evict 588 * unused entries, because the stack record free-then-reuse code paths 589 * do call into RCU. 590 */ 591 rcu_read_lock_sched_notrace(); 592 593 list_for_each_entry_rcu(stack, bucket, hash_list) { 594 if (stack->hash != hash || stack->size != size) 595 continue; 596 597 /* 598 * This may race with depot_free_stack() accessing the freelist 599 * management state unioned with @entries. The refcount is zero 600 * in that case and the below refcount_inc_not_zero() will fail. 601 */ 602 if (data_race(stackdepot_memcmp(entries, stack->entries, size))) 603 continue; 604 605 /* 606 * Try to increment refcount. If this succeeds, the stack record 607 * is valid and has not yet been freed. 608 * 609 * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior 610 * to then call stack_depot_put() later, and we can assume that 611 * a stack record is never placed back on the freelist. 612 */ 613 if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count)) 614 continue; 615 616 ret = stack; 617 break; 618 } 619 620 rcu_read_unlock_sched_notrace(); 621 622 return ret; 623 } 624 625 depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, 626 unsigned int nr_entries, 627 gfp_t alloc_flags, 628 depot_flags_t depot_flags) 629 { 630 struct list_head *bucket; 631 struct stack_record *found = NULL; 632 depot_stack_handle_t handle = 0; 633 struct page *page = NULL; 634 void *prealloc = NULL; 635 bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC; 636 unsigned long flags; 637 u32 hash; 638 639 if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK)) 640 return 0; 641 642 /* 643 * If this stack trace is from an interrupt, including anything before 644 * interrupt entry usually leads to unbounded stack depot growth. 645 * 646 * Since use of filter_irq_stacks() is a requirement to ensure stack 647 * depot can efficiently deduplicate interrupt stacks, always 648 * filter_irq_stacks() to simplify all callers' use of stack depot. 649 */ 650 nr_entries = filter_irq_stacks(entries, nr_entries); 651 652 if (unlikely(nr_entries == 0) || stack_depot_disabled) 653 return 0; 654 655 hash = hash_stack(entries, nr_entries); 656 bucket = &stack_table[hash & stack_hash_mask]; 657 658 /* Fast path: look the stack trace up without locking. */ 659 found = find_stack(bucket, entries, nr_entries, hash, depot_flags); 660 if (found) 661 goto exit; 662 663 /* 664 * Allocate memory for a new pool if required now: 665 * we won't be able to do that under the lock. 666 */ 667 if (unlikely(can_alloc && !READ_ONCE(new_pool))) { 668 /* 669 * Zero out zone modifiers, as we don't have specific zone 670 * requirements. Keep the flags related to allocation in atomic 671 * contexts and I/O. 672 */ 673 alloc_flags &= ~GFP_ZONEMASK; 674 alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); 675 alloc_flags |= __GFP_NOWARN; 676 page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER); 677 if (page) 678 prealloc = page_address(page); 679 } 680 681 raw_spin_lock_irqsave(&pool_lock, flags); 682 printk_deferred_enter(); 683 684 /* Try to find again, to avoid concurrently inserting duplicates. */ 685 found = find_stack(bucket, entries, nr_entries, hash, depot_flags); 686 if (!found) { 687 struct stack_record *new = 688 depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc); 689 690 if (new) { 691 /* 692 * This releases the stack record into the bucket and 693 * makes it visible to readers in find_stack(). 694 */ 695 list_add_rcu(&new->hash_list, bucket); 696 found = new; 697 } 698 } 699 700 if (prealloc) { 701 /* 702 * Either stack depot already contains this stack trace, or 703 * depot_alloc_stack() did not consume the preallocated memory. 704 * Try to keep the preallocated memory for future. 705 */ 706 depot_keep_new_pool(&prealloc); 707 } 708 709 printk_deferred_exit(); 710 raw_spin_unlock_irqrestore(&pool_lock, flags); 711 exit: 712 if (prealloc) { 713 /* Stack depot didn't use this memory, free it. */ 714 free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER); 715 } 716 if (found) 717 handle = found->handle.handle; 718 return handle; 719 } 720 EXPORT_SYMBOL_GPL(stack_depot_save_flags); 721 722 depot_stack_handle_t stack_depot_save(unsigned long *entries, 723 unsigned int nr_entries, 724 gfp_t alloc_flags) 725 { 726 return stack_depot_save_flags(entries, nr_entries, alloc_flags, 727 STACK_DEPOT_FLAG_CAN_ALLOC); 728 } 729 EXPORT_SYMBOL_GPL(stack_depot_save); 730 731 unsigned int stack_depot_fetch(depot_stack_handle_t handle, 732 unsigned long **entries) 733 { 734 struct stack_record *stack; 735 736 *entries = NULL; 737 /* 738 * Let KMSAN know *entries is initialized. This shall prevent false 739 * positive reports if instrumented code accesses it. 740 */ 741 kmsan_unpoison_memory(entries, sizeof(*entries)); 742 743 if (!handle || stack_depot_disabled) 744 return 0; 745 746 stack = depot_fetch_stack(handle); 747 /* 748 * Should never be NULL, otherwise this is a use-after-put (or just a 749 * corrupt handle). 750 */ 751 if (WARN(!stack, "corrupt handle or use after stack_depot_put()")) 752 return 0; 753 754 *entries = stack->entries; 755 return stack->size; 756 } 757 EXPORT_SYMBOL_GPL(stack_depot_fetch); 758 759 void stack_depot_put(depot_stack_handle_t handle) 760 { 761 struct stack_record *stack; 762 763 if (!handle || stack_depot_disabled) 764 return; 765 766 stack = depot_fetch_stack(handle); 767 /* 768 * Should always be able to find the stack record, otherwise this is an 769 * unbalanced put attempt (or corrupt handle). 770 */ 771 if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()")) 772 return; 773 774 if (refcount_dec_and_test(&stack->count)) 775 depot_free_stack(stack); 776 } 777 EXPORT_SYMBOL_GPL(stack_depot_put); 778 779 void stack_depot_print(depot_stack_handle_t stack) 780 { 781 unsigned long *entries; 782 unsigned int nr_entries; 783 784 nr_entries = stack_depot_fetch(stack, &entries); 785 if (nr_entries > 0) 786 stack_trace_print(entries, nr_entries, 0); 787 } 788 EXPORT_SYMBOL_GPL(stack_depot_print); 789 790 int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, 791 int spaces) 792 { 793 unsigned long *entries; 794 unsigned int nr_entries; 795 796 nr_entries = stack_depot_fetch(handle, &entries); 797 return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, 798 spaces) : 0; 799 } 800 EXPORT_SYMBOL_GPL(stack_depot_snprint); 801 802 depot_stack_handle_t __must_check stack_depot_set_extra_bits( 803 depot_stack_handle_t handle, unsigned int extra_bits) 804 { 805 union handle_parts parts = { .handle = handle }; 806 807 /* Don't set extra bits on empty handles. */ 808 if (!handle) 809 return 0; 810 811 parts.extra = extra_bits; 812 return parts.handle; 813 } 814 EXPORT_SYMBOL(stack_depot_set_extra_bits); 815 816 unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) 817 { 818 union handle_parts parts = { .handle = handle }; 819 820 return parts.extra; 821 } 822 EXPORT_SYMBOL(stack_depot_get_extra_bits); 823 824 static int stats_show(struct seq_file *seq, void *v) 825 { 826 /* 827 * data race ok: These are just statistics counters, and approximate 828 * statistics are ok for debugging. 829 */ 830 seq_printf(seq, "pools: %d\n", data_race(pools_num)); 831 for (int i = 0; i < DEPOT_COUNTER_COUNT; i++) 832 seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i])); 833 834 return 0; 835 } 836 DEFINE_SHOW_ATTRIBUTE(stats); 837 838 static int depot_debugfs_init(void) 839 { 840 struct dentry *dir; 841 842 if (stack_depot_disabled) 843 return 0; 844 845 dir = debugfs_create_dir("stackdepot", NULL); 846 debugfs_create_file("stats", 0444, dir, NULL, &stats_fops); 847 return 0; 848 } 849 late_initcall(depot_debugfs_init); 850