// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stack depot - a stack trace storage that avoids duplication.
 *
 * Internally, stack depot maintains a hash table of unique stacktraces. The
 * stack traces themselves are stored contiguously one after another in a set
 * of separate page allocations.
 *
 * Author: Alexander Potapenko <glider@google.com>
 * Copyright (C) 2016 Google, Inc.
 *
 * Based on the code by Dmitry Chernenkov.
 */

#define pr_fmt(fmt) "stackdepot: " fmt

#include <linux/debugfs.h>
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/kmsan.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/poison.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>
#include <linux/kasan-enabled.h>

/*
 * The pool_index is offset by 1 so the first record does not have a 0 handle.
 */
static unsigned int stack_max_pools __read_mostly =
	MIN((1LL << DEPOT_POOL_INDEX_BITS) - 1, 8192);

static bool stack_depot_disabled;
static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
static bool __stack_depot_early_init_passed __initdata;

/* Use one hash table bucket per 16 KB of memory. */
#define STACK_HASH_TABLE_SCALE 14
/* Limit the number of buckets between 4K and 1M. */
#define STACK_BUCKET_NUMBER_ORDER_MIN 12
#define STACK_BUCKET_NUMBER_ORDER_MAX 20
/* Initial seed for jhash2. */
#define STACK_HASH_SEED 0x9747b28c

/* Hash table of stored stack records. */
static struct list_head *stack_table;
/* Fixed order of the number of table buckets. Used when KASAN is enabled. */
static unsigned int stack_bucket_number_order;
/* Hash mask for indexing the table. */
static unsigned int stack_hash_mask;

/* The lock must be held when performing pool or freelist modifications. */
static DEFINE_RAW_SPINLOCK(pool_lock);
/* Array of memory regions that store stack records. */
static void **stack_pools __pt_guarded_by(&pool_lock);
/* Newly allocated pool that is not yet added to stack_pools. */
static void *new_pool;
/* Number of pools in stack_pools. */
static int pools_num;
/* Offset to the unused space in the currently used pool. */
static size_t pool_offset __guarded_by(&pool_lock) = DEPOT_POOL_SIZE;
/* Freelist of stack records within stack_pools. */
static __guarded_by(&pool_lock) LIST_HEAD(free_stacks);
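
/*
 * Rough usage sketch (local variable names below are made up for this
 * example): a typical client captures a trace with stack_trace_save(),
 * deduplicates it via stack_depot_save(), and later resolves the handle
 * with stack_depot_fetch() or stack_depot_print():
 *
 *	unsigned long entries[16];
 *	unsigned int nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
 *	depot_stack_handle_t handle = stack_depot_save(entries, nr, GFP_KERNEL);
 *
 *	unsigned long *saved;
 *	unsigned int saved_nr = stack_depot_fetch(handle, &saved);
 *	stack_trace_print(saved, saved_nr, 0);
 */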

/* Statistics counters for debugfs. */
enum depot_counter_id {
	DEPOT_COUNTER_REFD_ALLOCS,
	DEPOT_COUNTER_REFD_FREES,
	DEPOT_COUNTER_REFD_INUSE,
	DEPOT_COUNTER_FREELIST_SIZE,
	DEPOT_COUNTER_PERSIST_COUNT,
	DEPOT_COUNTER_PERSIST_BYTES,
	DEPOT_COUNTER_COUNT,
};
static long counters[DEPOT_COUNTER_COUNT];
static const char *const counter_names[] = {
	[DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations",
	[DEPOT_COUNTER_REFD_FREES] = "refcounted_frees",
	[DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use",
	[DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size",
	[DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count",
	[DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes",
};
static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);

static int __init disable_stack_depot(char *str)
{
	return kstrtobool(str, &stack_depot_disabled);
}
early_param("stack_depot_disable", disable_stack_depot);

static int __init parse_max_pools(char *str)
{
	const long long limit = (1LL << (DEPOT_POOL_INDEX_BITS)) - 1;
	unsigned int max_pools;
	int rv;

	rv = kstrtouint(str, 0, &max_pools);
	if (rv)
		return rv;

	if (max_pools < 1024) {
		pr_err("stack_depot_max_pools below 1024, using default of %u\n",
		       stack_max_pools);
		goto out;
	}

	if (max_pools > limit) {
		pr_err("stack_depot_max_pools exceeds %lld, using default of %u\n",
		       limit, stack_max_pools);
		goto out;
	}

	stack_max_pools = max_pools;
out:
	return 0;
}
early_param("stack_depot_max_pools", parse_max_pools);
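
/*
 * For illustration only (the values are arbitrary): the parameters above are
 * parsed from the kernel command line, e.g.
 *
 *	stack_depot_disable=1
 *	stack_depot_max_pools=4096
 *
 * The first switches stack depot off entirely; the second must stay within
 * [1024, (1 << DEPOT_POOL_INDEX_BITS) - 1], otherwise the default is kept.
 */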

void __init stack_depot_request_early_init(void)
{
	/* Too late to request early init now. */
	WARN_ON(__stack_depot_early_init_passed);

	__stack_depot_early_init_requested = true;
}

/* Initialize list_head's within the hash table. */
static void init_stack_table(unsigned long entries)
{
	unsigned long i;

	for (i = 0; i < entries; i++)
		INIT_LIST_HEAD(&stack_table[i]);
}

/* Allocates a hash table via memblock. Can only be used during early boot. */
int __init stack_depot_early_init(void)
{
	unsigned long entries = 0;

	/* This function must be called only once, from mm_init(). */
	if (WARN_ON(__stack_depot_early_init_passed))
		return 0;
	__stack_depot_early_init_passed = true;

	/*
	 * Print disabled message even if early init has not been requested:
	 * stack_depot_init() will not print one.
	 */
	if (stack_depot_disabled) {
		pr_info("disabled\n");
		return 0;
	}

	/*
	 * If KASAN is enabled, use the maximum order: KASAN is frequently used
	 * in fuzzing scenarios, which leads to a large number of different
	 * stack traces being stored in stack depot.
	 */
	if (kasan_enabled() && !stack_bucket_number_order)
		stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX;

	/*
	 * Check if early init has been requested after setting
	 * stack_bucket_number_order: stack_depot_init() uses its value.
	 */
	if (!__stack_depot_early_init_requested)
		return 0;

	/*
	 * If stack_bucket_number_order is not set, leave entries as 0 to rely
	 * on the automatic calculations performed by alloc_large_system_hash().
	 */
	if (stack_bucket_number_order)
		entries = 1UL << stack_bucket_number_order;
	pr_info("allocating hash table via alloc_large_system_hash\n");
	stack_table = alloc_large_system_hash("stackdepot",
						sizeof(struct list_head),
						entries,
						STACK_HASH_TABLE_SCALE,
						HASH_EARLY,
						NULL,
						&stack_hash_mask,
						1UL << STACK_BUCKET_NUMBER_ORDER_MIN,
						1UL << STACK_BUCKET_NUMBER_ORDER_MAX);
	if (!stack_table) {
		pr_err("hash table allocation failed, disabling\n");
		stack_depot_disabled = true;
		return -ENOMEM;
	}
	if (!entries) {
		/*
		 * Obtain the number of entries that was calculated by
		 * alloc_large_system_hash().
		 */
		entries = stack_hash_mask + 1;
	}
	init_stack_table(entries);

	pr_info("allocating space for %u stack pools via memblock\n",
		stack_max_pools);
	stack_pools =
		memblock_alloc(stack_max_pools * sizeof(void *), PAGE_SIZE);
	if (!stack_pools) {
		pr_err("stack pools allocation failed, disabling\n");
		memblock_free(stack_table, entries * sizeof(struct list_head));
		stack_depot_disabled = true;
		return -ENOMEM;
	}

	return 0;
}

/* Allocates a hash table via kvcalloc. Can be used after boot. */
int stack_depot_init(void)
{
	static DEFINE_MUTEX(stack_depot_init_mutex);
	unsigned long entries;
	int ret = 0;

	mutex_lock(&stack_depot_init_mutex);

	if (stack_depot_disabled || stack_table)
		goto out_unlock;

	/*
	 * Similarly to stack_depot_early_init, use stack_bucket_number_order
	 * if assigned, and rely on automatic scaling otherwise.
	 */
	if (stack_bucket_number_order) {
		entries = 1UL << stack_bucket_number_order;
	} else {
		int scale = STACK_HASH_TABLE_SCALE;

		entries = nr_free_buffer_pages();
		entries = roundup_pow_of_two(entries);

		if (scale > PAGE_SHIFT)
			entries >>= (scale - PAGE_SHIFT);
		else
			entries <<= (PAGE_SHIFT - scale);
	}

	if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN)
		entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN;
	if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX)
		entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX;

	pr_info("allocating hash table of %lu entries via kvcalloc\n", entries);
	stack_table = kvcalloc(entries, sizeof(struct list_head), GFP_KERNEL);
	if (!stack_table) {
		pr_err("hash table allocation failed, disabling\n");
		stack_depot_disabled = true;
		ret = -ENOMEM;
		goto out_unlock;
	}
	stack_hash_mask = entries - 1;
	init_stack_table(entries);

	pr_info("allocating space for %u stack pools via kvcalloc\n",
		stack_max_pools);
	stack_pools = kvcalloc(stack_max_pools, sizeof(void *), GFP_KERNEL);
	if (!stack_pools) {
		pr_err("stack pools allocation failed, disabling\n");
		kvfree(stack_table);
		stack_depot_disabled = true;
		ret = -ENOMEM;
	}

out_unlock:
	mutex_unlock(&stack_depot_init_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(stack_depot_init);
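
/*
 * Worked example of the scaling above (the memory size is a made-up value):
 * with roughly 4 GiB of buffer pages and PAGE_SHIFT == 12,
 * nr_free_buffer_pages() is about 2^20; since STACK_HASH_TABLE_SCALE (14)
 * is larger than PAGE_SHIFT, entries = 2^20 >> (14 - 12) = 2^18 buckets,
 * i.e. one bucket per 16 KB of memory, which already falls inside the
 * [2^12, 2^20] clamp.
 */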

/*
 * Initializes new stack pool, and updates the list of pools.
 */
static bool depot_init_pool(void **prealloc)
	__must_hold(&pool_lock)
{
	lockdep_assert_held(&pool_lock);

	if (unlikely(pools_num >= stack_max_pools)) {
		/* Bail out if we reached the pool limit. */
		WARN_ON_ONCE(pools_num > stack_max_pools); /* should never happen */
		WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
		WARN_ONCE(1, "Stack depot reached limit capacity");
		return false;
	}

	if (!new_pool && *prealloc) {
		/* We have preallocated memory, use it. */
		WRITE_ONCE(new_pool, *prealloc);
		*prealloc = NULL;
	}

	if (!new_pool)
		return false; /* new_pool and *prealloc are NULL */

	/* Save reference to the pool to be used by depot_fetch_stack(). */
	stack_pools[pools_num] = new_pool;

	/*
	 * Stack depot tries to keep an extra pool allocated even before it runs
	 * out of space in the currently used pool.
	 *
	 * To indicate that a new preallocation is needed new_pool is reset to
	 * NULL; do not reset to NULL if we have reached the maximum number of
	 * pools.
	 */
	if (pools_num < stack_max_pools)
		WRITE_ONCE(new_pool, NULL);
	else
		WRITE_ONCE(new_pool, STACK_DEPOT_POISON);

	/* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
	WRITE_ONCE(pools_num, pools_num + 1);
	ASSERT_EXCLUSIVE_WRITER(pools_num);

	pool_offset = 0;

	return true;
}

/* Keeps the preallocated memory to be used for a new stack depot pool. */
static void depot_keep_new_pool(void **prealloc)
	__must_hold(&pool_lock)
{
	lockdep_assert_held(&pool_lock);

	/*
	 * If a new pool is already saved or the maximum number of
	 * pools is reached, do not use the preallocated memory.
	 */
	if (new_pool)
		return;

	WRITE_ONCE(new_pool, *prealloc);
	*prealloc = NULL;
}

/*
 * Try to initialize a new stack record from the current pool, a cached pool, or
 * the current pre-allocation.
 */
static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
	__must_hold(&pool_lock)
{
	struct stack_record *stack;
	void *current_pool;
	u32 pool_index;

	lockdep_assert_held(&pool_lock);

	if (pool_offset + size > DEPOT_POOL_SIZE) {
		if (!depot_init_pool(prealloc))
			return NULL;
	}

	if (WARN_ON_ONCE(pools_num < 1))
		return NULL;
	pool_index = pools_num - 1;
	current_pool = stack_pools[pool_index];
	if (WARN_ON_ONCE(!current_pool))
		return NULL;

	stack = current_pool + pool_offset;

	/* Pre-initialize handle once. */
	stack->handle.pool_index_plus_1 = pool_index + 1;
	stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
	stack->handle.extra = 0;
	INIT_LIST_HEAD(&stack->hash_list);

	pool_offset += size;

	return stack;
}
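
/*
 * Handle layout example (the numbers are illustrative): the very first record
 * lands in pool 0 at byte offset 0 and is encoded with pool_index_plus_1 == 1
 * and offset == 0, so even that record gets a non-zero handle; a record in
 * pool 3 at byte offset 64 stores pool_index_plus_1 == 4 and
 * offset == 64 >> DEPOT_STACK_ALIGN, which depot_fetch_stack() later reverses
 * to locate the record.
 */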

/* Try to find next free usable entry from the freelist. */
static struct stack_record *depot_pop_free(void)
	__must_hold(&pool_lock)
{
	struct stack_record *stack;

	lockdep_assert_held(&pool_lock);

	if (list_empty(&free_stacks))
		return NULL;

	/*
	 * We maintain the invariant that the elements in front are least
	 * recently used, and are therefore more likely to be associated with an
	 * RCU grace period in the past. Consequently it is sufficient to only
	 * check the first entry.
	 */
	stack = list_first_entry(&free_stacks, struct stack_record, free_list);
	if (!poll_state_synchronize_rcu(stack->rcu_state))
		return NULL;

	list_del(&stack->free_list);
	counters[DEPOT_COUNTER_FREELIST_SIZE]--;

	return stack;
}

static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
{
	const size_t used = flex_array_size(s, entries, nr_entries);
	const size_t unused = sizeof(s->entries) - used;

	WARN_ON_ONCE(sizeof(s->entries) < used);

	return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN);
}

/* Allocates a new stack in a stack depot pool. */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
	__must_hold(&pool_lock)
{
	struct stack_record *stack = NULL;
	size_t record_size;

	lockdep_assert_held(&pool_lock);

	/* This should already be checked by public API entry points. */
	if (WARN_ON_ONCE(!nr_entries))
		return NULL;

	/* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
	if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
		nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;

	if (flags & STACK_DEPOT_FLAG_GET) {
		/*
		 * Evictable entries have to allocate the max. size so they may
		 * safely be re-used by differently sized allocations.
		 */
		record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES);
		stack = depot_pop_free();
	} else {
		record_size = depot_stack_record_size(stack, nr_entries);
	}

	if (!stack) {
		stack = depot_pop_free_pool(prealloc, record_size);
		if (!stack)
			return NULL;
	}

	/* Save the stack trace. */
	stack->hash = hash;
	stack->size = nr_entries;
	/* stack->handle is already filled in by depot_pop_free_pool(). */
	memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries));

	if (flags & STACK_DEPOT_FLAG_GET) {
		refcount_set(&stack->count, 1);
		counters[DEPOT_COUNTER_REFD_ALLOCS]++;
		counters[DEPOT_COUNTER_REFD_INUSE]++;
	} else {
		/* Warn on attempts to switch to refcounting this entry. */
		refcount_set(&stack->count, REFCOUNT_SATURATED);
		counters[DEPOT_COUNTER_PERSIST_COUNT]++;
		counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size;
	}

	/*
	 * Let KMSAN know the stored stack record is initialized. This shall
	 * prevent false positive reports if instrumented code accesses it.
	 */
	kmsan_unpoison_memory(stack, record_size);

	return stack;
}
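
/*
 * Rough sizing example (the config value below is only an assumption):
 * with CONFIG_STACKDEPOT_MAX_FRAMES == 64 and 8-byte stack entries, an
 * evictable (STACK_DEPOT_FLAG_GET) record always reserves room for all
 * 64 frames, whereas a persistent record saving 6 frames reserves about
 * (64 - 6) * 8 = 464 fewer bytes (both sizes are rounded up to a
 * 1 << DEPOT_STACK_ALIGN boundary by depot_stack_record_size()).
 */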

static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
	__must_not_hold(&pool_lock)
{
	const int pools_num_cached = READ_ONCE(pools_num);
	union handle_parts parts = { .handle = handle };
	void *pool;
	u32 pool_index = parts.pool_index_plus_1 - 1;
	size_t offset = parts.offset << DEPOT_STACK_ALIGN;
	struct stack_record *stack;

	lockdep_assert_not_held(&pool_lock);

	if (pool_index >= pools_num_cached) {
		WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
		     pool_index, pools_num_cached, handle);
		return NULL;
	}

	/* @pool_index either valid, or user passed in corrupted value. */
	pool = context_unsafe(stack_pools[pool_index]);
	if (WARN_ON(!pool))
		return NULL;

	stack = pool + offset;
	if (WARN_ON(!refcount_read(&stack->count)))
		return NULL;

	return stack;
}

/* Links stack into the freelist. */
static void depot_free_stack(struct stack_record *stack)
	__must_not_hold(&pool_lock)
{
	unsigned long flags;

	lockdep_assert_not_held(&pool_lock);

	raw_spin_lock_irqsave(&pool_lock, flags);
	printk_deferred_enter();

	/*
	 * Remove the entry from the hash list. Concurrent list traversal may
	 * still observe the entry, but since the refcount is zero, this entry
	 * will no longer be considered as valid.
	 */
	list_del_rcu(&stack->hash_list);

	/*
	 * Due to being used from constrained contexts such as the allocators,
	 * NMI, or even RCU itself, stack depot cannot rely on primitives that
	 * would sleep (such as synchronize_rcu()) or recursively call into
	 * stack depot again (such as call_rcu()).
	 *
	 * Instead, get an RCU cookie, so that we can ensure this entry isn't
	 * moved onto another list until the next grace period, and concurrent
	 * RCU list traversal remains safe.
	 */
	stack->rcu_state = get_state_synchronize_rcu();

	/*
	 * Add the entry to the freelist tail, so that older entries are
	 * considered first - their RCU cookie is more likely to no longer be
	 * associated with the current grace period.
	 */
	list_add_tail(&stack->free_list, &free_stacks);

	counters[DEPOT_COUNTER_FREELIST_SIZE]++;
	counters[DEPOT_COUNTER_REFD_FREES]++;
	counters[DEPOT_COUNTER_REFD_INUSE]--;

	printk_deferred_exit();
	raw_spin_unlock_irqrestore(&pool_lock, flags);
}
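
/*
 * The reuse protocol above, in a nutshell (a sketch, not extra code in this
 * file): depot_free_stack() tags the record with get_state_synchronize_rcu()
 * and appends it to the freelist tail; depot_pop_free() only takes the record
 * once poll_state_synchronize_rcu() reports that a grace period has elapsed
 * since then, so any concurrent find_stack() traversal that might still see
 * the record on its RCU hash list has finished.
 */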

/* Calculates the hash for a stack. */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
	return jhash2((u32 *)entries,
		      array_size(size, sizeof(*entries)) / sizeof(u32),
		      STACK_HASH_SEED);
}

/*
 * Non-instrumented version of memcmp().
 * Does not check the lexicographical order, only the equality.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
			unsigned int n)
{
	for ( ; n-- ; u1++, u2++) {
		if (*u1 != *u2)
			return 1;
	}
	return 0;
}

/* Finds a stack in a bucket of the hash table. */
static inline struct stack_record *find_stack(struct list_head *bucket,
					      unsigned long *entries, int size,
					      u32 hash, depot_flags_t flags)
{
	struct stack_record *stack, *ret = NULL;

	/*
	 * Stack depot may be used from instrumentation that instruments RCU or
	 * tracing itself; use variant that does not call into RCU and cannot be
	 * traced.
	 *
	 * Note: Such use cases must take care when using refcounting to evict
	 * unused entries, because the stack record free-then-reuse code paths
	 * do call into RCU.
	 */
	rcu_read_lock_sched_notrace();

	list_for_each_entry_rcu(stack, bucket, hash_list) {
		if (stack->hash != hash || stack->size != size)
			continue;

		/*
		 * This may race with depot_free_stack() accessing the freelist
		 * management state unioned with @entries. The refcount is zero
		 * in that case and the below refcount_inc_not_zero() will fail.
		 */
		if (data_race(stackdepot_memcmp(entries, stack->entries, size)))
			continue;

		/*
		 * Try to increment refcount. If this succeeds, the stack record
		 * is valid and has not yet been freed.
		 *
		 * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior
		 * to then call stack_depot_put() later, and we can assume that
		 * a stack record is never placed back on the freelist.
		 */
		if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count))
			continue;

		ret = stack;
		break;
	}

	rcu_read_unlock_sched_notrace();

	return ret;
}

depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
					    unsigned int nr_entries,
					    gfp_t alloc_flags,
					    depot_flags_t depot_flags)
{
	struct list_head *bucket;
	struct stack_record *found = NULL;
	depot_stack_handle_t handle = 0;
	struct page *page = NULL;
	void *prealloc = NULL;
	bool allow_spin = gfpflags_allow_spinning(alloc_flags);
	bool can_alloc = (depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC) && allow_spin;
	unsigned long flags;
	u32 hash;

	if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK))
		return 0;

	/*
	 * If this stack trace is from an interrupt, including anything before
	 * interrupt entry usually leads to unbounded stack depot growth.
	 *
	 * Since use of filter_irq_stacks() is a requirement to ensure stack
	 * depot can efficiently deduplicate interrupt stacks, always
	 * filter_irq_stacks() to simplify all callers' use of stack depot.
	 */
	nr_entries = filter_irq_stacks(entries, nr_entries);

	if (unlikely(nr_entries == 0) || stack_depot_disabled)
		return 0;

	hash = hash_stack(entries, nr_entries);
	bucket = &stack_table[hash & stack_hash_mask];

	/* Fast path: look the stack trace up without locking. */
	found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
	if (found)
		goto exit;

	/*
	 * Allocate memory for a new pool if required now:
	 * we won't be able to do that under the lock.
	 */
	if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
		page = alloc_pages(gfp_nested_mask(alloc_flags),
				   DEPOT_POOL_ORDER);
		if (page)
			prealloc = page_address(page);
	}

	if (in_nmi() || !allow_spin) {
		/* We can never allocate in NMI context. */
		WARN_ON_ONCE(can_alloc);
		/* Best effort; bail if we fail to take the lock. */
		if (!raw_spin_trylock_irqsave(&pool_lock, flags))
			goto exit;
	} else {
		raw_spin_lock_irqsave(&pool_lock, flags);
	}
	printk_deferred_enter();

	/* Try to find again, to avoid concurrently inserting duplicates. */
	found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
	if (!found) {
		struct stack_record *new =
			depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);

		if (new) {
			/*
			 * This releases the stack record into the bucket and
			 * makes it visible to readers in find_stack().
			 */
			list_add_rcu(&new->hash_list, bucket);
			found = new;
		}
	}

	if (prealloc) {
		/*
		 * Either stack depot already contains this stack trace, or
		 * depot_alloc_stack() did not consume the preallocated memory.
		 * Try to keep the preallocated memory for future.
		 */
		depot_keep_new_pool(&prealloc);
	}

	printk_deferred_exit();
	raw_spin_unlock_irqrestore(&pool_lock, flags);
exit:
	if (prealloc) {
		/* Stack depot didn't use this memory, free it. */
		if (!allow_spin)
			free_pages_nolock(virt_to_page(prealloc), DEPOT_POOL_ORDER);
		else
			free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER);
	}
	if (found)
		handle = found->handle.handle;
	return handle;
}
EXPORT_SYMBOL_GPL(stack_depot_save_flags);
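
/*
 * Illustrative lifecycle of an evictable record (caller-side variables are
 * made up for this sketch): pass STACK_DEPOT_FLAG_GET to take a reference,
 * and drop it with stack_depot_put() once the handle is no longer stored:
 *
 *	handle = stack_depot_save_flags(entries, nr, GFP_KERNEL,
 *					STACK_DEPOT_FLAG_CAN_ALLOC |
 *					STACK_DEPOT_FLAG_GET);
 *	...
 *	stack_depot_put(handle);
 *
 * Records saved without STACK_DEPOT_FLAG_GET are persistent and must never
 * be passed to stack_depot_put().
 */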

depot_stack_handle_t stack_depot_save(unsigned long *entries,
				      unsigned int nr_entries,
				      gfp_t alloc_flags)
{
	return stack_depot_save_flags(entries, nr_entries, alloc_flags,
				      STACK_DEPOT_FLAG_CAN_ALLOC);
}
EXPORT_SYMBOL_GPL(stack_depot_save);

struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle)
{
	if (!handle)
		return NULL;

	return depot_fetch_stack(handle);
}

unsigned int stack_depot_fetch(depot_stack_handle_t handle,
			       unsigned long **entries)
{
	struct stack_record *stack;

	*entries = NULL;
	/*
	 * Let KMSAN know *entries is initialized. This shall prevent false
	 * positive reports if instrumented code accesses it.
	 */
	kmsan_unpoison_memory(entries, sizeof(*entries));

	if (!handle || stack_depot_disabled)
		return 0;

	stack = depot_fetch_stack(handle);
	/*
	 * Should never be NULL, otherwise this is a use-after-put (or just a
	 * corrupt handle).
	 */
	if (WARN(!stack, "corrupt handle or use after stack_depot_put()"))
		return 0;

	*entries = stack->entries;
	return stack->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);

void stack_depot_put(depot_stack_handle_t handle)
{
	struct stack_record *stack;

	if (!handle || stack_depot_disabled)
		return;

	stack = depot_fetch_stack(handle);
	/*
	 * Should always be able to find the stack record, otherwise this is an
	 * unbalanced put attempt (or corrupt handle).
	 */
	if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()"))
		return;

	if (refcount_dec_and_test(&stack->count))
		depot_free_stack(stack);
}
EXPORT_SYMBOL_GPL(stack_depot_put);

void stack_depot_print(depot_stack_handle_t stack)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(stack, &entries);
	if (nr_entries > 0)
		stack_trace_print(entries, nr_entries, 0);
}
EXPORT_SYMBOL_GPL(stack_depot_print);

int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
			int spaces)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(handle, &entries);
	return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
						spaces) : 0;
}
EXPORT_SYMBOL_GPL(stack_depot_snprint);

depot_stack_handle_t __must_check stack_depot_set_extra_bits(
			depot_stack_handle_t handle, unsigned int extra_bits)
{
	union handle_parts parts = { .handle = handle };

	/* Don't set extra bits on empty handles. */
	if (!handle)
		return 0;

	parts.extra = extra_bits;
	return parts.handle;
}
EXPORT_SYMBOL(stack_depot_set_extra_bits);

unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
{
	union handle_parts parts = { .handle = handle };

	return parts.extra;
}
EXPORT_SYMBOL(stack_depot_get_extra_bits);
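
/*
 * Extra-bits usage sketch (the stored value and variable name below are
 * purely hypothetical): callers can piggy-back a few bits of their own state
 * on a handle, e.g.
 *
 *	handle = stack_depot_set_extra_bits(handle, 0x3);
 *	...
 *	my_bits = stack_depot_get_extra_bits(handle);
 *
 * The extra bits travel inside the handle value itself and do not affect the
 * stored stack record.
 */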

static int stats_show(struct seq_file *seq, void *v)
{
	/*
	 * data race ok: These are just statistics counters, and approximate
	 * statistics are ok for debugging.
	 */
	seq_printf(seq, "pools: %d\n", data_race(pools_num));
	for (int i = 0; i < DEPOT_COUNTER_COUNT; i++)
		seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i]));

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(stats);

static int depot_debugfs_init(void)
{
	struct dentry *dir;

	if (stack_depot_disabled)
		return 0;

	dir = debugfs_create_dir("stackdepot", NULL);
	debugfs_create_file("stats", 0444, dir, NULL, &stats_fops);
	return 0;
}
late_initcall(depot_debugfs_init);
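
/*
 * Example of the resulting debugfs output (the numbers are made up): with
 * debugfs mounted at /sys/kernel/debug, reading
 * /sys/kernel/debug/stackdepot/stats yields one line per counter:
 *
 *	pools: 7
 *	refcounted_allocations: 1024
 *	refcounted_frees: 100
 *	refcounted_in_use: 924
 *	freelist_size: 100
 *	persistent_count: 4096
 *	persistent_bytes: 786432
 */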