// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stack depot - a stack trace storage that avoids duplication.
 *
 * Internally, stack depot maintains a hash table of unique stacktraces. The
 * stack traces themselves are stored contiguously one after another in a set
 * of separate page allocations.
 *
 * Author: Alexander Potapenko <glider@google.com>
 * Copyright (C) 2016 Google, Inc.
 *
 * Based on the code by Dmitry Chernenkov.
 */

#define pr_fmt(fmt) "stackdepot: " fmt

#include <linux/debugfs.h>
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/kmsan.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/poison.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>
#include <linux/kasan-enabled.h>

/*
 * The pool_index is offset by 1 so the first record does not have a 0 handle.
 */
static unsigned int stack_max_pools __read_mostly =
	MIN((1LL << DEPOT_POOL_INDEX_BITS) - 1, 8192);

static bool stack_depot_disabled;
static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
static bool __stack_depot_early_init_passed __initdata;

/* Use one hash table bucket per 16 KB of memory. */
#define STACK_HASH_TABLE_SCALE 14
/* Limit the number of buckets between 4K and 1M. */
#define STACK_BUCKET_NUMBER_ORDER_MIN 12
#define STACK_BUCKET_NUMBER_ORDER_MAX 20
/* Initial seed for jhash2. */
#define STACK_HASH_SEED 0x9747b28c

/* Hash table of stored stack records. */
static struct list_head *stack_table;
/* Fixed order of the number of table buckets. Used when KASAN is enabled. */
static unsigned int stack_bucket_number_order;
/* Hash mask for indexing the table. */
static unsigned int stack_hash_mask;

/* Array of memory regions that store stack records. */
static void **stack_pools;
/* Newly allocated pool that is not yet added to stack_pools. */
static void *new_pool;
/* Number of pools in stack_pools. */
static int pools_num;
/* Offset to the unused space in the currently used pool. */
static size_t pool_offset = DEPOT_POOL_SIZE;
/* Freelist of stack records within stack_pools. */
static LIST_HEAD(free_stacks);
/* The lock must be held when performing pool or freelist modifications. */
static DEFINE_RAW_SPINLOCK(pool_lock);

/* Statistics counters for debugfs. */
enum depot_counter_id {
	DEPOT_COUNTER_REFD_ALLOCS,
	DEPOT_COUNTER_REFD_FREES,
	DEPOT_COUNTER_REFD_INUSE,
	DEPOT_COUNTER_FREELIST_SIZE,
	DEPOT_COUNTER_PERSIST_COUNT,
	DEPOT_COUNTER_PERSIST_BYTES,
	DEPOT_COUNTER_COUNT,
};
static long counters[DEPOT_COUNTER_COUNT];
static const char *const counter_names[] = {
	[DEPOT_COUNTER_REFD_ALLOCS] = "refcounted_allocations",
	[DEPOT_COUNTER_REFD_FREES] = "refcounted_frees",
	[DEPOT_COUNTER_REFD_INUSE] = "refcounted_in_use",
	[DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size",
	[DEPOT_COUNTER_PERSIST_COUNT] = "persistent_count",
	[DEPOT_COUNTER_PERSIST_BYTES] = "persistent_bytes",
};
static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);

static int __init disable_stack_depot(char *str)
{
	return kstrtobool(str, &stack_depot_disabled);
}
early_param("stack_depot_disable", disable_stack_depot);

static int __init parse_max_pools(char *str)
{
	const long long limit = (1LL << (DEPOT_POOL_INDEX_BITS)) - 1;
	unsigned int max_pools;
	int rv;

	rv = kstrtouint(str, 0, &max_pools);
	if (rv)
		return rv;

	if (max_pools < 1024) {
		pr_err("stack_depot_max_pools below 1024, using default of %u\n",
		       stack_max_pools);
		goto out;
	}

	if (max_pools > limit) {
		pr_err("stack_depot_max_pools exceeds %lld, using default of %u\n",
		       limit, stack_max_pools);
		goto out;
	}

	stack_max_pools = max_pools;
out:
	return 0;
}
early_param("stack_depot_max_pools", parse_max_pools);

void __init stack_depot_request_early_init(void)
{
	/* Too late to request early init now. */
	WARN_ON(__stack_depot_early_init_passed);

	__stack_depot_early_init_requested = true;
}

/* Initialize list_head's within the hash table. */
static void init_stack_table(unsigned long entries)
{
	unsigned long i;

	for (i = 0; i < entries; i++)
		INIT_LIST_HEAD(&stack_table[i]);
}

/* Allocates a hash table via memblock. Can only be used during early boot. */
int __init stack_depot_early_init(void)
{
	unsigned long entries = 0;

	/* This function must be called only once, from mm_init(). */
	if (WARN_ON(__stack_depot_early_init_passed))
		return 0;
	__stack_depot_early_init_passed = true;

	/*
	 * Print disabled message even if early init has not been requested:
	 * stack_depot_init() will not print one.
	 */
	if (stack_depot_disabled) {
		pr_info("disabled\n");
		return 0;
	}

	/*
	 * If KASAN is enabled, use the maximum order: KASAN is frequently used
	 * in fuzzing scenarios, which leads to a large number of different
	 * stack traces being stored in stack depot.
	 */
	if (kasan_enabled() && !stack_bucket_number_order)
		stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX;

	/*
	 * Check if early init has been requested after setting
	 * stack_bucket_number_order: stack_depot_init() uses its value.
	 */
	if (!__stack_depot_early_init_requested)
		return 0;

	/*
	 * If stack_bucket_number_order is not set, leave entries as 0 to rely
	 * on the automatic calculations performed by alloc_large_system_hash().
	 */
	if (stack_bucket_number_order)
		entries = 1UL << stack_bucket_number_order;
	pr_info("allocating hash table via alloc_large_system_hash\n");
	stack_table = alloc_large_system_hash("stackdepot",
						sizeof(struct list_head),
						entries,
						STACK_HASH_TABLE_SCALE,
						HASH_EARLY,
						NULL,
						&stack_hash_mask,
						1UL << STACK_BUCKET_NUMBER_ORDER_MIN,
						1UL << STACK_BUCKET_NUMBER_ORDER_MAX);
	if (!stack_table) {
		pr_err("hash table allocation failed, disabling\n");
		stack_depot_disabled = true;
		return -ENOMEM;
	}
	if (!entries) {
		/*
		 * Obtain the number of entries that was calculated by
		 * alloc_large_system_hash().
		 */
		entries = stack_hash_mask + 1;
	}
	init_stack_table(entries);

	pr_info("allocating space for %u stack pools via memblock\n",
		stack_max_pools);
	stack_pools =
		memblock_alloc(stack_max_pools * sizeof(void *), PAGE_SIZE);
	if (!stack_pools) {
		pr_err("stack pools allocation failed, disabling\n");
		memblock_free(stack_table, entries * sizeof(struct list_head));
		stack_depot_disabled = true;
		return -ENOMEM;
	}

	return 0;
}

/* Allocates a hash table via kvcalloc. Can be used after boot. */
int stack_depot_init(void)
{
	static DEFINE_MUTEX(stack_depot_init_mutex);
	unsigned long entries;
	int ret = 0;

	mutex_lock(&stack_depot_init_mutex);

	if (stack_depot_disabled || stack_table)
		goto out_unlock;

	/*
	 * Similarly to stack_depot_early_init, use stack_bucket_number_order
	 * if assigned, and rely on automatic scaling otherwise.
	 */
	if (stack_bucket_number_order) {
		entries = 1UL << stack_bucket_number_order;
	} else {
		int scale = STACK_HASH_TABLE_SCALE;

		entries = nr_free_buffer_pages();
		entries = roundup_pow_of_two(entries);

		if (scale > PAGE_SHIFT)
			entries >>= (scale - PAGE_SHIFT);
		else
			entries <<= (PAGE_SHIFT - scale);
	}

	if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN)
		entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN;
	if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX)
		entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX;

	pr_info("allocating hash table of %lu entries via kvcalloc\n", entries);
	stack_table = kvcalloc(entries, sizeof(struct list_head), GFP_KERNEL);
	if (!stack_table) {
		pr_err("hash table allocation failed, disabling\n");
		stack_depot_disabled = true;
		ret = -ENOMEM;
		goto out_unlock;
	}
	stack_hash_mask = entries - 1;
	init_stack_table(entries);

	pr_info("allocating space for %u stack pools via kvcalloc\n",
		stack_max_pools);
	stack_pools = kvcalloc(stack_max_pools, sizeof(void *), GFP_KERNEL);
	if (!stack_pools) {
		pr_err("stack pools allocation failed, disabling\n");
		kvfree(stack_table);
		stack_depot_disabled = true;
		ret = -ENOMEM;
	}

out_unlock:
	mutex_unlock(&stack_depot_init_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(stack_depot_init);
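/*
 * Illustrative sketch only, not part of the original file: a late user of
 * stack depot would typically call stack_depot_init() once before its first
 * save, unless it relies on CONFIG_STACKDEPOT_ALWAYS_INIT or on
 * stack_depot_request_early_init(). The function name below is hypothetical.
 */
static int __maybe_unused example_subsys_init(void)
{
	/*
	 * Safe to call more than once; fails only if the hash table or the
	 * pool array could not be allocated.
	 */
	return stack_depot_init();
}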
/*
 * Initializes new stack pool, and updates the list of pools.
 */
static bool depot_init_pool(void **prealloc)
{
	lockdep_assert_held(&pool_lock);

	if (unlikely(pools_num >= stack_max_pools)) {
		/* Bail out if we reached the pool limit. */
		WARN_ON_ONCE(pools_num > stack_max_pools); /* should never happen */
		WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
		WARN_ONCE(1, "Stack depot reached limit capacity");
		return false;
	}

	if (!new_pool && *prealloc) {
		/* We have preallocated memory, use it. */
		WRITE_ONCE(new_pool, *prealloc);
		*prealloc = NULL;
	}

	if (!new_pool)
		return false; /* new_pool and *prealloc are NULL */

	/* Save reference to the pool to be used by depot_fetch_stack(). */
	stack_pools[pools_num] = new_pool;

	/*
	 * Stack depot tries to keep an extra pool allocated even before it runs
	 * out of space in the currently used pool.
	 *
	 * To indicate that a new preallocation is needed new_pool is reset to
	 * NULL; do not reset to NULL if we have reached the maximum number of
	 * pools.
	 */
	if (pools_num < stack_max_pools)
		WRITE_ONCE(new_pool, NULL);
	else
		WRITE_ONCE(new_pool, STACK_DEPOT_POISON);

	/* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
	WRITE_ONCE(pools_num, pools_num + 1);
	ASSERT_EXCLUSIVE_WRITER(pools_num);

	pool_offset = 0;

	return true;
}

/* Keeps the preallocated memory to be used for a new stack depot pool. */
static void depot_keep_new_pool(void **prealloc)
{
	lockdep_assert_held(&pool_lock);

	/*
	 * If a new pool is already saved or the maximum number of
	 * pools is reached, do not use the preallocated memory.
	 */
	if (new_pool)
		return;

	WRITE_ONCE(new_pool, *prealloc);
	*prealloc = NULL;
}

/*
 * Try to initialize a new stack record from the current pool, a cached pool, or
 * the current pre-allocation.
 */
static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
{
	struct stack_record *stack;
	void *current_pool;
	u32 pool_index;

	lockdep_assert_held(&pool_lock);

	if (pool_offset + size > DEPOT_POOL_SIZE) {
		if (!depot_init_pool(prealloc))
			return NULL;
	}

	if (WARN_ON_ONCE(pools_num < 1))
		return NULL;
	pool_index = pools_num - 1;
	current_pool = stack_pools[pool_index];
	if (WARN_ON_ONCE(!current_pool))
		return NULL;

	stack = current_pool + pool_offset;

	/* Pre-initialize handle once. */
	stack->handle.pool_index_plus_1 = pool_index + 1;
	stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
	stack->handle.extra = 0;
	INIT_LIST_HEAD(&stack->hash_list);

	pool_offset += size;

	return stack;
}

/* Try to find next free usable entry from the freelist. */
static struct stack_record *depot_pop_free(void)
{
	struct stack_record *stack;

	lockdep_assert_held(&pool_lock);

	if (list_empty(&free_stacks))
		return NULL;

	/*
	 * We maintain the invariant that the elements in front are least
	 * recently used, and are therefore more likely to be associated with an
	 * RCU grace period in the past. Consequently it is sufficient to only
	 * check the first entry.
	 */
	stack = list_first_entry(&free_stacks, struct stack_record, free_list);
	if (!poll_state_synchronize_rcu(stack->rcu_state))
		return NULL;

	list_del(&stack->free_list);
	counters[DEPOT_COUNTER_FREELIST_SIZE]--;

	return stack;
}

static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
{
	const size_t used = flex_array_size(s, entries, nr_entries);
	const size_t unused = sizeof(s->entries) - used;

	WARN_ON_ONCE(sizeof(s->entries) < used);

	return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN);
}

/* Allocates a new stack in a stack depot pool. */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
{
	struct stack_record *stack = NULL;
	size_t record_size;

	lockdep_assert_held(&pool_lock);

	/* This should already be checked by public API entry points. */
	if (WARN_ON_ONCE(!nr_entries))
		return NULL;

	/* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
	if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
		nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;

	if (flags & STACK_DEPOT_FLAG_GET) {
		/*
		 * Evictable entries have to allocate the max. size so they may
		 * safely be re-used by differently sized allocations.
		 */
		record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES);
		stack = depot_pop_free();
	} else {
		record_size = depot_stack_record_size(stack, nr_entries);
	}

	if (!stack) {
		stack = depot_pop_free_pool(prealloc, record_size);
		if (!stack)
			return NULL;
	}

	/* Save the stack trace. */
	stack->hash = hash;
	stack->size = nr_entries;
	/* stack->handle is already filled in by depot_pop_free_pool(). */
	memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries));

	if (flags & STACK_DEPOT_FLAG_GET) {
		refcount_set(&stack->count, 1);
		counters[DEPOT_COUNTER_REFD_ALLOCS]++;
		counters[DEPOT_COUNTER_REFD_INUSE]++;
	} else {
		/* Warn on attempts to switch to refcounting this entry. */
		refcount_set(&stack->count, REFCOUNT_SATURATED);
		counters[DEPOT_COUNTER_PERSIST_COUNT]++;
		counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size;
	}

	/*
	 * Let KMSAN know the stored stack record is initialized. This shall
	 * prevent false positive reports if instrumented code accesses it.
	 */
	kmsan_unpoison_memory(stack, record_size);

	return stack;
}

static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
{
	const int pools_num_cached = READ_ONCE(pools_num);
	union handle_parts parts = { .handle = handle };
	void *pool;
	u32 pool_index = parts.pool_index_plus_1 - 1;
	size_t offset = parts.offset << DEPOT_STACK_ALIGN;
	struct stack_record *stack;

	lockdep_assert_not_held(&pool_lock);

	if (pool_index >= pools_num_cached) {
		WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
		     pool_index, pools_num_cached, handle);
		return NULL;
	}

	pool = stack_pools[pool_index];
	if (WARN_ON(!pool))
		return NULL;

	stack = pool + offset;
	if (WARN_ON(!refcount_read(&stack->count)))
		return NULL;

	return stack;
}

/* Links stack into the freelist. */
static void depot_free_stack(struct stack_record *stack)
{
	unsigned long flags;

	lockdep_assert_not_held(&pool_lock);

	raw_spin_lock_irqsave(&pool_lock, flags);
	printk_deferred_enter();

	/*
	 * Remove the entry from the hash list. Concurrent list traversal may
	 * still observe the entry, but since the refcount is zero, this entry
	 * will no longer be considered as valid.
	 */
	list_del_rcu(&stack->hash_list);

	/*
	 * Due to being used from constrained contexts such as the allocators,
	 * NMI, or even RCU itself, stack depot cannot rely on primitives that
	 * would sleep (such as synchronize_rcu()) or recursively call into
	 * stack depot again (such as call_rcu()).
	 *
	 * Instead, get an RCU cookie, so that we can ensure this entry isn't
	 * moved onto another list until the next grace period, and concurrent
	 * RCU list traversal remains safe.
	 */
	stack->rcu_state = get_state_synchronize_rcu();

	/*
	 * Add the entry to the freelist tail, so that older entries are
	 * considered first - their RCU cookie is more likely to no longer be
	 * associated with the current grace period.
	 */
	list_add_tail(&stack->free_list, &free_stacks);

	counters[DEPOT_COUNTER_FREELIST_SIZE]++;
	counters[DEPOT_COUNTER_REFD_FREES]++;
	counters[DEPOT_COUNTER_REFD_INUSE]--;

	printk_deferred_exit();
	raw_spin_unlock_irqrestore(&pool_lock, flags);
}

/* Calculates the hash for a stack. */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
	return jhash2((u32 *)entries,
		      array_size(size, sizeof(*entries)) / sizeof(u32),
		      STACK_HASH_SEED);
}

/*
 * Non-instrumented version of memcmp().
 * Does not check the lexicographical order, only the equality.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
			unsigned int n)
{
	for ( ; n-- ; u1++, u2++) {
		if (*u1 != *u2)
			return 1;
	}
	return 0;
}

/* Finds a stack in a bucket of the hash table. */
static inline struct stack_record *find_stack(struct list_head *bucket,
					      unsigned long *entries, int size,
					      u32 hash, depot_flags_t flags)
{
	struct stack_record *stack, *ret = NULL;

	/*
	 * Stack depot may be used from instrumentation that instruments RCU or
	 * tracing itself; use variant that does not call into RCU and cannot be
	 * traced.
	 *
	 * Note: Such use cases must take care when using refcounting to evict
	 * unused entries, because the stack record free-then-reuse code paths
	 * do call into RCU.
	 */
	rcu_read_lock_sched_notrace();

	list_for_each_entry_rcu(stack, bucket, hash_list) {
		if (stack->hash != hash || stack->size != size)
			continue;

		/*
		 * This may race with depot_free_stack() accessing the freelist
		 * management state unioned with @entries. The refcount is zero
		 * in that case and the below refcount_inc_not_zero() will fail.
		 */
		if (data_race(stackdepot_memcmp(entries, stack->entries, size)))
			continue;

		/*
		 * Try to increment refcount. If this succeeds, the stack record
		 * is valid and has not yet been freed.
		 *
		 * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior
		 * to then call stack_depot_put() later, and we can assume that
		 * a stack record is never placed back on the freelist.
		 */
		if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count))
			continue;

		ret = stack;
		break;
	}

	rcu_read_unlock_sched_notrace();

	return ret;
}

depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
					    unsigned int nr_entries,
					    gfp_t alloc_flags,
					    depot_flags_t depot_flags)
{
	struct list_head *bucket;
	struct stack_record *found = NULL;
	depot_stack_handle_t handle = 0;
	struct page *page = NULL;
	void *prealloc = NULL;
	bool allow_spin = gfpflags_allow_spinning(alloc_flags);
	bool can_alloc = (depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC) && allow_spin;
	unsigned long flags;
	u32 hash;

	if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK))
		return 0;

	/*
	 * If this stack trace is from an interrupt, including anything before
	 * interrupt entry usually leads to unbounded stack depot growth.
	 *
	 * Since use of filter_irq_stacks() is a requirement to ensure stack
	 * depot can efficiently deduplicate interrupt stacks, always
	 * filter_irq_stacks() to simplify all callers' use of stack depot.
	 */
	nr_entries = filter_irq_stacks(entries, nr_entries);

	if (unlikely(nr_entries == 0) || stack_depot_disabled)
		return 0;

	hash = hash_stack(entries, nr_entries);
	bucket = &stack_table[hash & stack_hash_mask];

	/* Fast path: look the stack trace up without locking. */
	found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
	if (found)
		goto exit;

	/*
	 * Allocate memory for a new pool if required now:
	 * we won't be able to do that under the lock.
	 */
	if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
		page = alloc_pages(gfp_nested_mask(alloc_flags),
				   DEPOT_POOL_ORDER);
		if (page)
			prealloc = page_address(page);
	}

	if (in_nmi() || !allow_spin) {
		/* We can never allocate in NMI context. */
		WARN_ON_ONCE(can_alloc);
		/* Best effort; bail if we fail to take the lock. */
		if (!raw_spin_trylock_irqsave(&pool_lock, flags))
			goto exit;
	} else {
		raw_spin_lock_irqsave(&pool_lock, flags);
	}
	printk_deferred_enter();

	/* Try to find again, to avoid concurrently inserting duplicates. */
	found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
	if (!found) {
		struct stack_record *new =
			depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);

		if (new) {
			/*
			 * This releases the stack record into the bucket and
			 * makes it visible to readers in find_stack().
			 */
			list_add_rcu(&new->hash_list, bucket);
			found = new;
		}
	}

	if (prealloc) {
		/*
		 * Either stack depot already contains this stack trace, or
		 * depot_alloc_stack() did not consume the preallocated memory.
		 * Try to keep the preallocated memory for future.
		 */
		depot_keep_new_pool(&prealloc);
	}

	printk_deferred_exit();
	raw_spin_unlock_irqrestore(&pool_lock, flags);
exit:
	if (prealloc) {
		/* Stack depot didn't use this memory, free it. */
		if (!allow_spin)
			free_pages_nolock(virt_to_page(prealloc), DEPOT_POOL_ORDER);
		else
			free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER);
	}
	if (found)
		handle = found->handle.handle;
	return handle;
}
EXPORT_SYMBOL_GPL(stack_depot_save_flags);

depot_stack_handle_t stack_depot_save(unsigned long *entries,
				      unsigned int nr_entries,
				      gfp_t alloc_flags)
{
	return stack_depot_save_flags(entries, nr_entries, alloc_flags,
				      STACK_DEPOT_FLAG_CAN_ALLOC);
}
EXPORT_SYMBOL_GPL(stack_depot_save);

struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle)
{
	if (!handle)
		return NULL;

	return depot_fetch_stack(handle);
}

unsigned int stack_depot_fetch(depot_stack_handle_t handle,
			       unsigned long **entries)
{
	struct stack_record *stack;

	*entries = NULL;
	/*
	 * Let KMSAN know *entries is initialized. This shall prevent false
	 * positive reports if instrumented code accesses it.
	 */
	kmsan_unpoison_memory(entries, sizeof(*entries));

	if (!handle || stack_depot_disabled)
		return 0;

	stack = depot_fetch_stack(handle);
	/*
	 * Should never be NULL, otherwise this is a use-after-put (or just a
	 * corrupt handle).
	 */
	if (WARN(!stack, "corrupt handle or use after stack_depot_put()"))
		return 0;

	*entries = stack->entries;
	return stack->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);

void stack_depot_put(depot_stack_handle_t handle)
{
	struct stack_record *stack;

	if (!handle || stack_depot_disabled)
		return;

	stack = depot_fetch_stack(handle);
	/*
	 * Should always be able to find the stack record, otherwise this is an
	 * unbalanced put attempt (or corrupt handle).
	 */
	if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()"))
		return;

	if (refcount_dec_and_test(&stack->count))
		depot_free_stack(stack);
}
EXPORT_SYMBOL_GPL(stack_depot_put);

void stack_depot_print(depot_stack_handle_t stack)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(stack, &entries);
	if (nr_entries > 0)
		stack_trace_print(entries, nr_entries, 0);
}
EXPORT_SYMBOL_GPL(stack_depot_print);

int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
			int spaces)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(handle, &entries);
	return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
						spaces) : 0;
}
EXPORT_SYMBOL_GPL(stack_depot_snprint);

depot_stack_handle_t __must_check stack_depot_set_extra_bits(
			depot_stack_handle_t handle, unsigned int extra_bits)
{
	union handle_parts parts = { .handle = handle };

	/* Don't set extra bits on empty handles. */
	if (!handle)
		return 0;

	parts.extra = extra_bits;
	return parts.handle;
}
EXPORT_SYMBOL(stack_depot_set_extra_bits);

unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle)
{
	union handle_parts parts = { .handle = handle };

	return parts.extra;
}
EXPORT_SYMBOL(stack_depot_get_extra_bits);
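/*
 * Illustrative sketch only, not part of the original file: the evictable
 * usage pattern built from the exported API above. A caller that passes
 * STACK_DEPOT_FLAG_GET must balance each successful save with a
 * stack_depot_put() once the handle is no longer needed. The function name
 * and the 16-frame buffer are arbitrary choices for the example.
 */
static void __maybe_unused example_refcounted_usage(gfp_t gfp)
{
	unsigned long entries[16];
	unsigned int nr_entries;
	depot_stack_handle_t handle;

	/* Capture the current call chain, skipping this frame. */
	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);

	/* Store it as an evictable, refcounted record. */
	handle = stack_depot_save_flags(entries, nr_entries, gfp,
					STACK_DEPOT_FLAG_CAN_ALLOC |
					STACK_DEPOT_FLAG_GET);
	if (!handle)
		return;

	/* ... use the handle, e.g. print the saved trace ... */
	stack_depot_print(handle);

	/* Drop the reference; the record may now be evicted and reused. */
	stack_depot_put(handle);
}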
static int stats_show(struct seq_file *seq, void *v)
{
	/*
	 * data race ok: These are just statistics counters, and approximate
	 * statistics are ok for debugging.
	 */
	seq_printf(seq, "pools: %d\n", data_race(pools_num));
	for (int i = 0; i < DEPOT_COUNTER_COUNT; i++)
		seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i]));

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(stats);

static int depot_debugfs_init(void)
{
	struct dentry *dir;

	if (stack_depot_disabled)
		return 0;

	dir = debugfs_create_dir("stackdepot", NULL);
	debugfs_create_file("stats", 0444, dir, NULL, &stats_fops);
	return 0;
}
late_initcall(depot_debugfs_init);
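/*
 * Illustrative sketch only, not part of the original file: the simplest,
 * non-evictable use of stack depot. Records saved without
 * STACK_DEPOT_FLAG_GET are never refcounted or freed, which suits callers
 * that track e.g. allocation call sites for the lifetime of the system.
 * The function name and the 16-frame buffer are arbitrary choices.
 */
static depot_stack_handle_t __maybe_unused example_persistent_save(gfp_t gfp)
{
	unsigned long entries[16];
	unsigned int nr_entries;

	/* Capture the current call chain, skipping this frame. */
	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);

	/*
	 * Deduplicate and store the trace; returns 0 if stack depot is
	 * disabled or a new pool could not be allocated.
	 */
	return stack_depot_save(entries, nr_entries, gfp);
}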