1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Stack depot - a stack trace storage that avoids duplication. 4 * 5 * Internally, stack depot maintains a hash table of unique stacktraces. The 6 * stack traces themselves are stored contiguously one after another in a set 7 * of separate page allocations. 8 * 9 * Author: Alexander Potapenko <glider@google.com> 10 * Copyright (C) 2016 Google, Inc. 11 * 12 * Based on the code by Dmitry Chernenkov. 13 */ 14 15 #define pr_fmt(fmt) "stackdepot: " fmt 16 17 #include <linux/debugfs.h> 18 #include <linux/gfp.h> 19 #include <linux/jhash.h> 20 #include <linux/kernel.h> 21 #include <linux/kmsan.h> 22 #include <linux/list.h> 23 #include <linux/mm.h> 24 #include <linux/mutex.h> 25 #include <linux/printk.h> 26 #include <linux/rculist.h> 27 #include <linux/rcupdate.h> 28 #include <linux/refcount.h> 29 #include <linux/slab.h> 30 #include <linux/spinlock.h> 31 #include <linux/stacktrace.h> 32 #include <linux/stackdepot.h> 33 #include <linux/string.h> 34 #include <linux/types.h> 35 #include <linux/memblock.h> 36 #include <linux/kasan-enabled.h> 37 38 #define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) 39 40 #define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ 41 #define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER)) 42 #define DEPOT_STACK_ALIGN 4 43 #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) 44 #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ 45 STACK_DEPOT_EXTRA_BITS) 46 #if IS_ENABLED(CONFIG_KMSAN) && CONFIG_STACKDEPOT_MAX_FRAMES >= 32 47 /* 48 * KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack 49 * traces. As KMSAN does not support evicting stack traces from the stack 50 * depot, the stack depot capacity might be reached quickly with large stack 51 * records. Adjust the maximum number of stack depot pools for this case. 52 */ 53 #define DEPOT_POOLS_CAP (8192 * (CONFIG_STACKDEPOT_MAX_FRAMES / 16)) 54 #else 55 #define DEPOT_POOLS_CAP 8192 56 #endif 57 #define DEPOT_MAX_POOLS \ 58 (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ 59 (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP) 60 61 /* Compact structure that stores a reference to a stack. */ 62 union handle_parts { 63 depot_stack_handle_t handle; 64 struct { 65 u32 pool_index : DEPOT_POOL_INDEX_BITS; 66 u32 offset : DEPOT_OFFSET_BITS; 67 u32 extra : STACK_DEPOT_EXTRA_BITS; 68 }; 69 }; 70 71 struct stack_record { 72 struct list_head hash_list; /* Links in the hash table */ 73 u32 hash; /* Hash in hash table */ 74 u32 size; /* Number of stored frames */ 75 union handle_parts handle; /* Constant after initialization */ 76 refcount_t count; 77 union { 78 unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ 79 struct { 80 /* 81 * An important invariant of the implementation is to 82 * only place a stack record onto the freelist iff its 83 * refcount is zero. Because stack records with a zero 84 * refcount are never considered as valid, it is safe to 85 * union @entries and freelist management state below. 86 * Conversely, as soon as an entry is off the freelist 87 * and its refcount becomes non-zero, the below must not 88 * be accessed until being placed back on the freelist. 89 */ 90 struct list_head free_list; /* Links in the freelist */ 91 unsigned long rcu_state; /* RCU cookie */ 92 }; 93 }; 94 }; 95 96 #define DEPOT_STACK_RECORD_SIZE \ 97 ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN) 98 99 static bool stack_depot_disabled; 100 static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); 101 static bool __stack_depot_early_init_passed __initdata; 102 103 /* Use one hash table bucket per 16 KB of memory. */ 104 #define STACK_HASH_TABLE_SCALE 14 105 /* Limit the number of buckets between 4K and 1M. */ 106 #define STACK_BUCKET_NUMBER_ORDER_MIN 12 107 #define STACK_BUCKET_NUMBER_ORDER_MAX 20 108 /* Initial seed for jhash2. */ 109 #define STACK_HASH_SEED 0x9747b28c 110 111 /* Hash table of stored stack records. */ 112 static struct list_head *stack_table; 113 /* Fixed order of the number of table buckets. Used when KASAN is enabled. */ 114 static unsigned int stack_bucket_number_order; 115 /* Hash mask for indexing the table. */ 116 static unsigned int stack_hash_mask; 117 118 /* Array of memory regions that store stack records. */ 119 static void *stack_pools[DEPOT_MAX_POOLS]; 120 /* Newly allocated pool that is not yet added to stack_pools. */ 121 static void *new_pool; 122 /* Number of pools in stack_pools. */ 123 static int pools_num; 124 /* Freelist of stack records within stack_pools. */ 125 static LIST_HEAD(free_stacks); 126 /* 127 * Stack depot tries to keep an extra pool allocated even before it runs out 128 * of space in the currently used pool. This flag marks whether this extra pool 129 * needs to be allocated. It has the value 0 when either an extra pool is not 130 * yet allocated or if the limit on the number of pools is reached. 131 */ 132 static bool new_pool_required = true; 133 /* The lock must be held when performing pool or freelist modifications. */ 134 static DEFINE_RAW_SPINLOCK(pool_lock); 135 136 /* Statistics counters for debugfs. */ 137 enum depot_counter_id { 138 DEPOT_COUNTER_ALLOCS, 139 DEPOT_COUNTER_FREES, 140 DEPOT_COUNTER_INUSE, 141 DEPOT_COUNTER_FREELIST_SIZE, 142 DEPOT_COUNTER_COUNT, 143 }; 144 static long counters[DEPOT_COUNTER_COUNT]; 145 static const char *const counter_names[] = { 146 [DEPOT_COUNTER_ALLOCS] = "allocations", 147 [DEPOT_COUNTER_FREES] = "frees", 148 [DEPOT_COUNTER_INUSE] = "in_use", 149 [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size", 150 }; 151 static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT); 152 153 static int __init disable_stack_depot(char *str) 154 { 155 return kstrtobool(str, &stack_depot_disabled); 156 } 157 early_param("stack_depot_disable", disable_stack_depot); 158 159 void __init stack_depot_request_early_init(void) 160 { 161 /* Too late to request early init now. */ 162 WARN_ON(__stack_depot_early_init_passed); 163 164 __stack_depot_early_init_requested = true; 165 } 166 167 /* Initialize list_head's within the hash table. */ 168 static void init_stack_table(unsigned long entries) 169 { 170 unsigned long i; 171 172 for (i = 0; i < entries; i++) 173 INIT_LIST_HEAD(&stack_table[i]); 174 } 175 176 /* Allocates a hash table via memblock. Can only be used during early boot. */ 177 int __init stack_depot_early_init(void) 178 { 179 unsigned long entries = 0; 180 181 /* This function must be called only once, from mm_init(). */ 182 if (WARN_ON(__stack_depot_early_init_passed)) 183 return 0; 184 __stack_depot_early_init_passed = true; 185 186 /* 187 * Print disabled message even if early init has not been requested: 188 * stack_depot_init() will not print one. 189 */ 190 if (stack_depot_disabled) { 191 pr_info("disabled\n"); 192 return 0; 193 } 194 195 /* 196 * If KASAN is enabled, use the maximum order: KASAN is frequently used 197 * in fuzzing scenarios, which leads to a large number of different 198 * stack traces being stored in stack depot. 199 */ 200 if (kasan_enabled() && !stack_bucket_number_order) 201 stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX; 202 203 /* 204 * Check if early init has been requested after setting 205 * stack_bucket_number_order: stack_depot_init() uses its value. 206 */ 207 if (!__stack_depot_early_init_requested) 208 return 0; 209 210 /* 211 * If stack_bucket_number_order is not set, leave entries as 0 to rely 212 * on the automatic calculations performed by alloc_large_system_hash(). 213 */ 214 if (stack_bucket_number_order) 215 entries = 1UL << stack_bucket_number_order; 216 pr_info("allocating hash table via alloc_large_system_hash\n"); 217 stack_table = alloc_large_system_hash("stackdepot", 218 sizeof(struct list_head), 219 entries, 220 STACK_HASH_TABLE_SCALE, 221 HASH_EARLY, 222 NULL, 223 &stack_hash_mask, 224 1UL << STACK_BUCKET_NUMBER_ORDER_MIN, 225 1UL << STACK_BUCKET_NUMBER_ORDER_MAX); 226 if (!stack_table) { 227 pr_err("hash table allocation failed, disabling\n"); 228 stack_depot_disabled = true; 229 return -ENOMEM; 230 } 231 if (!entries) { 232 /* 233 * Obtain the number of entries that was calculated by 234 * alloc_large_system_hash(). 235 */ 236 entries = stack_hash_mask + 1; 237 } 238 init_stack_table(entries); 239 240 return 0; 241 } 242 243 /* Allocates a hash table via kvcalloc. Can be used after boot. */ 244 int stack_depot_init(void) 245 { 246 static DEFINE_MUTEX(stack_depot_init_mutex); 247 unsigned long entries; 248 int ret = 0; 249 250 mutex_lock(&stack_depot_init_mutex); 251 252 if (stack_depot_disabled || stack_table) 253 goto out_unlock; 254 255 /* 256 * Similarly to stack_depot_early_init, use stack_bucket_number_order 257 * if assigned, and rely on automatic scaling otherwise. 258 */ 259 if (stack_bucket_number_order) { 260 entries = 1UL << stack_bucket_number_order; 261 } else { 262 int scale = STACK_HASH_TABLE_SCALE; 263 264 entries = nr_free_buffer_pages(); 265 entries = roundup_pow_of_two(entries); 266 267 if (scale > PAGE_SHIFT) 268 entries >>= (scale - PAGE_SHIFT); 269 else 270 entries <<= (PAGE_SHIFT - scale); 271 } 272 273 if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN) 274 entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN; 275 if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX) 276 entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX; 277 278 pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); 279 stack_table = kvcalloc(entries, sizeof(struct list_head), GFP_KERNEL); 280 if (!stack_table) { 281 pr_err("hash table allocation failed, disabling\n"); 282 stack_depot_disabled = true; 283 ret = -ENOMEM; 284 goto out_unlock; 285 } 286 stack_hash_mask = entries - 1; 287 init_stack_table(entries); 288 289 out_unlock: 290 mutex_unlock(&stack_depot_init_mutex); 291 292 return ret; 293 } 294 EXPORT_SYMBOL_GPL(stack_depot_init); 295 296 /* 297 * Initializes new stack depot @pool, release all its entries to the freelist, 298 * and update the list of pools. 299 */ 300 static void depot_init_pool(void *pool) 301 { 302 int offset; 303 304 lockdep_assert_held(&pool_lock); 305 306 /* Initialize handles and link stack records into the freelist. */ 307 for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; 308 offset += DEPOT_STACK_RECORD_SIZE) { 309 struct stack_record *stack = pool + offset; 310 311 stack->handle.pool_index = pools_num; 312 stack->handle.offset = offset >> DEPOT_STACK_ALIGN; 313 stack->handle.extra = 0; 314 315 /* 316 * Stack traces of size 0 are never saved, and we can simply use 317 * the size field as an indicator if this is a new unused stack 318 * record in the freelist. 319 */ 320 stack->size = 0; 321 322 INIT_LIST_HEAD(&stack->hash_list); 323 /* 324 * Add to the freelist front to prioritize never-used entries: 325 * required in case there are entries in the freelist, but their 326 * RCU cookie still belongs to the current RCU grace period 327 * (there can still be concurrent readers). 328 */ 329 list_add(&stack->free_list, &free_stacks); 330 counters[DEPOT_COUNTER_FREELIST_SIZE]++; 331 } 332 333 /* Save reference to the pool to be used by depot_fetch_stack(). */ 334 stack_pools[pools_num] = pool; 335 336 /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */ 337 WRITE_ONCE(pools_num, pools_num + 1); 338 ASSERT_EXCLUSIVE_WRITER(pools_num); 339 } 340 341 /* Keeps the preallocated memory to be used for a new stack depot pool. */ 342 static void depot_keep_new_pool(void **prealloc) 343 { 344 lockdep_assert_held(&pool_lock); 345 346 /* 347 * If a new pool is already saved or the maximum number of 348 * pools is reached, do not use the preallocated memory. 349 */ 350 if (!new_pool_required) 351 return; 352 353 /* 354 * Use the preallocated memory for the new pool 355 * as long as we do not exceed the maximum number of pools. 356 */ 357 if (pools_num < DEPOT_MAX_POOLS) { 358 new_pool = *prealloc; 359 *prealloc = NULL; 360 } 361 362 /* 363 * At this point, either a new pool is kept or the maximum 364 * number of pools is reached. In either case, take note that 365 * keeping another pool is not required. 366 */ 367 WRITE_ONCE(new_pool_required, false); 368 } 369 370 /* 371 * Try to initialize a new stack depot pool from either a previous or the 372 * current pre-allocation, and release all its entries to the freelist. 373 */ 374 static bool depot_try_init_pool(void **prealloc) 375 { 376 lockdep_assert_held(&pool_lock); 377 378 /* Check if we have a new pool saved and use it. */ 379 if (new_pool) { 380 depot_init_pool(new_pool); 381 new_pool = NULL; 382 383 /* Take note that we might need a new new_pool. */ 384 if (pools_num < DEPOT_MAX_POOLS) 385 WRITE_ONCE(new_pool_required, true); 386 387 return true; 388 } 389 390 /* Bail out if we reached the pool limit. */ 391 if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { 392 WARN_ONCE(1, "Stack depot reached limit capacity"); 393 return false; 394 } 395 396 /* Check if we have preallocated memory and use it. */ 397 if (*prealloc) { 398 depot_init_pool(*prealloc); 399 *prealloc = NULL; 400 return true; 401 } 402 403 return false; 404 } 405 406 /* Try to find next free usable entry. */ 407 static struct stack_record *depot_pop_free(void) 408 { 409 struct stack_record *stack; 410 411 lockdep_assert_held(&pool_lock); 412 413 if (list_empty(&free_stacks)) 414 return NULL; 415 416 /* 417 * We maintain the invariant that the elements in front are least 418 * recently used, and are therefore more likely to be associated with an 419 * RCU grace period in the past. Consequently it is sufficient to only 420 * check the first entry. 421 */ 422 stack = list_first_entry(&free_stacks, struct stack_record, free_list); 423 if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state)) 424 return NULL; 425 426 list_del(&stack->free_list); 427 counters[DEPOT_COUNTER_FREELIST_SIZE]--; 428 429 return stack; 430 } 431 432 /* Allocates a new stack in a stack depot pool. */ 433 static struct stack_record * 434 depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) 435 { 436 struct stack_record *stack; 437 438 lockdep_assert_held(&pool_lock); 439 440 /* This should already be checked by public API entry points. */ 441 if (WARN_ON_ONCE(!size)) 442 return NULL; 443 444 /* Check if we have a stack record to save the stack trace. */ 445 stack = depot_pop_free(); 446 if (!stack) { 447 /* No usable entries on the freelist - try to refill the freelist. */ 448 if (!depot_try_init_pool(prealloc)) 449 return NULL; 450 stack = depot_pop_free(); 451 if (WARN_ON(!stack)) 452 return NULL; 453 } 454 455 /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ 456 if (size > CONFIG_STACKDEPOT_MAX_FRAMES) 457 size = CONFIG_STACKDEPOT_MAX_FRAMES; 458 459 /* Save the stack trace. */ 460 stack->hash = hash; 461 stack->size = size; 462 /* stack->handle is already filled in by depot_init_pool(). */ 463 refcount_set(&stack->count, 1); 464 memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); 465 466 /* 467 * Let KMSAN know the stored stack record is initialized. This shall 468 * prevent false positive reports if instrumented code accesses it. 469 */ 470 kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE); 471 472 counters[DEPOT_COUNTER_ALLOCS]++; 473 counters[DEPOT_COUNTER_INUSE]++; 474 return stack; 475 } 476 477 static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) 478 { 479 const int pools_num_cached = READ_ONCE(pools_num); 480 union handle_parts parts = { .handle = handle }; 481 void *pool; 482 size_t offset = parts.offset << DEPOT_STACK_ALIGN; 483 struct stack_record *stack; 484 485 lockdep_assert_not_held(&pool_lock); 486 487 if (parts.pool_index > pools_num_cached) { 488 WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", 489 parts.pool_index, pools_num_cached, handle); 490 return NULL; 491 } 492 493 pool = stack_pools[parts.pool_index]; 494 if (WARN_ON(!pool)) 495 return NULL; 496 497 stack = pool + offset; 498 if (WARN_ON(!refcount_read(&stack->count))) 499 return NULL; 500 501 return stack; 502 } 503 504 /* Links stack into the freelist. */ 505 static void depot_free_stack(struct stack_record *stack) 506 { 507 unsigned long flags; 508 509 lockdep_assert_not_held(&pool_lock); 510 511 raw_spin_lock_irqsave(&pool_lock, flags); 512 printk_deferred_enter(); 513 514 /* 515 * Remove the entry from the hash list. Concurrent list traversal may 516 * still observe the entry, but since the refcount is zero, this entry 517 * will no longer be considered as valid. 518 */ 519 list_del_rcu(&stack->hash_list); 520 521 /* 522 * Due to being used from constrained contexts such as the allocators, 523 * NMI, or even RCU itself, stack depot cannot rely on primitives that 524 * would sleep (such as synchronize_rcu()) or recursively call into 525 * stack depot again (such as call_rcu()). 526 * 527 * Instead, get an RCU cookie, so that we can ensure this entry isn't 528 * moved onto another list until the next grace period, and concurrent 529 * RCU list traversal remains safe. 530 */ 531 stack->rcu_state = get_state_synchronize_rcu(); 532 533 /* 534 * Add the entry to the freelist tail, so that older entries are 535 * considered first - their RCU cookie is more likely to no longer be 536 * associated with the current grace period. 537 */ 538 list_add_tail(&stack->free_list, &free_stacks); 539 540 counters[DEPOT_COUNTER_FREELIST_SIZE]++; 541 counters[DEPOT_COUNTER_FREES]++; 542 counters[DEPOT_COUNTER_INUSE]--; 543 544 printk_deferred_exit(); 545 raw_spin_unlock_irqrestore(&pool_lock, flags); 546 } 547 548 /* Calculates the hash for a stack. */ 549 static inline u32 hash_stack(unsigned long *entries, unsigned int size) 550 { 551 return jhash2((u32 *)entries, 552 array_size(size, sizeof(*entries)) / sizeof(u32), 553 STACK_HASH_SEED); 554 } 555 556 /* 557 * Non-instrumented version of memcmp(). 558 * Does not check the lexicographical order, only the equality. 559 */ 560 static inline 561 int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, 562 unsigned int n) 563 { 564 for ( ; n-- ; u1++, u2++) { 565 if (*u1 != *u2) 566 return 1; 567 } 568 return 0; 569 } 570 571 /* Finds a stack in a bucket of the hash table. */ 572 static inline struct stack_record *find_stack(struct list_head *bucket, 573 unsigned long *entries, int size, 574 u32 hash, depot_flags_t flags) 575 { 576 struct stack_record *stack, *ret = NULL; 577 578 /* 579 * Stack depot may be used from instrumentation that instruments RCU or 580 * tracing itself; use variant that does not call into RCU and cannot be 581 * traced. 582 * 583 * Note: Such use cases must take care when using refcounting to evict 584 * unused entries, because the stack record free-then-reuse code paths 585 * do call into RCU. 586 */ 587 rcu_read_lock_sched_notrace(); 588 589 list_for_each_entry_rcu(stack, bucket, hash_list) { 590 if (stack->hash != hash || stack->size != size) 591 continue; 592 593 /* 594 * This may race with depot_free_stack() accessing the freelist 595 * management state unioned with @entries. The refcount is zero 596 * in that case and the below refcount_inc_not_zero() will fail. 597 */ 598 if (data_race(stackdepot_memcmp(entries, stack->entries, size))) 599 continue; 600 601 /* 602 * Try to increment refcount. If this succeeds, the stack record 603 * is valid and has not yet been freed. 604 * 605 * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior 606 * to then call stack_depot_put() later, and we can assume that 607 * a stack record is never placed back on the freelist. 608 */ 609 if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count)) 610 continue; 611 612 ret = stack; 613 break; 614 } 615 616 rcu_read_unlock_sched_notrace(); 617 618 return ret; 619 } 620 621 depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, 622 unsigned int nr_entries, 623 gfp_t alloc_flags, 624 depot_flags_t depot_flags) 625 { 626 struct list_head *bucket; 627 struct stack_record *found = NULL; 628 depot_stack_handle_t handle = 0; 629 struct page *page = NULL; 630 void *prealloc = NULL; 631 bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC; 632 unsigned long flags; 633 u32 hash; 634 635 if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK)) 636 return 0; 637 638 /* 639 * If this stack trace is from an interrupt, including anything before 640 * interrupt entry usually leads to unbounded stack depot growth. 641 * 642 * Since use of filter_irq_stacks() is a requirement to ensure stack 643 * depot can efficiently deduplicate interrupt stacks, always 644 * filter_irq_stacks() to simplify all callers' use of stack depot. 645 */ 646 nr_entries = filter_irq_stacks(entries, nr_entries); 647 648 if (unlikely(nr_entries == 0) || stack_depot_disabled) 649 return 0; 650 651 hash = hash_stack(entries, nr_entries); 652 bucket = &stack_table[hash & stack_hash_mask]; 653 654 /* Fast path: look the stack trace up without locking. */ 655 found = find_stack(bucket, entries, nr_entries, hash, depot_flags); 656 if (found) 657 goto exit; 658 659 /* 660 * Allocate memory for a new pool if required now: 661 * we won't be able to do that under the lock. 662 */ 663 if (unlikely(can_alloc && READ_ONCE(new_pool_required))) { 664 /* 665 * Zero out zone modifiers, as we don't have specific zone 666 * requirements. Keep the flags related to allocation in atomic 667 * contexts and I/O. 668 */ 669 alloc_flags &= ~GFP_ZONEMASK; 670 alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); 671 alloc_flags |= __GFP_NOWARN; 672 page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER); 673 if (page) 674 prealloc = page_address(page); 675 } 676 677 raw_spin_lock_irqsave(&pool_lock, flags); 678 printk_deferred_enter(); 679 680 /* Try to find again, to avoid concurrently inserting duplicates. */ 681 found = find_stack(bucket, entries, nr_entries, hash, depot_flags); 682 if (!found) { 683 struct stack_record *new = 684 depot_alloc_stack(entries, nr_entries, hash, &prealloc); 685 686 if (new) { 687 /* 688 * This releases the stack record into the bucket and 689 * makes it visible to readers in find_stack(). 690 */ 691 list_add_rcu(&new->hash_list, bucket); 692 found = new; 693 } 694 } 695 696 if (prealloc) { 697 /* 698 * Either stack depot already contains this stack trace, or 699 * depot_alloc_stack() did not consume the preallocated memory. 700 * Try to keep the preallocated memory for future. 701 */ 702 depot_keep_new_pool(&prealloc); 703 } 704 705 printk_deferred_exit(); 706 raw_spin_unlock_irqrestore(&pool_lock, flags); 707 exit: 708 if (prealloc) { 709 /* Stack depot didn't use this memory, free it. */ 710 free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER); 711 } 712 if (found) 713 handle = found->handle.handle; 714 return handle; 715 } 716 EXPORT_SYMBOL_GPL(stack_depot_save_flags); 717 718 depot_stack_handle_t stack_depot_save(unsigned long *entries, 719 unsigned int nr_entries, 720 gfp_t alloc_flags) 721 { 722 return stack_depot_save_flags(entries, nr_entries, alloc_flags, 723 STACK_DEPOT_FLAG_CAN_ALLOC); 724 } 725 EXPORT_SYMBOL_GPL(stack_depot_save); 726 727 unsigned int stack_depot_fetch(depot_stack_handle_t handle, 728 unsigned long **entries) 729 { 730 struct stack_record *stack; 731 732 *entries = NULL; 733 /* 734 * Let KMSAN know *entries is initialized. This shall prevent false 735 * positive reports if instrumented code accesses it. 736 */ 737 kmsan_unpoison_memory(entries, sizeof(*entries)); 738 739 if (!handle || stack_depot_disabled) 740 return 0; 741 742 stack = depot_fetch_stack(handle); 743 /* 744 * Should never be NULL, otherwise this is a use-after-put (or just a 745 * corrupt handle). 746 */ 747 if (WARN(!stack, "corrupt handle or use after stack_depot_put()")) 748 return 0; 749 750 *entries = stack->entries; 751 return stack->size; 752 } 753 EXPORT_SYMBOL_GPL(stack_depot_fetch); 754 755 void stack_depot_put(depot_stack_handle_t handle) 756 { 757 struct stack_record *stack; 758 759 if (!handle || stack_depot_disabled) 760 return; 761 762 stack = depot_fetch_stack(handle); 763 /* 764 * Should always be able to find the stack record, otherwise this is an 765 * unbalanced put attempt (or corrupt handle). 766 */ 767 if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()")) 768 return; 769 770 if (refcount_dec_and_test(&stack->count)) 771 depot_free_stack(stack); 772 } 773 EXPORT_SYMBOL_GPL(stack_depot_put); 774 775 void stack_depot_print(depot_stack_handle_t stack) 776 { 777 unsigned long *entries; 778 unsigned int nr_entries; 779 780 nr_entries = stack_depot_fetch(stack, &entries); 781 if (nr_entries > 0) 782 stack_trace_print(entries, nr_entries, 0); 783 } 784 EXPORT_SYMBOL_GPL(stack_depot_print); 785 786 int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, 787 int spaces) 788 { 789 unsigned long *entries; 790 unsigned int nr_entries; 791 792 nr_entries = stack_depot_fetch(handle, &entries); 793 return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, 794 spaces) : 0; 795 } 796 EXPORT_SYMBOL_GPL(stack_depot_snprint); 797 798 depot_stack_handle_t __must_check stack_depot_set_extra_bits( 799 depot_stack_handle_t handle, unsigned int extra_bits) 800 { 801 union handle_parts parts = { .handle = handle }; 802 803 /* Don't set extra bits on empty handles. */ 804 if (!handle) 805 return 0; 806 807 parts.extra = extra_bits; 808 return parts.handle; 809 } 810 EXPORT_SYMBOL(stack_depot_set_extra_bits); 811 812 unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) 813 { 814 union handle_parts parts = { .handle = handle }; 815 816 return parts.extra; 817 } 818 EXPORT_SYMBOL(stack_depot_get_extra_bits); 819 820 static int stats_show(struct seq_file *seq, void *v) 821 { 822 /* 823 * data race ok: These are just statistics counters, and approximate 824 * statistics are ok for debugging. 825 */ 826 seq_printf(seq, "pools: %d\n", data_race(pools_num)); 827 for (int i = 0; i < DEPOT_COUNTER_COUNT; i++) 828 seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i])); 829 830 return 0; 831 } 832 DEFINE_SHOW_ATTRIBUTE(stats); 833 834 static int depot_debugfs_init(void) 835 { 836 struct dentry *dir; 837 838 if (stack_depot_disabled) 839 return 0; 840 841 dir = debugfs_create_dir("stackdepot", NULL); 842 debugfs_create_file("stats", 0444, dir, NULL, &stats_fops); 843 return 0; 844 } 845 late_initcall(depot_debugfs_init); 846