/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MM_SLAB_H
#define MM_SLAB_H

#include <linux/reciprocal_div.h>
#include <linux/list_lru.h>
#include <linux/local_lock.h>
#include <linux/random.h>
#include <linux/kobject.h>
#include <linux/sched/mm.h>
#include <linux/memcontrol.h>
#include <linux/kfence.h>
#include <linux/kasan.h>

/*
 * Internal slab definitions
 */

#ifdef CONFIG_64BIT
# ifdef system_has_cmpxchg128
# define system_has_freelist_aba()	system_has_cmpxchg128()
# define try_cmpxchg_freelist		try_cmpxchg128
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg128
typedef u128 freelist_full_t;
#else /* CONFIG_64BIT */
# ifdef system_has_cmpxchg64
# define system_has_freelist_aba()	system_has_cmpxchg64()
# define try_cmpxchg_freelist		try_cmpxchg64
# endif
#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg64
typedef u64 freelist_full_t;
#endif /* CONFIG_64BIT */

#if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
#undef system_has_freelist_aba
#endif

/*
 * Freelist pointer and counter to cmpxchg together, avoids the typical ABA
 * problems with cmpxchg of just a pointer.
 */
typedef union {
	struct {
		void *freelist;
		unsigned long counter;
	};
	freelist_full_t full;
} freelist_aba_t;
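/*
 * Illustrative sketch (not part of this header): when system_has_freelist_aba()
 * is true, the lockless paths in mm/slub.c can update the freelist pointer and
 * the counters word as a single unit, so a concurrent free/realloc that leaves
 * the pointer looking unchanged is still detected via the counter half.
 * Roughly, assuming "old" and "new" are local freelist_aba_t values built from
 * the slab's current and intended state:
 *
 *	old.freelist = freelist_old;	old.counter = counters_old;
 *	new.freelist = freelist_new;	new.counter = counters_new;
 *	if (try_cmpxchg_freelist(&slab->freelist_counter.full,
 *				 &old.full, new.full))
 *		...both halves matched and were replaced atomically...
 */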

/* Reuses the bits in struct page */
struct slab {
	unsigned long flags;

	struct kmem_cache *slab_cache;
	union {
		struct {
			union {
				struct list_head slab_list;
#ifdef CONFIG_SLUB_CPU_PARTIAL
				struct {
					struct slab *next;
					int slabs;	/* Nr of slabs left */
				};
#endif
			};
			/* Double-word boundary */
			union {
				struct {
					void *freelist;		/* first free object */
					union {
						unsigned long counters;
						struct {
							unsigned inuse:16;
							unsigned objects:15;
							/*
							 * If slab debugging is enabled then the
							 * frozen bit can be reused to indicate
							 * that the slab was corrupted.
							 */
							unsigned frozen:1;
						};
					};
				};
#ifdef system_has_freelist_aba
				freelist_aba_t freelist_counter;
#endif
			};
		};
		struct rcu_head rcu_head;
	};

	unsigned int __page_type;
	atomic_t __page_refcount;
#ifdef CONFIG_SLAB_OBJ_EXT
	unsigned long obj_exts;
#endif
};

#define SLAB_MATCH(pg, sl)						\
	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
SLAB_MATCH(flags, flags);
SLAB_MATCH(compound_head, slab_cache);	/* Ensure bit 0 is clear */
SLAB_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
SLAB_MATCH(memcg_data, obj_exts);
#elif defined(CONFIG_SLAB_OBJ_EXT)
SLAB_MATCH(_unused_slab_obj_exts, obj_exts);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
#if defined(system_has_freelist_aba)
static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)));
#endif

/**
 * folio_slab - Converts from folio to slab.
 * @folio: The folio.
 *
 * Currently struct slab is a different representation of a folio where
 * folio_test_slab() is true.
 *
 * Return: The slab which contains this folio.
 */
#define folio_slab(folio) (_Generic((folio),				\
	const struct folio *:	(const struct slab *)(folio),		\
	struct folio *:		(struct slab *)(folio)))

/**
 * slab_folio - The folio allocated for a slab
 * @s: The slab.
 *
 * Slabs are allocated as folios that contain the individual objects and use
 * some fields in the first struct page of the folio; those fields are now
 * accessed by struct slab. It is occasionally necessary to convert back to a
 * folio in order to communicate with the rest of the mm. Please use this
 * helper function instead of casting yourself, as the implementation may
 * change in the future.
 */
#define slab_folio(s) (_Generic((s),					\
	const struct slab *:	(const struct folio *)s,		\
	struct slab *:		(struct folio *)s))

/**
 * page_slab - Converts from first struct page to slab.
 * @p: The first (either head of compound or single) page of slab.
 *
 * A temporary wrapper to convert struct page to struct slab in situations
 * where we know the page is either the head of a compound page or a single
 * order-0 page.
 *
 * Long-term ideally everything would work with struct slab directly or go
 * through folio to struct slab.
 *
 * Return: The slab which contains this page.
 */
#define page_slab(p) (_Generic((p),					\
	const struct page *:	(const struct slab *)(p),		\
	struct page *:		(struct slab *)(p)))

/**
 * slab_page - The first struct page allocated for a slab
 * @s: The slab.
 *
 * A convenience wrapper for converting slab to the first struct page of the
 * underlying folio, to communicate with code not yet converted to folio or
 * struct slab.
 */
#define slab_page(s) folio_page(slab_folio(s), 0)

static inline void *slab_address(const struct slab *slab)
{
	return folio_address(slab_folio(slab));
}

static inline int slab_nid(const struct slab *slab)
{
	return folio_nid(slab_folio(slab));
}

static inline pg_data_t *slab_pgdat(const struct slab *slab)
{
	return folio_pgdat(slab_folio(slab));
}

static inline struct slab *virt_to_slab(const void *addr)
{
	struct folio *folio = virt_to_folio(addr);

	if (!folio_test_slab(folio))
		return NULL;

	return folio_slab(folio);
}

static inline int slab_order(const struct slab *slab)
{
	return folio_order(slab_folio(slab));
}

static inline size_t slab_size(const struct slab *slab)
{
	return PAGE_SIZE << slab_order(slab);
}
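/*
 * Illustrative sketch, not an API provided here: resolving an arbitrary
 * object pointer to its cache goes through the conversions above, e.g. a
 * hypothetical helper could do (error handling omitted):
 *
 *	struct slab *slab = virt_to_slab(obj);
 *	struct kmem_cache *s = slab ? slab->slab_cache : NULL;
 *
 * virt_to_slab() returns NULL for memory that is not backed by a slab folio,
 * e.g. large kmalloc() allocations served directly by the page allocator.
 */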

#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c)			((c)->partial)

#define slub_set_percpu_partial(c, p)		\
({						\
	slub_percpu_partial(c) = (p)->next;	\
})

#define slub_percpu_partial_read_once(c)	READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c)			NULL

#define slub_set_percpu_partial(c, p)

#define slub_percpu_partial_read_once(c)	NULL
#endif // CONFIG_SLUB_CPU_PARTIAL

/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 */
struct kmem_cache_order_objects {
	unsigned int x;
};

/*
 * Slab cache management.
 */
struct kmem_cache {
#ifndef CONFIG_SLUB_TINY
	struct kmem_cache_cpu __percpu *cpu_slab;
#endif
	/* Used for retrieving partial slabs, etc. */
	slab_flags_t flags;
	unsigned long min_partial;
	unsigned int size;		/* Object size including metadata */
	unsigned int object_size;	/* Object size without metadata */
	struct reciprocal_value reciprocal_size;
	unsigned int offset;		/* Free pointer offset */
#ifdef CONFIG_SLUB_CPU_PARTIAL
	/* Number of per cpu partial objects to keep around */
	unsigned int cpu_partial;
	/* Number of per cpu partial slabs to keep around */
	unsigned int cpu_partial_slabs;
#endif
	struct kmem_cache_order_objects oo;

	/* Allocation and freeing of slabs */
	struct kmem_cache_order_objects min;
	gfp_t allocflags;		/* gfp flags to use on each alloc */
	int refcount;			/* Refcount for slab cache destroy */
	void (*ctor)(void *object);	/* Object constructor */
	unsigned int inuse;		/* Offset to metadata */
	unsigned int align;		/* Alignment */
	unsigned int red_left_pad;	/* Left redzone padding size */
	const char *name;		/* Name (only for display!) */
	struct list_head list;		/* List of slab caches */
#ifdef CONFIG_SYSFS
	struct kobject kobj;		/* For sysfs */
#endif
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	unsigned long random;
#endif

#ifdef CONFIG_NUMA
	/*
	 * Defragmentation by allocating from a remote node.
	 */
	unsigned int remote_node_defrag_ratio;
#endif

#ifdef CONFIG_SLAB_FREELIST_RANDOM
	unsigned int *random_seq;
#endif

#ifdef CONFIG_KASAN_GENERIC
	struct kasan_cache kasan_info;
#endif

#ifdef CONFIG_HARDENED_USERCOPY
	unsigned int useroffset;	/* Usercopy region offset */
	unsigned int usersize;		/* Usercopy region size */
#endif

	struct kmem_cache_node *node[MAX_NUMNODES];
};

#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
#define SLAB_SUPPORTS_SYSFS 1
void sysfs_slab_unlink(struct kmem_cache *s);
void sysfs_slab_release(struct kmem_cache *s);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s) { }
static inline void sysfs_slab_release(struct kmem_cache *s) { }
#endif

void *fixup_red_left(struct kmem_cache *s, void *p);

static inline void *nearest_obj(struct kmem_cache *cache,
				const struct slab *slab, void *x)
{
	void *object = x - (x - slab_address(slab)) % cache->size;
	void *last_object = slab_address(slab) +
		(slab->objects - 1) * cache->size;
	void *result = (unlikely(object > last_object)) ? last_object : object;

	result = fixup_red_left(cache, result);
	return result;
}

/* Determine object index from a given position */
static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
					  void *addr, void *obj)
{
	return reciprocal_divide(kasan_reset_tag(obj) - addr,
				 cache->reciprocal_size);
}

static inline unsigned int obj_to_index(const struct kmem_cache *cache,
					const struct slab *slab, void *obj)
{
	if (is_kfence_address(obj))
		return 0;
	return __obj_to_index(cache, slab_address(slab), obj);
}

static inline int objs_per_slab(const struct kmem_cache *cache,
				const struct slab *slab)
{
	return slab->objects;
}
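/*
 * Illustrative arithmetic with assumed numbers: for a cache whose ->size is
 * 256 and a slab mapped at "base", an interior pointer base + 1000 rounds
 * down to the object at base + 768 in nearest_obj() (1000 - 1000 % 256), and
 * obj_to_index() maps that object to index 3 (768 / 256), using the
 * precomputed ->reciprocal_size so no division is needed on the fast path.
 * Red-zone padding (fixup_red_left()) is ignored in this example.
 */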

/*
 * State of the slab allocator.
 *
 * This is used to describe the states of the allocator during bootup.
 * Allocators use this to gradually bootstrap themselves. Most allocators
 * have the problem that the structures used for managing slab caches are
 * allocated from slab caches themselves.
 */
enum slab_state {
	DOWN,			/* No slab functionality yet */
	PARTIAL,		/* SLUB: kmem_cache_node available */
	UP,			/* Slab caches usable but not all extras yet */
	FULL			/* Everything is working */
};

extern enum slab_state slab_state;

/* The slab cache mutex protects the management structures during changes */
extern struct mutex slab_mutex;

/* The list of all slab caches on the system */
extern struct list_head slab_caches;

/* The slab cache that manages slab cache information */
extern struct kmem_cache *kmem_cache;

/* A table of kmalloc cache names and sizes */
extern const struct kmalloc_info_struct {
	const char *name[NR_KMALLOC_TYPES];
	unsigned int size;
} kmalloc_info[];

/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(void);

extern u8 kmalloc_size_index[24];

static inline unsigned int size_index_elem(unsigned int bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of allocation.
 *
 * This assumes size is larger than zero and not larger than
 * KMALLOC_MAX_CACHE_SIZE; the caller must check that.
 */
static inline struct kmem_cache *
kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller)
{
	unsigned int index;

	if (!b)
		b = &kmalloc_caches[kmalloc_type(flags, caller)];
	if (size <= 192)
		index = kmalloc_size_index[size_index_elem(size)];
	else
		index = fls(size - 1);

	return (*b)[index];
}
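/*
 * Illustrative sizing with assumed requests: a 100-byte allocation takes the
 * size <= 192 path, so kmalloc_size_index[(100 - 1) / 8] selects the
 * kmalloc-128 cache (assuming the default cache layout); a 300-byte
 * allocation takes the fls() path, fls(300 - 1) == 9, which selects
 * kmalloc-512.
 */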

gfp_t kmalloc_fix_flags(gfp_t flags);

/* Functions provided by the slab allocators */
int do_kmem_cache_create(struct kmem_cache *s, const char *name,
			 unsigned int size, struct kmem_cache_args *args,
			 slab_flags_t flags);

void __init kmem_cache_init(void);
extern void create_boot_cache(struct kmem_cache *, const char *name,
			unsigned int size, slab_flags_t flags,
			unsigned int useroffset, unsigned int usersize);

int slab_unmergeable(struct kmem_cache *s);
struct kmem_cache *find_mergeable(unsigned size, unsigned align,
		slab_flags_t flags, const char *name, void (*ctor)(void *));
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *));

slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name);

static inline bool is_kmalloc_cache(struct kmem_cache *s)
{
	return (s->flags & SLAB_KMALLOC);
}

static inline bool is_kmalloc_normal(struct kmem_cache *s)
{
	if (!is_kmalloc_cache(s))
		return false;
	return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT));
}

#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
			 SLAB_CACHE_DMA32 | SLAB_PANIC | \
			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS | \
			 SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
			 SLAB_TEMPORARY | SLAB_ACCOUNT | \
			 SLAB_NO_USER_FLAGS | SLAB_KMALLOC | SLAB_NO_MERGE)

#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)

#define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS)

bool __kmem_cache_empty(struct kmem_cache *);
int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *);
void slab_kmem_cache_release(struct kmem_cache *);

struct seq_file;
struct file;

struct slabinfo {
	unsigned long active_objs;
	unsigned long num_objs;
	unsigned long active_slabs;
	unsigned long num_slabs;
	unsigned long shared_avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int shared;
	unsigned int objects_per_slab;
	unsigned int cache_order;
};

void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);

#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
#else
DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
extern void print_tracking(struct kmem_cache *s, void *object);
long validate_slab_cache(struct kmem_cache *s);
static inline bool __slub_debug_enabled(void)
{
	return static_branch_unlikely(&slub_debug_enabled);
}
#else
static inline void print_tracking(struct kmem_cache *s, void *object)
{
}
static inline bool __slub_debug_enabled(void)
{
	return false;
}
#endif

/*
 * Returns true if any of the specified slab_debug flags is enabled for the
 * cache. Use only for flags parsed by setup_slub_debug() as it also enables
 * the static key.
 */
static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
{
	if (IS_ENABLED(CONFIG_SLUB_DEBUG))
		VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
	if (__slub_debug_enabled())
		return s->flags & flags;
	return false;
}

#if IS_ENABLED(CONFIG_SLUB_DEBUG) && IS_ENABLED(CONFIG_KUNIT)
bool slab_in_kunit_test(void);
#else
static inline bool slab_in_kunit_test(void) { return false; }
#endif
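/*
 * Typical use of kmem_cache_debug_flags(), as in slub_debug_orig_size()
 * below: gate a debug-only path on one of the SLAB_DEBUG_FLAGS, e.g.
 *
 *	if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
 *		...record or consult the alloc/free tracking data...
 *
 * Passing a set of flags none of which are in SLAB_DEBUG_FLAGS trips the
 * VM_WARN_ON_ONCE() above, since such flags are not parsed by
 * setup_slub_debug().
 */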

#ifdef CONFIG_SLAB_OBJ_EXT

/*
 * slab_obj_exts - get the pointer to the slab object extension vector
 * associated with a slab.
 * @slab: a pointer to the slab struct
 *
 * Returns a pointer to the object extension vector associated with the slab,
 * or NULL if no such vector has been associated yet.
 */
static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
{
	unsigned long obj_exts = READ_ONCE(slab->obj_exts);

#ifdef CONFIG_MEMCG
	VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS),
							slab_page(slab));
	VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab));
#endif
	return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK);
}

int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
			gfp_t gfp, bool new_slab);

#else /* CONFIG_SLAB_OBJ_EXT */

static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
{
	return NULL;
}

#endif /* CONFIG_SLAB_OBJ_EXT */

static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
{
	return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
}

#ifdef CONFIG_MEMCG
bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
				  gfp_t flags, size_t size, void **p);
void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
			    void **p, int objects, struct slabobj_ext *obj_exts);
#endif

void kvfree_rcu_cb(struct rcu_head *head);

size_t __ksize(const void *objp);

static inline size_t slab_ksize(const struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Debugging requires use of the padding between object
	 * and whatever may come after it.
	 */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		return s->object_size;
#endif
	if (s->flags & SLAB_KASAN)
		return s->object_size;
	/*
	 * If we need to store the freelist pointer back there or track user
	 * information then we can only use the space before that information.
	 */
	if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
		return s->inuse;
	/*
	 * Else we can use all the padding etc. for the allocation.
	 */
	return s->size;
}

#ifdef CONFIG_SLUB_DEBUG
void dump_unreclaimable_slab(void);
#else
static inline void dump_unreclaimable_slab(void)
{
}
#endif

void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);

#ifdef CONFIG_SLAB_FREELIST_RANDOM
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
			gfp_t gfp);
void cache_random_seq_destroy(struct kmem_cache *cachep);
#else
static inline int cache_random_seq_create(struct kmem_cache *cachep,
					unsigned int count, gfp_t gfp)
{
	return 0;
}
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
{
	if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
				&init_on_alloc)) {
		if (c->ctor)
			return false;
		if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
			return flags & __GFP_ZERO;
		return true;
	}
	return flags & __GFP_ZERO;
}

static inline bool slab_want_init_on_free(struct kmem_cache *c)
{
	if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
				&init_on_free))
		return !(c->ctor ||
			 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
	return false;
}
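/*
 * Illustrative behaviour with assumed cache properties: with init_on_alloc
 * enabled, an allocation from a cache without a constructor is pre-zeroed
 * even if the caller did not pass __GFP_ZERO; for SLAB_TYPESAFE_BY_RCU or
 * SLAB_POISON caches the object is only zeroed when __GFP_ZERO is passed
 * explicitly. slab_want_init_on_free() likewise skips caches with
 * constructors, RCU type safety or poisoning.
 */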

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
void debugfs_slab_release(struct kmem_cache *);
#else
static inline void debugfs_slab_release(struct kmem_cache *s) { }
#endif

#ifdef CONFIG_PRINTK
#define KS_ADDRS_COUNT 16
struct kmem_obj_info {
	void *kp_ptr;
	struct slab *kp_slab;
	void *kp_objp;
	unsigned long kp_data_offset;
	struct kmem_cache *kp_slab_cache;
	void *kp_ret;
	void *kp_stack[KS_ADDRS_COUNT];
	void *kp_free_stack[KS_ADDRS_COUNT];
};
void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
#endif

void __check_heap_object(const void *ptr, unsigned long n,
			 const struct slab *slab, bool to_user);

static inline bool slub_debug_orig_size(struct kmem_cache *s)
{
	return (kmem_cache_debug_flags(s, SLAB_STORE_USER) &&
		(s->flags & SLAB_KMALLOC));
}

#ifdef CONFIG_SLUB_DEBUG
void skip_orig_size_check(struct kmem_cache *s, const void *object);
#endif

#endif /* MM_SLAB_H */