xref: /linux/mm/slab.h (revision e4f3c9118f27b3b400db9993dd8bfd028d3b86ca)
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MM_SLAB_H
#define MM_SLAB_H
/*
 * Internal slab definitions
 */

#ifdef CONFIG_SLOB
/*
 * Common fields provided in kmem_cache by all slab allocators
 * This struct is either used directly by the allocator (SLOB)
 * or the allocator must include definitions for all fields
 * provided in kmem_cache_common in their definition of kmem_cache.
 *
 * Once we can do anonymous structs (C11 standard) we could put an
 * anonymous struct definition in these allocators so that the
 * separate allocations in the kmem_cache structure of SLAB and
 * SLUB are no longer needed.
 */
struct kmem_cache {
	unsigned int object_size;/* The original size of the object */
	unsigned int size;	/* The aligned/padded/added on size  */
	unsigned int align;	/* Alignment as calculated */
	slab_flags_t flags;	/* Active flags on the slab */
	unsigned int useroffset;/* Usercopy region offset */
	unsigned int usersize;	/* Usercopy region size */
	const char *name;	/* Slab name for sysfs */
	int refcount;		/* Use counter */
	void (*ctor)(void *);	/* Called on object slot creation */
	struct list_head list;	/* List of all slab caches on the system */
};

#endif /* CONFIG_SLOB */

#ifdef CONFIG_SLAB
#include <linux/slab_def.h>
#endif

#ifdef CONFIG_SLUB
#include <linux/slub_def.h>
#endif

#include <linux/memcontrol.h>
#include <linux/fault-inject.h>
#include <linux/kasan.h>
#include <linux/kmemleak.h>
#include <linux/random.h>
#include <linux/sched/mm.h>

/*
 * State of the slab allocator.
 *
 * This is used to describe the states of the allocator during bootup.
 * Allocators use this to gradually bootstrap themselves. Most allocators
 * have the problem that the structures used for managing slab caches are
 * allocated from slab caches themselves.
 */
enum slab_state {
	DOWN,			/* No slab functionality yet */
	PARTIAL,		/* SLUB: kmem_cache_node available */
	PARTIAL_NODE,		/* SLAB: kmalloc size for node struct available */
	UP,			/* Slab caches usable but not all extras yet */
	FULL			/* Everything is working */
};

extern enum slab_state slab_state;

/* The slab cache mutex protects the management structures during changes */
extern struct mutex slab_mutex;

/* The list of all slab caches on the system */
extern struct list_head slab_caches;

/* The slab cache that manages slab cache information */
extern struct kmem_cache *kmem_cache;

/* A table of kmalloc cache names and sizes */
extern const struct kmalloc_info_struct {
	const char *name[NR_KMALLOC_TYPES];
	unsigned int size;
} kmalloc_info[];

#ifndef CONFIG_SLOB
/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(slab_flags_t);

/* Find the kmalloc slab corresponding to a certain size */
struct kmem_cache *kmalloc_slab(size_t, gfp_t);
#endif

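/* Sanitize GFP flags that are not supported by the slab allocators */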
gfp_t kmalloc_fix_flags(gfp_t flags);

/* Functions provided by the slab allocators */
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);

struct kmem_cache *create_kmalloc_cache(const char *name, unsigned int size,
			slab_flags_t flags, unsigned int useroffset,
			unsigned int usersize);
extern void create_boot_cache(struct kmem_cache *, const char *name,
			unsigned int size, slab_flags_t flags,
			unsigned int useroffset, unsigned int usersize);

int slab_unmergeable(struct kmem_cache *s);
struct kmem_cache *find_mergeable(unsigned size, unsigned align,
		slab_flags_t flags, const char *name, void (*ctor)(void *));
#ifndef CONFIG_SLOB
struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *));

slab_flags_t kmem_cache_flags(unsigned int object_size,
	slab_flags_t flags, const char *name);
#else
static inline struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
		   slab_flags_t flags, void (*ctor)(void *))
{ return NULL; }

static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
	slab_flags_t flags, const char *name)
{
	return flags;
}
#endif


/* Legal flag mask for kmem_cache_create(), for various configurations */
#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
			 SLAB_CACHE_DMA32 | SLAB_PANIC | \
			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )

#if defined(CONFIG_DEBUG_SLAB)
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
#elif defined(CONFIG_SLUB_DEBUG)
#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
			  SLAB_TRACE | SLAB_CONSISTENCY_CHECKS)
#else
#define SLAB_DEBUG_FLAGS (0)
#endif

#if defined(CONFIG_SLAB)
#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
			  SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | \
			  SLAB_ACCOUNT)
#elif defined(CONFIG_SLUB)
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
			  SLAB_TEMPORARY | SLAB_ACCOUNT)
#else
#define SLAB_CACHE_FLAGS (0)
#endif

/* Common flags available with current configuration */
#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)

/* Common flags permitted for kmem_cache_create */
#define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | \
			      SLAB_RED_ZONE | \
			      SLAB_POISON | \
			      SLAB_STORE_USER | \
			      SLAB_TRACE | \
			      SLAB_CONSISTENCY_CHECKS | \
			      SLAB_MEM_SPREAD | \
			      SLAB_NOLEAKTRACE | \
			      SLAB_RECLAIM_ACCOUNT | \
			      SLAB_TEMPORARY | \
			      SLAB_ACCOUNT)

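/* Helpers used when shrinking or destroying a cache */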
bool __kmem_cache_empty(struct kmem_cache *);
int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *);
void slab_kmem_cache_release(struct kmem_cache *);

struct seq_file;
struct file;

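/* Cache statistics, as reported through /proc/slabinfo */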
struct slabinfo {
	unsigned long active_objs;
	unsigned long num_objs;
	unsigned long active_slabs;
	unsigned long num_slabs;
	unsigned long shared_avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int shared;
	unsigned int objects_per_slab;
	unsigned int cache_order;
};

void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		       size_t count, loff_t *ppos);

/*
 * Generic implementation of bulk operations
 * These are useful for situations in which the allocator cannot
 * perform optimizations. In that case segments of the object list
 * may be allocated or freed using these operations.
 */
void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);

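/*
 * Node vmstat counter that pages of this cache are accounted to:
 * reclaimable or unreclaimable slab memory.
 */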
static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
{
	return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
}

#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
#else
DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
extern void print_tracking(struct kmem_cache *s, void *object);
#else
static inline void print_tracking(struct kmem_cache *s, void *object)
{
}
#endif

/*
 * Returns true if any of the specified slub_debug flags is enabled for the
 * cache. Use only for flags parsed by setup_slub_debug() as it also enables
 * the static key.
 */
static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t flags)
{
#ifdef CONFIG_SLUB_DEBUG
	VM_WARN_ON_ONCE(!(flags & SLAB_DEBUG_FLAGS));
	if (static_branch_unlikely(&slub_debug_enabled))
		return s->flags & flags;
#endif
	return false;
}

#ifdef CONFIG_MEMCG_KMEM
int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
				 gfp_t gfp, bool new_page);

static inline void memcg_free_page_obj_cgroups(struct page *page)
{
	kfree(page_objcgs(page));
	page->memcg_data = 0;
}

static inline size_t obj_full_size(struct kmem_cache *s)
{
	/*
	 * For each accounted object there is an extra space which is used
	 * to store obj_cgroup membership. Charge it too.
	 */
	return s->size + sizeof(struct obj_cgroup *);
}

/*
 * Returns false if the allocation should fail.
 */
static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
					     struct obj_cgroup **objcgp,
					     size_t objects, gfp_t flags)
{
	struct obj_cgroup *objcg;

	if (!memcg_kmem_enabled())
		return true;

	if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))
		return true;

	objcg = get_obj_cgroup_from_current();
	if (!objcg)
		return true;

	if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) {
		obj_cgroup_put(objcg);
		return false;
	}

	*objcgp = objcg;
	return true;
}

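/* Adjust a per-node memcg slab statistic on behalf of @objcg's memory cgroup */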
static inline void mod_objcg_state(struct obj_cgroup *objcg,
				   struct pglist_data *pgdat,
				   enum node_stat_item idx, int nr)
{
	struct mem_cgroup *memcg;
	struct lruvec *lruvec;

	rcu_read_lock();
	memcg = obj_cgroup_memcg(objcg);
	lruvec = mem_cgroup_lruvec(memcg, pgdat);
	mod_memcg_lruvec_state(lruvec, idx, nr);
	rcu_read_unlock();
}

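/*
 * Record the obj_cgroup of each successfully allocated object in the page's
 * obj_cgroups vector and account it; slots that failed to allocate (or whose
 * vector could not be set up) are uncharged again.
 */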
static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
					      struct obj_cgroup *objcg,
					      gfp_t flags, size_t size,
					      void **p)
{
	struct page *page;
	unsigned long off;
	size_t i;

	if (!memcg_kmem_enabled() || !objcg)
		return;

	flags &= ~__GFP_ACCOUNT;
	for (i = 0; i < size; i++) {
		if (likely(p[i])) {
			page = virt_to_head_page(p[i]);

			if (!page_objcgs(page) &&
			    memcg_alloc_page_obj_cgroups(page, s, flags,
							 false)) {
				obj_cgroup_uncharge(objcg, obj_full_size(s));
				continue;
			}

			off = obj_to_index(s, page, p[i]);
			obj_cgroup_get(objcg);
			page_objcgs(page)[off] = objcg;
			mod_objcg_state(objcg, page_pgdat(page),
					cache_vmstat_idx(s), obj_full_size(s));
		} else {
			obj_cgroup_uncharge(objcg, obj_full_size(s));
		}
	}
	obj_cgroup_put(objcg);
}

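/*
 * Undo the per-object accounting done at allocation time: clear each freed
 * object's obj_cgroup pointer, uncharge it and drop the objcg reference.
 */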
static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
					void **p, int objects)
{
	struct kmem_cache *s;
	struct obj_cgroup **objcgs;
	struct obj_cgroup *objcg;
	struct page *page;
	unsigned int off;
	int i;

	if (!memcg_kmem_enabled())
		return;

	for (i = 0; i < objects; i++) {
		if (unlikely(!p[i]))
			continue;

		page = virt_to_head_page(p[i]);
		objcgs = page_objcgs(page);
		if (!objcgs)
			continue;

		if (!s_orig)
			s = page->slab_cache;
		else
			s = s_orig;

		off = obj_to_index(s, page, p[i]);
		objcg = objcgs[off];
		if (!objcg)
			continue;

		objcgs[off] = NULL;
		obj_cgroup_uncharge(objcg, obj_full_size(s));
		mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
				-obj_full_size(s));
		obj_cgroup_put(objcg);
	}
}

#else /* CONFIG_MEMCG_KMEM */
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
	return NULL;
}

static inline int memcg_alloc_page_obj_cgroups(struct page *page,
					       struct kmem_cache *s, gfp_t gfp,
					       bool new_page)
{
	return 0;
}

static inline void memcg_free_page_obj_cgroups(struct page *page)
{
}

static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
					     struct obj_cgroup **objcgp,
					     size_t objects, gfp_t flags)
{
	return true;
}

static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
					      struct obj_cgroup *objcg,
					      gfp_t flags, size_t size,
					      void **p)
{
}

static inline void memcg_slab_free_hook(struct kmem_cache *s,
					void **p, int objects)
{
}
#endif /* CONFIG_MEMCG_KMEM */

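/* Look up the kmem_cache an object was allocated from via its head page */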
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
	struct page *page;

	page = virt_to_head_page(obj);
	if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n",
					__func__))
		return NULL;
	return page->slab_cache;
}

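/*
 * (Un)account a slab page against the node vmstat counters; for accounted
 * caches the page's obj_cgroups vector is set up or freed as well.
 */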
static __always_inline void account_slab_page(struct page *page, int order,
					      struct kmem_cache *s,
					      gfp_t gfp)
{
	if (memcg_kmem_enabled() && (s->flags & SLAB_ACCOUNT))
		memcg_alloc_page_obj_cgroups(page, s, gfp, true);

	mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
			    PAGE_SIZE << order);
}

static __always_inline void unaccount_slab_page(struct page *page, int order,
						struct kmem_cache *s)
{
	if (memcg_kmem_enabled())
		memcg_free_page_obj_cgroups(page);

	mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
			    -(PAGE_SIZE << order));
}

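/*
 * Check that object @x really belongs to cache @s. With freelist hardening
 * or consistency checks enabled, fall back to the cache derived from the
 * object's page and warn on a mismatch.
 */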
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
	struct kmem_cache *cachep;

	if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
	    !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
		return s;

	cachep = virt_to_cache(x);
	if (WARN(cachep && cachep != s,
		  "%s: Wrong slab cache. %s but object is from %s\n",
		  __func__, s->name, cachep->name))
		print_tracking(cachep, x);
	return cachep;
}

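/* Number of bytes of an object that callers may safely use */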
static inline size_t slab_ksize(const struct kmem_cache *s)
{
#ifndef CONFIG_SLUB
	return s->object_size;

#else /* CONFIG_SLUB */
# ifdef CONFIG_SLUB_DEBUG
	/*
	 * Debugging requires use of the padding between object
	 * and whatever may come after it.
	 */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		return s->object_size;
# endif
	if (s->flags & SLAB_KASAN)
		return s->object_size;
	/*
	 * If we need to store the freelist pointer back there or track
	 * user information then we can only use the space before that
	 * information.
	 */
	if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
		return s->inuse;
	/*
	 * Else we can use all the padding etc for the allocation.
	 */
	return s->size;
#endif
}

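/*
 * Hooks run before every slab allocation: filter the gfp flags, perform
 * might_alloc() and fault-injection checks, and pre-charge the objcg.
 * Returns NULL if the allocation should fail, otherwise the cache to
 * allocate from.
 */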
static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
						     struct obj_cgroup **objcgp,
						     size_t size, gfp_t flags)
{
	flags &= gfp_allowed_mask;

	might_alloc(flags);

	if (should_failslab(s, flags))
		return NULL;

	if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
		return NULL;

	return s;
}

static inline void slab_post_alloc_hook(struct kmem_cache *s,
					struct obj_cgroup *objcg, gfp_t flags,
					size_t size, void **p, bool init)
{
	size_t i;

	flags &= gfp_allowed_mask;

	/*
	 * As memory initialization might be integrated into KASAN,
	 * kasan_slab_alloc and initialization memset must be
	 * kept together to avoid discrepancies in behavior.
	 *
	 * As p[i] might get tagged, memset and kmemleak hook come after KASAN.
	 */
	for (i = 0; i < size; i++) {
		p[i] = kasan_slab_alloc(s, p[i], flags, init);
		if (p[i] && init && !kasan_has_integrated_init())
			memset(p[i], 0, s->object_size);
		kmemleak_alloc_recursive(p[i], s->object_size, 1,
					 s->flags, flags);
	}

	memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
}

#ifndef CONFIG_SLOB
/*
 * The slab lists for all objects.
 */
struct kmem_cache_node {
	spinlock_t list_lock;

#ifdef CONFIG_SLAB
	struct list_head slabs_partial;	/* partial list first, better asm code */
	struct list_head slabs_full;
	struct list_head slabs_free;
	unsigned long total_slabs;	/* length of all slab lists */
	unsigned long free_slabs;	/* length of free slab list only */
	unsigned long free_objects;
	unsigned int free_limit;
	unsigned int colour_next;	/* Per-node cache coloring */
	struct array_cache *shared;	/* shared per node */
	struct alien_cache **alien;	/* on other nodes */
	unsigned long next_reap;	/* updated without locking */
	int free_touched;		/* updated without locking */
#endif

#ifdef CONFIG_SLUB
	unsigned long nr_partial;
	struct list_head partial;
#ifdef CONFIG_SLUB_DEBUG
	atomic_long_t nr_slabs;
	atomic_long_t total_objects;
	struct list_head full;
#endif
#endif

};

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
	return s->node[node];
}

/*
 * Iterator over all nodes. The body will be executed for each node that has
 * a kmem_cache_node structure allocated (which is true for all online nodes)
 */
#define for_each_kmem_cache_node(__s, __node, __n) \
	for (__node = 0; __node < nr_node_ids; __node++) \
		 if ((__n = get_node(__s, __node)))
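/*
 * Example use of the iterator above (a sketch; do_something_with() is just a
 * placeholder for per-node work):
 *
 *	struct kmem_cache_node *n;
 *	int node;
 *
 *	for_each_kmem_cache_node(s, node, n)
 *		do_something_with(n);
 */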

#endif

void *slab_start(struct seq_file *m, loff_t *pos);
void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p);
int memcg_slab_show(struct seq_file *m, void *p);

#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
void dump_unreclaimable_slab(void);
#else
static inline void dump_unreclaimable_slab(void)
{
}
#endif

void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr);

#ifdef CONFIG_SLAB_FREELIST_RANDOM
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
			gfp_t gfp);
void cache_random_seq_destroy(struct kmem_cache *cachep);
#else
static inline int cache_random_seq_create(struct kmem_cache *cachep,
					unsigned int count, gfp_t gfp)
{
	return 0;
}
static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

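/*
 * Should objects returned by this allocation be zero-initialized?
 *
 * With init_on_alloc enabled, caches with constructors are never pre-zeroed
 * here and SLAB_TYPESAFE_BY_RCU/poisoned caches only honour an explicit
 * __GFP_ZERO; everything else is zeroed. Otherwise only __GFP_ZERO counts.
 */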
static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
{
	if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
				&init_on_alloc)) {
		if (c->ctor)
			return false;
		if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
			return flags & __GFP_ZERO;
		return true;
	}
	return flags & __GFP_ZERO;
}

static inline bool slab_want_init_on_free(struct kmem_cache *c)
{
	if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
				&init_on_free))
		return !(c->ctor ||
			 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
	return false;
}

#ifdef CONFIG_PRINTK
#define KS_ADDRS_COUNT 16
struct kmem_obj_info {
	void *kp_ptr;
	struct page *kp_page;
	void *kp_objp;
	unsigned long kp_data_offset;
	struct kmem_cache *kp_slab_cache;
	void *kp_ret;
	void *kp_stack[KS_ADDRS_COUNT];
};
void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
#endif

#endif /* MM_SLAB_H */