xref: /linux/mm/swap.h (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _MM_SWAP_H
3 #define _MM_SWAP_H
4 
5 #include <linux/atomic.h> /* for atomic_long_t */
6 #include <linux/mm.h> /* for PAGE_SHIFT */
/* Opaque types: this header only passes around pointers to them. */
struct mempolicy;
struct swap_iocb;
struct swap_memcg_table;

/* Defined outside this header. */
extern int page_cluster;
12 
/*
 * Bit width used for a PFN: a physical address width minus PAGE_SHIFT.
 * MAX_POSSIBLE_PHYSMEM_BITS is preferred over MAX_PHYSMEM_BITS; when
 * neither is defined, the full BITS_PER_LONG is used as the width.
 */
#ifdef MAX_POSSIBLE_PHYSMEM_BITS
#define SWAP_CACHE_PFN_BITS	(MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
#else
# ifdef MAX_PHYSMEM_BITS
#  define SWAP_CACHE_PFN_BITS	(MAX_PHYSMEM_BITS - PAGE_SHIFT)
# else
#  define SWAP_CACHE_PFN_BITS	(BITS_PER_LONG - PAGE_SHIFT)
# endif
#endif
20 
/* Swap table marker: 0x1 means shadow, 0x2 means PFN (SWP_TB_PFN_MARK) */
#define SWAP_CACHE_PFN_MARK_BITS	2

/* At least 2 bits are needed to tell SWP_TB_COUNT_MAX, 1 and 0 apart */
#define SWAP_COUNT_MIN_BITS		2

/*
 * The zero flag is stored inline only when, after the PFN and the marker
 * have taken their bits, more than SWAP_COUNT_MIN_BITS bits are left in
 * a long.
 */
#define SWAP_TABLE_HAS_ZEROFLAG \
	((BITS_PER_LONG - SWAP_CACHE_PFN_MARK_BITS - SWAP_CACHE_PFN_BITS) > \
	 SWAP_COUNT_MIN_BITS)
28 
/*
 * Cluster geometry. Without CONFIG_THP_SWAP a cluster is 256 slots and
 * swap_entry_order() always yields 0; with it, a cluster is HPAGE_PMD_NR
 * slots and swap_entry_order() yields the order it was given.
 */
#ifndef CONFIG_THP_SWAP
#define SWAPFILE_CLUSTER	256
#define swap_entry_order(order)	0
#else
#define SWAPFILE_CLUSTER	HPAGE_PMD_NR
#define swap_entry_order(order)	(order)
#endif
36 
/* Swap devices, indexed by swap type (see __swap_type_to_info()). */
extern struct swap_info_struct *swap_info[];
38 
39 /*
40  * We use this to track usage of a cluster. A cluster is a block of swap disk
41  * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
42  * free clusters are organized into a list. We fetch an entry from the list to
43  * get a free cluster.
44  *
45  * The flags field determines if a cluster is free. This is
46  * protected by cluster lock.
47  */
48 struct swap_cluster_info {
49 	spinlock_t lock;	/*
50 				 * Protect swap_cluster_info fields
51 				 * other than list, and swap_info_struct->swap_map
52 				 * elements corresponding to the swap cluster.
53 				 */
54 	u16 count;
55 	u8 flags;
56 	u8 order;
57 	atomic_long_t __rcu *table;	/* Swap table entries, see mm/swap_table.h */
58 	unsigned int *extend_table;	/* For large swap count, protected by ci->lock */
59 #ifdef CONFIG_MEMCG
60 	struct swap_memcg_table *memcg_table;	/* Swap table entries' cgroup record */
61 #endif
62 #if !SWAP_TABLE_HAS_ZEROFLAG
63 	unsigned long *zero_bitmap;
64 #endif
65 	struct list_head list;
66 };
67 
/*
 * Cluster states. Every cluster that sits on a list must carry a non-zero
 * flag; CLUSTER_FLAG_NONE is for a cluster that is temporarily off-list.
 */
enum swap_cluster_flags {
	CLUSTER_FLAG_NONE	= 0,	/* For temporary off-list cluster */
	CLUSTER_FLAG_FREE	= 1,
	CLUSTER_FLAG_NONFULL	= 2,
	CLUSTER_FLAG_FRAG	= 3,
	/* Clusters with flags up to and including this one are allocatable */
	CLUSTER_FLAG_USABLE	= CLUSTER_FLAG_FRAG,
	CLUSTER_FLAG_FULL	= 4,
	CLUSTER_FLAG_DISCARD	= 5,
	CLUSTER_FLAG_MAX	= 6,
};
80 
81 #ifdef CONFIG_SWAP
82 #include <linux/swapops.h> /* for swp_offset */
83 #include <linux/blk_types.h> /* for bio_end_io_t */
84 
85 static inline unsigned int swp_cluster_offset(swp_entry_t entry)
86 {
87 	return swp_offset(entry) % SWAPFILE_CLUSTER;
88 }
89 
90 /*
91  * Callers of all helpers below must ensure the entry, type, or offset is
92  * valid, and protect the swap device with reference count or locks.
93  */
94 static inline struct swap_info_struct *__swap_type_to_info(int type)
95 {
96 	struct swap_info_struct *si;
97 
98 	si = READ_ONCE(swap_info[type]); /* rcu_dereference() */
99 	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
100 	return si;
101 }
102 
103 static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
104 {
105 	return __swap_type_to_info(swp_type(entry));
106 }
107 
108 static inline struct swap_cluster_info *__swap_offset_to_cluster(
109 		struct swap_info_struct *si, pgoff_t offset)
110 {
111 	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
112 	VM_WARN_ON_ONCE(offset >= roundup(si->max, SWAPFILE_CLUSTER));
113 	return &si->cluster_info[offset / SWAPFILE_CLUSTER];
114 }
115 
116 static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry)
117 {
118 	return __swap_offset_to_cluster(__swap_entry_to_info(entry),
119 					swp_offset(entry));
120 }
121 
122 static __always_inline struct swap_cluster_info *__swap_cluster_lock(
123 		struct swap_info_struct *si, unsigned long offset, bool irq)
124 {
125 	struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset);
126 
127 	/*
128 	 * Nothing modifies swap cache in an IRQ context. All access to
129 	 * swap cache is wrapped by swap_cache_* helpers, and swap cache
130 	 * writeback is handled outside of IRQs. Swapin or swapout never
131 	 * occurs in IRQ, and neither does in-place split or replace.
132 	 *
133 	 * Besides, modifying swap cache requires synchronization with
134 	 * swap_map, which was never IRQ safe.
135 	 */
136 	VM_WARN_ON_ONCE(!in_task());
137 	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
138 	if (irq)
139 		spin_lock_irq(&ci->lock);
140 	else
141 		spin_lock(&ci->lock);
142 	return ci;
143 }
144 
/**
 * swap_cluster_lock - Lock and return the swap cluster of a given offset.
 * @si: swap device that the cluster belongs to.
 * @offset: the swap entry offset; must point to a valid slot.
 *
 * The cluster lock is taken without disabling IRQs.
 *
 * Context: The caller must ensure the offset is in the valid range and
 * protect the swap device with reference count or locks.
 * Return: Pointer to the locked swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset)
{
	const bool disable_irq = false;

	return __swap_cluster_lock(si, offset, disable_irq);
}
158 
159 static inline struct swap_cluster_info *__swap_cluster_get_and_lock(
160 		const struct folio *folio, bool irq)
161 {
162 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
163 	VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio);
164 	return __swap_cluster_lock(__swap_entry_to_info(folio->swap),
165 				   swp_offset(folio->swap), irq);
166 }
167 
/**
 * swap_cluster_get_and_lock - Lock the cluster that holds a folio's entries.
 * @folio: The folio.
 *
 * Locks and returns the swap cluster containing the swap entries of
 * @folio. All swap entries of a folio always live in one single cluster.
 * The folio has to be locked so that its swap entries cannot change and
 * the cluster cannot be freed.
 *
 * Context: Caller must ensure the folio is locked and in the swap cache.
 * Return: Pointer to the swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock(
		const struct folio *folio)
{
	const bool disable_irq = false;

	return __swap_cluster_get_and_lock(folio, disable_irq);
}
185 
/**
 * swap_cluster_get_and_lock_irq - Lock the cluster that holds a folio's
 * entries, with IRQs disabled.
 * @folio: The folio.
 *
 * Same as swap_cluster_get_and_lock() but also disables IRQs.
 *
 * Context: Caller must ensure the folio is locked and in the swap cache.
 * Return: Pointer to the swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
		const struct folio *folio)
{
	const bool disable_irq = true;

	return __swap_cluster_get_and_lock(folio, disable_irq);
}
200 
201 static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
202 {
203 	spin_unlock(&ci->lock);
204 }
205 
206 static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
207 {
208 	spin_unlock_irq(&ci->lock);
209 }
210 
211 extern int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp);
212 
/*
 * Core routines for swapping a folio.
 *
 * All helpers require the folio to be locked. A locked folio in the swap
 * cache pins the swap entries / slots allocated to the folio; swap relies
 * heavily on the swap cache and the folio lock for synchronization.
 *
 * folio_alloc_swap(): the entry point for a folio to be swapped out. It
 * allocates swap slots and pins them with the swap cache. The slots start
 * with a swap count of zero: the swap cache reference that pins them does
 * not contribute to the swap count.
 *
 * folio_dup_swap(): increases the swap count of a folio, usually while
 * the folio is being unmapped and a swap entry is installed to replace it
 * (e.g., a swap entry in the page table). The count of a swap slot with
 * swap count == 0 can only be increased by this helper.
 *
 * folio_put_swap(): does the opposite of folio_dup_swap().
 */
int folio_alloc_swap(struct folio *folio);
int folio_dup_swap(struct folio *folio, struct page *subpage);
void folio_put_swap(struct folio *folio, struct page *subpage);
235 
/* For internal use */
void __swap_cluster_free_entries(struct swap_info_struct *si,
				 struct swap_cluster_info *ci,
				 unsigned int ci_off, unsigned int nr_pages);
240 
/* Swap I/O entry points, see linux/mm/page_io.c */
struct swap_iocb;
int sio_pool_init(void);
void swap_read_folio(struct folio *folio, struct swap_iocb **plug);
void __swap_read_unplug(struct swap_iocb *plug);
/*
 * Hand @plug to __swap_read_unplug() when one is present. A NULL @plug is
 * the expected common case and makes this a no-op.
 */
static inline void swap_read_unplug(struct swap_iocb *plug)
{
	if (likely(!plug))
		return;

	__swap_read_unplug(plug);
}
/* Swap write-side entry points; definitions live outside this header. */
void swap_write_unplug(struct swap_iocb *sio);
int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);
254 
255 /* linux/mm/swap_state.c */
256 extern struct address_space swap_space __read_mostly;
257 static inline struct address_space *swap_address_space(swp_entry_t entry)
258 {
259 	return &swap_space;
260 }
261 
262 /*
263  * Return the swap device position of the swap entry.
264  */
265 static inline loff_t swap_dev_pos(swp_entry_t entry)
266 {
267 	return ((loff_t)swp_offset(entry)) << PAGE_SHIFT;
268 }
269 
270 /**
271  * folio_matches_swap_entry - Check if a folio matches a given swap entry.
272  * @folio: The folio.
273  * @entry: The swap entry to check against.
274  *
275  * Context: The caller should have the folio locked to ensure it's stable
276  * and nothing will move it in or out of the swap cache.
277  * Return: true or false.
278  */
279 static inline bool folio_matches_swap_entry(const struct folio *folio,
280 					    swp_entry_t entry)
281 {
282 	swp_entry_t folio_entry = folio->swap;
283 	long nr_pages = folio_nr_pages(folio);
284 
285 	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
286 	if (!folio_test_swapcache(folio))
287 		return false;
288 	VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio);
289 	return folio_entry.val == round_down(entry.val, nr_pages);
290 }
291 
292 /*
293  * All swap cache helpers below require the caller to ensure the swap entries
294  * used are valid and stabilize the device by any of the following ways:
295  * - Hold a reference by get_swap_device(): this ensures a single entry is
296  *   valid and increases the swap device's refcount.
297  * - Locking a folio in the swap cache: this ensures the folio's swap entries
298  *   are valid and pinned, also implies reference to the device.
299  * - Locking anything referencing the swap entry: e.g. PTL that protects
300  *   swap entries in the page table, similar to locking swap cache folio.
301  * - See the comment of get_swap_device() for more complex usage.
302  */
303 bool swap_cache_has_folio(swp_entry_t entry);
304 struct folio *swap_cache_get_folio(swp_entry_t entry);
305 void *swap_cache_get_shadow(swp_entry_t entry);
306 void swap_cache_del_folio(struct folio *folio);
307 struct folio *swap_cache_alloc_folio(swp_entry_t target_entry, gfp_t gfp_mask,
308 				     unsigned long orders, struct vm_fault *vmf,
309 				     struct mempolicy *mpol, pgoff_t ilx);
310 /* Below helpers require the caller to lock and pass in the swap cluster. */
311 void __swap_cache_add_folio(struct swap_cluster_info *ci,
312 			    struct folio *folio, swp_entry_t entry);
313 void __swap_cache_del_folio(struct swap_cluster_info *ci,
314 			    struct folio *folio, swp_entry_t entry, void *shadow);
315 void __swap_cache_replace_folio(struct swap_cluster_info *ci,
316 				struct folio *old, struct folio *new);
317 
318 void show_swap_cache_info(void);
319 void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr);
320 struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
321 		struct vm_area_struct *vma, unsigned long addr,
322 		struct swap_iocb **plug);
323 struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
324 		struct mempolicy *mpol, pgoff_t ilx);
325 struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
326 		struct vm_fault *vmf);
327 struct folio *swapin_sync(swp_entry_t entry, gfp_t flag, unsigned long orders,
328 			   struct vm_fault *vmf, struct mempolicy *mpol, pgoff_t ilx);
329 void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
330 			   unsigned long addr);
331 
332 static inline unsigned int folio_swap_flags(struct folio *folio)
333 {
334 	return __swap_entry_to_info(folio->swap)->flags;
335 }
336 
337 #else /* CONFIG_SWAP */
338 struct swap_iocb;
/*
 * !CONFIG_SWAP stub of swap_cluster_lock(): there is no cluster to lock,
 * so NULL is always returned. The parameter list mirrors the CONFIG_SWAP
 * version (device and offset only) so that the same call builds in both
 * configurations.
 */
static inline struct swap_cluster_info *swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset)
{
	return NULL;
}
344 
/*
 * !CONFIG_SWAP stub of swap_cluster_get_and_lock(): no folio can be in
 * the swap cache, so there is no cluster to lock and NULL is returned.
 * @folio is const-qualified, like in the CONFIG_SWAP version, so callers
 * holding a const folio pointer build without discarding the qualifier.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock(
		const struct folio *folio)
{
	return NULL;
}
350 
/*
 * !CONFIG_SWAP stub of swap_cluster_get_and_lock_irq(): there is no
 * cluster to lock, so NULL is returned and IRQ state is not touched.
 * @folio is const-qualified, like in the CONFIG_SWAP version, so callers
 * holding a const folio pointer build without discarding the qualifier.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
		const struct folio *folio)
{
	return NULL;
}
356 
/* !CONFIG_SWAP stub: there is no cluster lock to release. */
static inline void swap_cluster_unlock(struct swap_cluster_info *ci) { }
360 
/* !CONFIG_SWAP stub: there is no cluster lock to release. */
static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci) { }
364 
365 static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
366 {
367 	return NULL;
368 }
369 
/* !CONFIG_SWAP stub: swap slots cannot be allocated; always -EINVAL. */
static inline int
folio_alloc_swap(struct folio *folio)
{
	return -EINVAL;
}
374 
/* !CONFIG_SWAP stub: there is no swap count to increase; always -EINVAL. */
static inline int
folio_dup_swap(struct folio *folio, struct page *page)
{
	return -EINVAL;
}
379 
/* !CONFIG_SWAP stub: there is no swap count to drop. */
static inline void folio_put_swap(struct folio *folio, struct page *page) { }
383 
/* !CONFIG_SWAP stub: nothing is read; @folio and @plug are left untouched. */
static inline void
swap_read_folio(struct folio *folio, struct swap_iocb **plug) { }
387 
/* !CONFIG_SWAP stub: there is no pending swap write to unplug. */
static inline void swap_write_unplug(struct swap_iocb *sio) { }
391 
392 static inline struct address_space *swap_address_space(swp_entry_t entry)
393 {
394 	return NULL;
395 }
396 
397 static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry)
398 {
399 	return false;
400 }
401 
/* !CONFIG_SWAP stub: there is no swap cache information to show. */
static inline void show_swap_cache_info(void) { }
405 
406 static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
407 			gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx)
408 {
409 	return NULL;
410 }
411 
412 static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
413 			struct vm_fault *vmf)
414 {
415 	return NULL;
416 }
417 
418 static inline struct folio *swapin_sync(
419 	swp_entry_t entry, gfp_t flag, unsigned long orders,
420 	struct vm_fault *vmf, struct mempolicy *mpol, pgoff_t ilx)
421 {
422 	return NULL;
423 }
424 
/* !CONFIG_SWAP stub: there is no readahead state to update. */
static inline void
swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
		      unsigned long addr) { }
429 
/* !CONFIG_SWAP stub: nothing is written out; always returns 0. */
static inline int
swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
{
	return 0;
}
435 
436 static inline int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp)
437 {
438 	return -EINVAL;
439 }
440 
441 static inline bool swap_cache_has_folio(swp_entry_t entry)
442 {
443 	return false;
444 }
445 
446 static inline struct folio *swap_cache_get_folio(swp_entry_t entry)
447 {
448 	return NULL;
449 }
450 
451 static inline void *swap_cache_get_shadow(swp_entry_t entry)
452 {
453 	return NULL;
454 }
455 
/* !CONFIG_SWAP stub: there is no swap cache to delete @folio from. */
static inline void swap_cache_del_folio(struct folio *folio) { }
459 
460 static inline void __swap_cache_del_folio(struct swap_cluster_info *ci,
461 		struct folio *folio, swp_entry_t entry, void *shadow)
462 {
463 }
464 
/* !CONFIG_SWAP stub: there is no swap cache in which to replace @old. */
static inline void
__swap_cache_replace_folio(struct swap_cluster_info *ci, struct folio *old,
			   struct folio *new) { }
469 
/* !CONFIG_SWAP stub: there is no swap device; the flags are always 0. */
static inline unsigned int
folio_swap_flags(struct folio *folio)
{
	return 0;
}
474 
475 #endif /* CONFIG_SWAP */
476 #endif /* _MM_SWAP_H */
477