1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _MM_SWAP_H 3 #define _MM_SWAP_H 4 5 #include <linux/atomic.h> /* for atomic_long_t */ 6 #include <linux/mm.h> /* for PAGE_SHIFT */ 7 struct mempolicy; 8 struct swap_iocb; 9 struct swap_memcg_table; 10 11 extern int page_cluster; 12 13 #if defined(MAX_POSSIBLE_PHYSMEM_BITS) 14 #define SWAP_CACHE_PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT) 15 #elif defined(MAX_PHYSMEM_BITS) 16 #define SWAP_CACHE_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) 17 #else 18 #define SWAP_CACHE_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT) 19 #endif 20 21 /* Swap table marker, 0x1 means shadow, 0x2 means PFN (SWP_TB_PFN_MARK) */ 22 #define SWAP_CACHE_PFN_MARK_BITS 2 23 /* At least 2 bits are needed to distinguish SWP_TB_COUNT_MAX, 1 and 0 */ 24 #define SWAP_COUNT_MIN_BITS 2 25 /* If there are enough bits besides PFN and marker, store zero flag inline */ 26 #define SWAP_TABLE_HAS_ZEROFLAG ((BITS_PER_LONG - SWAP_CACHE_PFN_MARK_BITS - \ 27 SWAP_CACHE_PFN_BITS) > SWAP_COUNT_MIN_BITS) 28 29 #ifdef CONFIG_THP_SWAP 30 #define SWAPFILE_CLUSTER HPAGE_PMD_NR 31 #define swap_entry_order(order) (order) 32 #else 33 #define SWAPFILE_CLUSTER 256 34 #define swap_entry_order(order) 0 35 #endif 36 37 extern struct swap_info_struct *swap_info[]; 38 39 /* 40 * We use this to track usage of a cluster. A cluster is a block of swap disk 41 * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All 42 * free clusters are organized into a list. We fetch an entry from the list to 43 * get a free cluster. 44 * 45 * The flags field determines if a cluster is free. This is 46 * protected by cluster lock. 47 */ 48 struct swap_cluster_info { 49 spinlock_t lock; /* 50 * Protect swap_cluster_info fields 51 * other than list, and swap_info_struct->swap_map 52 * elements corresponding to the swap cluster. 53 */ 54 u16 count; 55 u8 flags; 56 u8 order; 57 atomic_long_t __rcu *table; /* Swap table entries, see mm/swap_table.h */ 58 unsigned int *extend_table; /* For large swap count, protected by ci->lock */ 59 #ifdef CONFIG_MEMCG 60 struct swap_memcg_table *memcg_table; /* Swap table entries' cgroup record */ 61 #endif 62 #if !SWAP_TABLE_HAS_ZEROFLAG 63 unsigned long *zero_bitmap; 64 #endif 65 struct list_head list; 66 }; 67 68 /* All on-list cluster must have a non-zero flag. */ 69 enum swap_cluster_flags { 70 CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */ 71 CLUSTER_FLAG_FREE, 72 CLUSTER_FLAG_NONFULL, 73 CLUSTER_FLAG_FRAG, 74 /* Clusters with flags above are allocatable */ 75 CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, 76 CLUSTER_FLAG_FULL, 77 CLUSTER_FLAG_DISCARD, 78 CLUSTER_FLAG_MAX, 79 }; 80 81 #ifdef CONFIG_SWAP 82 #include <linux/swapops.h> /* for swp_offset */ 83 #include <linux/blk_types.h> /* for bio_end_io_t */ 84 85 static inline unsigned int swp_cluster_offset(swp_entry_t entry) 86 { 87 return swp_offset(entry) % SWAPFILE_CLUSTER; 88 } 89 90 /* 91 * Callers of all helpers below must ensure the entry, type, or offset is 92 * valid, and protect the swap device with reference count or locks. 93 */ 94 static inline struct swap_info_struct *__swap_type_to_info(int type) 95 { 96 struct swap_info_struct *si; 97 98 si = READ_ONCE(swap_info[type]); /* rcu_dereference() */ 99 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ 100 return si; 101 } 102 103 static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry) 104 { 105 return __swap_type_to_info(swp_type(entry)); 106 } 107 108 static inline struct swap_cluster_info *__swap_offset_to_cluster( 109 struct swap_info_struct *si, pgoff_t offset) 110 { 111 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ 112 VM_WARN_ON_ONCE(offset >= roundup(si->max, SWAPFILE_CLUSTER)); 113 return &si->cluster_info[offset / SWAPFILE_CLUSTER]; 114 } 115 116 static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry) 117 { 118 return __swap_offset_to_cluster(__swap_entry_to_info(entry), 119 swp_offset(entry)); 120 } 121 122 static __always_inline struct swap_cluster_info *__swap_cluster_lock( 123 struct swap_info_struct *si, unsigned long offset, bool irq) 124 { 125 struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset); 126 127 /* 128 * Nothing modifies swap cache in an IRQ context. All access to 129 * swap cache is wrapped by swap_cache_* helpers, and swap cache 130 * writeback is handled outside of IRQs. Swapin or swapout never 131 * occurs in IRQ, and neither does in-place split or replace. 132 * 133 * Besides, modifying swap cache requires synchronization with 134 * swap_map, which was never IRQ safe. 135 */ 136 VM_WARN_ON_ONCE(!in_task()); 137 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ 138 if (irq) 139 spin_lock_irq(&ci->lock); 140 else 141 spin_lock(&ci->lock); 142 return ci; 143 } 144 145 /** 146 * swap_cluster_lock - Lock and return the swap cluster of given offset. 147 * @si: swap device the cluster belongs to. 148 * @offset: the swap entry offset, pointing to a valid slot. 149 * 150 * Context: The caller must ensure the offset is in the valid range and 151 * protect the swap device with reference count or locks. 152 */ 153 static inline struct swap_cluster_info *swap_cluster_lock( 154 struct swap_info_struct *si, unsigned long offset) 155 { 156 return __swap_cluster_lock(si, offset, false); 157 } 158 159 static inline struct swap_cluster_info *__swap_cluster_get_and_lock( 160 const struct folio *folio, bool irq) 161 { 162 VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio); 163 VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio); 164 return __swap_cluster_lock(__swap_entry_to_info(folio->swap), 165 swp_offset(folio->swap), irq); 166 } 167 168 /* 169 * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries. 170 * @folio: The folio. 171 * 172 * This locks and returns the swap cluster that contains a folio's swap 173 * entries. The swap entries of a folio are always in one single cluster. 174 * The folio has to be locked so its swap entries won't change and the 175 * cluster won't be freed. 176 * 177 * Context: Caller must ensure the folio is locked and in the swap cache. 178 * Return: Pointer to the swap cluster. 179 */ 180 static inline struct swap_cluster_info *swap_cluster_get_and_lock( 181 const struct folio *folio) 182 { 183 return __swap_cluster_get_and_lock(folio, false); 184 } 185 186 /* 187 * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries. 188 * @folio: The folio. 189 * 190 * Same as swap_cluster_get_and_lock but also disable IRQ. 191 * 192 * Context: Caller must ensure the folio is locked and in the swap cache. 193 * Return: Pointer to the swap cluster. 194 */ 195 static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq( 196 const struct folio *folio) 197 { 198 return __swap_cluster_get_and_lock(folio, true); 199 } 200 201 static inline void swap_cluster_unlock(struct swap_cluster_info *ci) 202 { 203 spin_unlock(&ci->lock); 204 } 205 206 static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci) 207 { 208 spin_unlock_irq(&ci->lock); 209 } 210 211 extern int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp); 212 213 /* 214 * Below are the core routines for doing swap for a folio. 215 * All helpers requires the folio to be locked, and a locked folio 216 * in the swap cache pins the swap entries / slots allocated to the 217 * folio, swap relies heavily on the swap cache and folio lock for 218 * synchronization. 219 * 220 * folio_alloc_swap(): the entry point for a folio to be swapped 221 * out. It allocates swap slots and pins the slots with swap cache. 222 * The slots start with a swap count of zero. The slots are pinned 223 * by swap cache reference which doesn't contribute to swap count. 224 * 225 * folio_dup_swap(): increases the swap count of a folio, usually 226 * during it gets unmapped and a swap entry is installed to replace 227 * it (e.g., swap entry in page table). A swap slot with swap 228 * count == 0 can only be increased by this helper. 229 * 230 * folio_put_swap(): does the opposite thing of folio_dup_swap(). 231 */ 232 int folio_alloc_swap(struct folio *folio); 233 int folio_dup_swap(struct folio *folio, struct page *subpage); 234 void folio_put_swap(struct folio *folio, struct page *subpage); 235 236 /* For internal use */ 237 extern void __swap_cluster_free_entries(struct swap_info_struct *si, 238 struct swap_cluster_info *ci, 239 unsigned int ci_off, unsigned int nr_pages); 240 241 /* linux/mm/page_io.c */ 242 int sio_pool_init(void); 243 struct swap_iocb; 244 void swap_read_folio(struct folio *folio, struct swap_iocb **plug); 245 void __swap_read_unplug(struct swap_iocb *plug); 246 static inline void swap_read_unplug(struct swap_iocb *plug) 247 { 248 if (unlikely(plug)) 249 __swap_read_unplug(plug); 250 } 251 void swap_write_unplug(struct swap_iocb *sio); 252 int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug); 253 void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug); 254 255 /* linux/mm/swap_state.c */ 256 extern struct address_space swap_space __read_mostly; 257 static inline struct address_space *swap_address_space(swp_entry_t entry) 258 { 259 return &swap_space; 260 } 261 262 /* 263 * Return the swap device position of the swap entry. 264 */ 265 static inline loff_t swap_dev_pos(swp_entry_t entry) 266 { 267 return ((loff_t)swp_offset(entry)) << PAGE_SHIFT; 268 } 269 270 /** 271 * folio_matches_swap_entry - Check if a folio matches a given swap entry. 272 * @folio: The folio. 273 * @entry: The swap entry to check against. 274 * 275 * Context: The caller should have the folio locked to ensure it's stable 276 * and nothing will move it in or out of the swap cache. 277 * Return: true or false. 278 */ 279 static inline bool folio_matches_swap_entry(const struct folio *folio, 280 swp_entry_t entry) 281 { 282 swp_entry_t folio_entry = folio->swap; 283 long nr_pages = folio_nr_pages(folio); 284 285 VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio); 286 if (!folio_test_swapcache(folio)) 287 return false; 288 VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio); 289 return folio_entry.val == round_down(entry.val, nr_pages); 290 } 291 292 /* 293 * All swap cache helpers below require the caller to ensure the swap entries 294 * used are valid and stabilize the device by any of the following ways: 295 * - Hold a reference by get_swap_device(): this ensures a single entry is 296 * valid and increases the swap device's refcount. 297 * - Locking a folio in the swap cache: this ensures the folio's swap entries 298 * are valid and pinned, also implies reference to the device. 299 * - Locking anything referencing the swap entry: e.g. PTL that protects 300 * swap entries in the page table, similar to locking swap cache folio. 301 * - See the comment of get_swap_device() for more complex usage. 302 */ 303 bool swap_cache_has_folio(swp_entry_t entry); 304 struct folio *swap_cache_get_folio(swp_entry_t entry); 305 void *swap_cache_get_shadow(swp_entry_t entry); 306 void swap_cache_del_folio(struct folio *folio); 307 struct folio *swap_cache_alloc_folio(swp_entry_t target_entry, gfp_t gfp_mask, 308 unsigned long orders, struct vm_fault *vmf, 309 struct mempolicy *mpol, pgoff_t ilx); 310 /* Below helpers require the caller to lock and pass in the swap cluster. */ 311 void __swap_cache_add_folio(struct swap_cluster_info *ci, 312 struct folio *folio, swp_entry_t entry); 313 void __swap_cache_del_folio(struct swap_cluster_info *ci, 314 struct folio *folio, swp_entry_t entry, void *shadow); 315 void __swap_cache_replace_folio(struct swap_cluster_info *ci, 316 struct folio *old, struct folio *new); 317 318 void show_swap_cache_info(void); 319 void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr); 320 struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, 321 struct vm_area_struct *vma, unsigned long addr, 322 struct swap_iocb **plug); 323 struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, 324 struct mempolicy *mpol, pgoff_t ilx); 325 struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag, 326 struct vm_fault *vmf); 327 struct folio *swapin_sync(swp_entry_t entry, gfp_t flag, unsigned long orders, 328 struct vm_fault *vmf, struct mempolicy *mpol, pgoff_t ilx); 329 void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma, 330 unsigned long addr); 331 332 static inline unsigned int folio_swap_flags(struct folio *folio) 333 { 334 return __swap_entry_to_info(folio->swap)->flags; 335 } 336 337 #else /* CONFIG_SWAP */ 338 struct swap_iocb; 339 static inline struct swap_cluster_info *swap_cluster_lock( 340 struct swap_info_struct *si, pgoff_t offset, bool irq) 341 { 342 return NULL; 343 } 344 345 static inline struct swap_cluster_info *swap_cluster_get_and_lock( 346 struct folio *folio) 347 { 348 return NULL; 349 } 350 351 static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq( 352 struct folio *folio) 353 { 354 return NULL; 355 } 356 357 static inline void swap_cluster_unlock(struct swap_cluster_info *ci) 358 { 359 } 360 361 static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci) 362 { 363 } 364 365 static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry) 366 { 367 return NULL; 368 } 369 370 static inline int folio_alloc_swap(struct folio *folio) 371 { 372 return -EINVAL; 373 } 374 375 static inline int folio_dup_swap(struct folio *folio, struct page *page) 376 { 377 return -EINVAL; 378 } 379 380 static inline void folio_put_swap(struct folio *folio, struct page *page) 381 { 382 } 383 384 static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug) 385 { 386 } 387 388 static inline void swap_write_unplug(struct swap_iocb *sio) 389 { 390 } 391 392 static inline struct address_space *swap_address_space(swp_entry_t entry) 393 { 394 return NULL; 395 } 396 397 static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry) 398 { 399 return false; 400 } 401 402 static inline void show_swap_cache_info(void) 403 { 404 } 405 406 static inline struct folio *swap_cluster_readahead(swp_entry_t entry, 407 gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx) 408 { 409 return NULL; 410 } 411 412 static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, 413 struct vm_fault *vmf) 414 { 415 return NULL; 416 } 417 418 static inline struct folio *swapin_sync( 419 swp_entry_t entry, gfp_t flag, unsigned long orders, 420 struct vm_fault *vmf, struct mempolicy *mpol, pgoff_t ilx) 421 { 422 return NULL; 423 } 424 425 static inline void swap_update_readahead(struct folio *folio, 426 struct vm_area_struct *vma, unsigned long addr) 427 { 428 } 429 430 static inline int swap_writeout(struct folio *folio, 431 struct swap_iocb **swap_plug) 432 { 433 return 0; 434 } 435 436 static inline int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp) 437 { 438 return -EINVAL; 439 } 440 441 static inline bool swap_cache_has_folio(swp_entry_t entry) 442 { 443 return false; 444 } 445 446 static inline struct folio *swap_cache_get_folio(swp_entry_t entry) 447 { 448 return NULL; 449 } 450 451 static inline void *swap_cache_get_shadow(swp_entry_t entry) 452 { 453 return NULL; 454 } 455 456 static inline void swap_cache_del_folio(struct folio *folio) 457 { 458 } 459 460 static inline void __swap_cache_del_folio(struct swap_cluster_info *ci, 461 struct folio *folio, swp_entry_t entry, void *shadow) 462 { 463 } 464 465 static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci, 466 struct folio *old, struct folio *new) 467 { 468 } 469 470 static inline unsigned int folio_swap_flags(struct folio *folio) 471 { 472 return 0; 473 } 474 475 #endif /* CONFIG_SWAP */ 476 #endif /* _MM_SWAP_H */ 477