1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _MM_SWAP_H 3 #define _MM_SWAP_H 4 5 #include <linux/atomic.h> /* for atomic_long_t */ 6 struct mempolicy; 7 struct swap_iocb; 8 9 extern int page_cluster; 10 11 #ifdef CONFIG_THP_SWAP 12 #define SWAPFILE_CLUSTER HPAGE_PMD_NR 13 #define swap_entry_order(order) (order) 14 #else 15 #define SWAPFILE_CLUSTER 256 16 #define swap_entry_order(order) 0 17 #endif 18 19 extern struct swap_info_struct *swap_info[]; 20 21 /* 22 * We use this to track usage of a cluster. A cluster is a block of swap disk 23 * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All 24 * free clusters are organized into a list. We fetch an entry from the list to 25 * get a free cluster. 26 * 27 * The flags field determines if a cluster is free. This is 28 * protected by cluster lock. 29 */ 30 struct swap_cluster_info { 31 spinlock_t lock; /* 32 * Protect swap_cluster_info fields 33 * other than list, and swap_info_struct->swap_map 34 * elements corresponding to the swap cluster. 35 */ 36 u16 count; 37 u8 flags; 38 u8 order; 39 atomic_long_t __rcu *table; /* Swap table entries, see mm/swap_table.h */ 40 unsigned int *extend_table; /* For large swap count, protected by ci->lock */ 41 struct list_head list; 42 }; 43 44 /* All on-list cluster must have a non-zero flag. */ 45 enum swap_cluster_flags { 46 CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */ 47 CLUSTER_FLAG_FREE, 48 CLUSTER_FLAG_NONFULL, 49 CLUSTER_FLAG_FRAG, 50 /* Clusters with flags above are allocatable */ 51 CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, 52 CLUSTER_FLAG_FULL, 53 CLUSTER_FLAG_DISCARD, 54 CLUSTER_FLAG_MAX, 55 }; 56 57 #ifdef CONFIG_SWAP 58 #include <linux/swapops.h> /* for swp_offset */ 59 #include <linux/blk_types.h> /* for bio_end_io_t */ 60 61 static inline unsigned int swp_cluster_offset(swp_entry_t entry) 62 { 63 return swp_offset(entry) % SWAPFILE_CLUSTER; 64 } 65 66 /* 67 * Callers of all helpers below must ensure the entry, type, or offset is 68 * valid, and protect the swap device with reference count or locks. 69 */ 70 static inline struct swap_info_struct *__swap_type_to_info(int type) 71 { 72 struct swap_info_struct *si; 73 74 si = READ_ONCE(swap_info[type]); /* rcu_dereference() */ 75 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ 76 return si; 77 } 78 79 static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry) 80 { 81 return __swap_type_to_info(swp_type(entry)); 82 } 83 84 static inline struct swap_cluster_info *__swap_offset_to_cluster( 85 struct swap_info_struct *si, pgoff_t offset) 86 { 87 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ 88 VM_WARN_ON_ONCE(offset >= roundup(si->max, SWAPFILE_CLUSTER)); 89 return &si->cluster_info[offset / SWAPFILE_CLUSTER]; 90 } 91 92 static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry) 93 { 94 return __swap_offset_to_cluster(__swap_entry_to_info(entry), 95 swp_offset(entry)); 96 } 97 98 static __always_inline struct swap_cluster_info *__swap_cluster_lock( 99 struct swap_info_struct *si, unsigned long offset, bool irq) 100 { 101 struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset); 102 103 /* 104 * Nothing modifies swap cache in an IRQ context. All access to 105 * swap cache is wrapped by swap_cache_* helpers, and swap cache 106 * writeback is handled outside of IRQs. Swapin or swapout never 107 * occurs in IRQ, and neither does in-place split or replace. 108 * 109 * Besides, modifying swap cache requires synchronization with 110 * swap_map, which was never IRQ safe. 111 */ 112 VM_WARN_ON_ONCE(!in_task()); 113 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ 114 if (irq) 115 spin_lock_irq(&ci->lock); 116 else 117 spin_lock(&ci->lock); 118 return ci; 119 } 120 121 /** 122 * swap_cluster_lock - Lock and return the swap cluster of given offset. 123 * @si: swap device the cluster belongs to. 124 * @offset: the swap entry offset, pointing to a valid slot. 125 * 126 * Context: The caller must ensure the offset is in the valid range and 127 * protect the swap device with reference count or locks. 128 */ 129 static inline struct swap_cluster_info *swap_cluster_lock( 130 struct swap_info_struct *si, unsigned long offset) 131 { 132 return __swap_cluster_lock(si, offset, false); 133 } 134 135 static inline struct swap_cluster_info *__swap_cluster_get_and_lock( 136 const struct folio *folio, bool irq) 137 { 138 VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio); 139 VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio); 140 return __swap_cluster_lock(__swap_entry_to_info(folio->swap), 141 swp_offset(folio->swap), irq); 142 } 143 144 /* 145 * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries. 146 * @folio: The folio. 147 * 148 * This locks and returns the swap cluster that contains a folio's swap 149 * entries. The swap entries of a folio are always in one single cluster. 150 * The folio has to be locked so its swap entries won't change and the 151 * cluster won't be freed. 152 * 153 * Context: Caller must ensure the folio is locked and in the swap cache. 154 * Return: Pointer to the swap cluster. 155 */ 156 static inline struct swap_cluster_info *swap_cluster_get_and_lock( 157 const struct folio *folio) 158 { 159 return __swap_cluster_get_and_lock(folio, false); 160 } 161 162 /* 163 * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries. 164 * @folio: The folio. 165 * 166 * Same as swap_cluster_get_and_lock but also disable IRQ. 167 * 168 * Context: Caller must ensure the folio is locked and in the swap cache. 169 * Return: Pointer to the swap cluster. 170 */ 171 static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq( 172 const struct folio *folio) 173 { 174 return __swap_cluster_get_and_lock(folio, true); 175 } 176 177 static inline void swap_cluster_unlock(struct swap_cluster_info *ci) 178 { 179 spin_unlock(&ci->lock); 180 } 181 182 static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci) 183 { 184 spin_unlock_irq(&ci->lock); 185 } 186 187 extern int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp); 188 189 /* 190 * Below are the core routines for doing swap for a folio. 191 * All helpers requires the folio to be locked, and a locked folio 192 * in the swap cache pins the swap entries / slots allocated to the 193 * folio, swap relies heavily on the swap cache and folio lock for 194 * synchronization. 195 * 196 * folio_alloc_swap(): the entry point for a folio to be swapped 197 * out. It allocates swap slots and pins the slots with swap cache. 198 * The slots start with a swap count of zero. The slots are pinned 199 * by swap cache reference which doesn't contribute to swap count. 200 * 201 * folio_dup_swap(): increases the swap count of a folio, usually 202 * during it gets unmapped and a swap entry is installed to replace 203 * it (e.g., swap entry in page table). A swap slot with swap 204 * count == 0 can only be increased by this helper. 205 * 206 * folio_put_swap(): does the opposite thing of folio_dup_swap(). 207 */ 208 int folio_alloc_swap(struct folio *folio); 209 int folio_dup_swap(struct folio *folio, struct page *subpage); 210 void folio_put_swap(struct folio *folio, struct page *subpage); 211 212 /* For internal use */ 213 extern void __swap_cluster_free_entries(struct swap_info_struct *si, 214 struct swap_cluster_info *ci, 215 unsigned int ci_off, unsigned int nr_pages); 216 217 /* linux/mm/page_io.c */ 218 int sio_pool_init(void); 219 struct swap_iocb; 220 void swap_read_folio(struct folio *folio, struct swap_iocb **plug); 221 void __swap_read_unplug(struct swap_iocb *plug); 222 static inline void swap_read_unplug(struct swap_iocb *plug) 223 { 224 if (unlikely(plug)) 225 __swap_read_unplug(plug); 226 } 227 void swap_write_unplug(struct swap_iocb *sio); 228 int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug); 229 void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug); 230 231 /* linux/mm/swap_state.c */ 232 extern struct address_space swap_space __read_mostly; 233 static inline struct address_space *swap_address_space(swp_entry_t entry) 234 { 235 return &swap_space; 236 } 237 238 /* 239 * Return the swap device position of the swap entry. 240 */ 241 static inline loff_t swap_dev_pos(swp_entry_t entry) 242 { 243 return ((loff_t)swp_offset(entry)) << PAGE_SHIFT; 244 } 245 246 /** 247 * folio_matches_swap_entry - Check if a folio matches a given swap entry. 248 * @folio: The folio. 249 * @entry: The swap entry to check against. 250 * 251 * Context: The caller should have the folio locked to ensure it's stable 252 * and nothing will move it in or out of the swap cache. 253 * Return: true or false. 254 */ 255 static inline bool folio_matches_swap_entry(const struct folio *folio, 256 swp_entry_t entry) 257 { 258 swp_entry_t folio_entry = folio->swap; 259 long nr_pages = folio_nr_pages(folio); 260 261 VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio); 262 if (!folio_test_swapcache(folio)) 263 return false; 264 VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio); 265 return folio_entry.val == round_down(entry.val, nr_pages); 266 } 267 268 /* 269 * All swap cache helpers below require the caller to ensure the swap entries 270 * used are valid and stabilize the device by any of the following ways: 271 * - Hold a reference by get_swap_device(): this ensures a single entry is 272 * valid and increases the swap device's refcount. 273 * - Locking a folio in the swap cache: this ensures the folio's swap entries 274 * are valid and pinned, also implies reference to the device. 275 * - Locking anything referencing the swap entry: e.g. PTL that protects 276 * swap entries in the page table, similar to locking swap cache folio. 277 * - See the comment of get_swap_device() for more complex usage. 278 */ 279 bool swap_cache_has_folio(swp_entry_t entry); 280 struct folio *swap_cache_get_folio(swp_entry_t entry); 281 void *swap_cache_get_shadow(swp_entry_t entry); 282 void swap_cache_del_folio(struct folio *folio); 283 struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags, 284 struct mempolicy *mpol, pgoff_t ilx, 285 bool *alloced); 286 /* Below helpers require the caller to lock and pass in the swap cluster. */ 287 void __swap_cache_add_folio(struct swap_cluster_info *ci, 288 struct folio *folio, swp_entry_t entry); 289 void __swap_cache_del_folio(struct swap_cluster_info *ci, 290 struct folio *folio, swp_entry_t entry, void *shadow); 291 void __swap_cache_replace_folio(struct swap_cluster_info *ci, 292 struct folio *old, struct folio *new); 293 294 void show_swap_cache_info(void); 295 void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr); 296 struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, 297 struct vm_area_struct *vma, unsigned long addr, 298 struct swap_iocb **plug); 299 struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, 300 struct mempolicy *mpol, pgoff_t ilx); 301 struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag, 302 struct vm_fault *vmf); 303 struct folio *swapin_folio(swp_entry_t entry, struct folio *folio); 304 void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma, 305 unsigned long addr); 306 307 static inline unsigned int folio_swap_flags(struct folio *folio) 308 { 309 return __swap_entry_to_info(folio->swap)->flags; 310 } 311 312 /* 313 * Return the count of contiguous swap entries that share the same 314 * zeromap status as the starting entry. If is_zeromap is not NULL, 315 * it will return the zeromap status of the starting entry. 316 */ 317 static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, 318 bool *is_zeromap) 319 { 320 struct swap_info_struct *sis = __swap_entry_to_info(entry); 321 unsigned long start = swp_offset(entry); 322 unsigned long end = start + max_nr; 323 bool first_bit; 324 325 first_bit = test_bit(start, sis->zeromap); 326 if (is_zeromap) 327 *is_zeromap = first_bit; 328 329 if (max_nr <= 1) 330 return max_nr; 331 if (first_bit) 332 return find_next_zero_bit(sis->zeromap, end, start) - start; 333 else 334 return find_next_bit(sis->zeromap, end, start) - start; 335 } 336 337 static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) 338 { 339 int i; 340 341 /* 342 * While allocating a large folio and doing mTHP swapin, we need to 343 * ensure all entries are not cached, otherwise, the mTHP folio will 344 * be in conflict with the folio in swap cache. 345 */ 346 for (i = 0; i < max_nr; i++) { 347 if (swap_cache_has_folio(entry)) 348 return i; 349 entry.val++; 350 } 351 352 return i; 353 } 354 355 #else /* CONFIG_SWAP */ 356 struct swap_iocb; 357 static inline struct swap_cluster_info *swap_cluster_lock( 358 struct swap_info_struct *si, pgoff_t offset, bool irq) 359 { 360 return NULL; 361 } 362 363 static inline struct swap_cluster_info *swap_cluster_get_and_lock( 364 struct folio *folio) 365 { 366 return NULL; 367 } 368 369 static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq( 370 struct folio *folio) 371 { 372 return NULL; 373 } 374 375 static inline void swap_cluster_unlock(struct swap_cluster_info *ci) 376 { 377 } 378 379 static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci) 380 { 381 } 382 383 static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry) 384 { 385 return NULL; 386 } 387 388 static inline int folio_alloc_swap(struct folio *folio) 389 { 390 return -EINVAL; 391 } 392 393 static inline int folio_dup_swap(struct folio *folio, struct page *page) 394 { 395 return -EINVAL; 396 } 397 398 static inline void folio_put_swap(struct folio *folio, struct page *page) 399 { 400 } 401 402 static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug) 403 { 404 } 405 406 static inline void swap_write_unplug(struct swap_iocb *sio) 407 { 408 } 409 410 static inline struct address_space *swap_address_space(swp_entry_t entry) 411 { 412 return NULL; 413 } 414 415 static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry) 416 { 417 return false; 418 } 419 420 static inline void show_swap_cache_info(void) 421 { 422 } 423 424 static inline struct folio *swap_cluster_readahead(swp_entry_t entry, 425 gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx) 426 { 427 return NULL; 428 } 429 430 static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, 431 struct vm_fault *vmf) 432 { 433 return NULL; 434 } 435 436 static inline struct folio *swapin_folio(swp_entry_t entry, struct folio *folio) 437 { 438 return NULL; 439 } 440 441 static inline void swap_update_readahead(struct folio *folio, 442 struct vm_area_struct *vma, unsigned long addr) 443 { 444 } 445 446 static inline int swap_writeout(struct folio *folio, 447 struct swap_iocb **swap_plug) 448 { 449 return 0; 450 } 451 452 static inline int swap_retry_table_alloc(swp_entry_t entry, gfp_t gfp) 453 { 454 return -EINVAL; 455 } 456 457 static inline bool swap_cache_has_folio(swp_entry_t entry) 458 { 459 return false; 460 } 461 462 static inline struct folio *swap_cache_get_folio(swp_entry_t entry) 463 { 464 return NULL; 465 } 466 467 static inline void *swap_cache_get_shadow(swp_entry_t entry) 468 { 469 return NULL; 470 } 471 472 static inline void swap_cache_del_folio(struct folio *folio) 473 { 474 } 475 476 static inline void __swap_cache_del_folio(struct swap_cluster_info *ci, 477 struct folio *folio, swp_entry_t entry, void *shadow) 478 { 479 } 480 481 static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci, 482 struct folio *old, struct folio *new) 483 { 484 } 485 486 static inline unsigned int folio_swap_flags(struct folio *folio) 487 { 488 return 0; 489 } 490 491 static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, 492 bool *has_zeromap) 493 { 494 return 0; 495 } 496 497 static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) 498 { 499 return 0; 500 } 501 #endif /* CONFIG_SWAP */ 502 #endif /* _MM_SWAP_H */ 503