1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _MM_SWAP_H 3 #define _MM_SWAP_H 4 5 #include <linux/atomic.h> /* for atomic_long_t */ 6 struct mempolicy; 7 struct swap_iocb; 8 9 extern int page_cluster; 10 11 #ifdef CONFIG_THP_SWAP 12 #define SWAPFILE_CLUSTER HPAGE_PMD_NR 13 #define swap_entry_order(order) (order) 14 #else 15 #define SWAPFILE_CLUSTER 256 16 #define swap_entry_order(order) 0 17 #endif 18 19 extern struct swap_info_struct *swap_info[]; 20 21 /* 22 * We use this to track usage of a cluster. A cluster is a block of swap disk 23 * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All 24 * free clusters are organized into a list. We fetch an entry from the list to 25 * get a free cluster. 26 * 27 * The flags field determines if a cluster is free. This is 28 * protected by cluster lock. 29 */ 30 struct swap_cluster_info { 31 spinlock_t lock; /* 32 * Protect swap_cluster_info fields 33 * other than list, and swap_info_struct->swap_map 34 * elements corresponding to the swap cluster. 35 */ 36 u16 count; 37 u8 flags; 38 u8 order; 39 atomic_long_t __rcu *table; /* Swap table entries, see mm/swap_table.h */ 40 struct list_head list; 41 }; 42 43 /* All on-list cluster must have a non-zero flag. */ 44 enum swap_cluster_flags { 45 CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */ 46 CLUSTER_FLAG_FREE, 47 CLUSTER_FLAG_NONFULL, 48 CLUSTER_FLAG_FRAG, 49 /* Clusters with flags above are allocatable */ 50 CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, 51 CLUSTER_FLAG_FULL, 52 CLUSTER_FLAG_DISCARD, 53 CLUSTER_FLAG_MAX, 54 }; 55 56 #ifdef CONFIG_SWAP 57 #include <linux/swapops.h> /* for swp_offset */ 58 #include <linux/blk_types.h> /* for bio_end_io_t */ 59 60 static inline unsigned int swp_cluster_offset(swp_entry_t entry) 61 { 62 return swp_offset(entry) % SWAPFILE_CLUSTER; 63 } 64 65 /* 66 * Callers of all helpers below must ensure the entry, type, or offset is 67 * valid, and protect the swap device with reference count or locks. 68 */ 69 static inline struct swap_info_struct *__swap_type_to_info(int type) 70 { 71 struct swap_info_struct *si; 72 73 si = READ_ONCE(swap_info[type]); /* rcu_dereference() */ 74 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ 75 return si; 76 } 77 78 static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry) 79 { 80 return __swap_type_to_info(swp_type(entry)); 81 } 82 83 static inline struct swap_cluster_info *__swap_offset_to_cluster( 84 struct swap_info_struct *si, pgoff_t offset) 85 { 86 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ 87 VM_WARN_ON_ONCE(offset >= si->max); 88 return &si->cluster_info[offset / SWAPFILE_CLUSTER]; 89 } 90 91 static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry) 92 { 93 return __swap_offset_to_cluster(__swap_entry_to_info(entry), 94 swp_offset(entry)); 95 } 96 97 static __always_inline struct swap_cluster_info *__swap_cluster_lock( 98 struct swap_info_struct *si, unsigned long offset, bool irq) 99 { 100 struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset); 101 102 /* 103 * Nothing modifies swap cache in an IRQ context. All access to 104 * swap cache is wrapped by swap_cache_* helpers, and swap cache 105 * writeback is handled outside of IRQs. Swapin or swapout never 106 * occurs in IRQ, and neither does in-place split or replace. 107 * 108 * Besides, modifying swap cache requires synchronization with 109 * swap_map, which was never IRQ safe. 110 */ 111 VM_WARN_ON_ONCE(!in_task()); 112 VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */ 113 if (irq) 114 spin_lock_irq(&ci->lock); 115 else 116 spin_lock(&ci->lock); 117 return ci; 118 } 119 120 /** 121 * swap_cluster_lock - Lock and return the swap cluster of given offset. 122 * @si: swap device the cluster belongs to. 123 * @offset: the swap entry offset, pointing to a valid slot. 124 * 125 * Context: The caller must ensure the offset is in the valid range and 126 * protect the swap device with reference count or locks. 127 */ 128 static inline struct swap_cluster_info *swap_cluster_lock( 129 struct swap_info_struct *si, unsigned long offset) 130 { 131 return __swap_cluster_lock(si, offset, false); 132 } 133 134 static inline struct swap_cluster_info *__swap_cluster_get_and_lock( 135 const struct folio *folio, bool irq) 136 { 137 VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio); 138 VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio); 139 return __swap_cluster_lock(__swap_entry_to_info(folio->swap), 140 swp_offset(folio->swap), irq); 141 } 142 143 /* 144 * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries. 145 * @folio: The folio. 146 * 147 * This locks and returns the swap cluster that contains a folio's swap 148 * entries. The swap entries of a folio are always in one single cluster. 149 * The folio has to be locked so its swap entries won't change and the 150 * cluster won't be freed. 151 * 152 * Context: Caller must ensure the folio is locked and in the swap cache. 153 * Return: Pointer to the swap cluster. 154 */ 155 static inline struct swap_cluster_info *swap_cluster_get_and_lock( 156 const struct folio *folio) 157 { 158 return __swap_cluster_get_and_lock(folio, false); 159 } 160 161 /* 162 * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries. 163 * @folio: The folio. 164 * 165 * Same as swap_cluster_get_and_lock but also disable IRQ. 166 * 167 * Context: Caller must ensure the folio is locked and in the swap cache. 168 * Return: Pointer to the swap cluster. 169 */ 170 static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq( 171 const struct folio *folio) 172 { 173 return __swap_cluster_get_and_lock(folio, true); 174 } 175 176 static inline void swap_cluster_unlock(struct swap_cluster_info *ci) 177 { 178 spin_unlock(&ci->lock); 179 } 180 181 static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci) 182 { 183 spin_unlock_irq(&ci->lock); 184 } 185 186 /* 187 * Below are the core routines for doing swap for a folio. 188 * All helpers requires the folio to be locked, and a locked folio 189 * in the swap cache pins the swap entries / slots allocated to the 190 * folio, swap relies heavily on the swap cache and folio lock for 191 * synchronization. 192 * 193 * folio_alloc_swap(): the entry point for a folio to be swapped 194 * out. It allocates swap slots and pins the slots with swap cache. 195 * The slots start with a swap count of zero. 196 * 197 * folio_dup_swap(): increases the swap count of a folio, usually 198 * during it gets unmapped and a swap entry is installed to replace 199 * it (e.g., swap entry in page table). A swap slot with swap 200 * count == 0 should only be increasd by this helper. 201 * 202 * folio_put_swap(): does the opposite thing of folio_dup_swap(). 203 */ 204 int folio_alloc_swap(struct folio *folio); 205 int folio_dup_swap(struct folio *folio, struct page *subpage); 206 void folio_put_swap(struct folio *folio, struct page *subpage); 207 208 /* For internal use */ 209 extern void swap_entries_free(struct swap_info_struct *si, 210 struct swap_cluster_info *ci, 211 unsigned long offset, unsigned int nr_pages); 212 213 /* linux/mm/page_io.c */ 214 int sio_pool_init(void); 215 struct swap_iocb; 216 void swap_read_folio(struct folio *folio, struct swap_iocb **plug); 217 void __swap_read_unplug(struct swap_iocb *plug); 218 static inline void swap_read_unplug(struct swap_iocb *plug) 219 { 220 if (unlikely(plug)) 221 __swap_read_unplug(plug); 222 } 223 void swap_write_unplug(struct swap_iocb *sio); 224 int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug); 225 void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug); 226 227 /* linux/mm/swap_state.c */ 228 extern struct address_space swap_space __read_mostly; 229 static inline struct address_space *swap_address_space(swp_entry_t entry) 230 { 231 return &swap_space; 232 } 233 234 /* 235 * Return the swap device position of the swap entry. 236 */ 237 static inline loff_t swap_dev_pos(swp_entry_t entry) 238 { 239 return ((loff_t)swp_offset(entry)) << PAGE_SHIFT; 240 } 241 242 /** 243 * folio_matches_swap_entry - Check if a folio matches a given swap entry. 244 * @folio: The folio. 245 * @entry: The swap entry to check against. 246 * 247 * Context: The caller should have the folio locked to ensure it's stable 248 * and nothing will move it in or out of the swap cache. 249 * Return: true or false. 250 */ 251 static inline bool folio_matches_swap_entry(const struct folio *folio, 252 swp_entry_t entry) 253 { 254 swp_entry_t folio_entry = folio->swap; 255 long nr_pages = folio_nr_pages(folio); 256 257 VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio); 258 if (!folio_test_swapcache(folio)) 259 return false; 260 VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio); 261 return folio_entry.val == round_down(entry.val, nr_pages); 262 } 263 264 /* 265 * All swap cache helpers below require the caller to ensure the swap entries 266 * used are valid and stabilize the device by any of the following ways: 267 * - Hold a reference by get_swap_device(): this ensures a single entry is 268 * valid and increases the swap device's refcount. 269 * - Locking a folio in the swap cache: this ensures the folio's swap entries 270 * are valid and pinned, also implies reference to the device. 271 * - Locking anything referencing the swap entry: e.g. PTL that protects 272 * swap entries in the page table, similar to locking swap cache folio. 273 * - See the comment of get_swap_device() for more complex usage. 274 */ 275 bool swap_cache_has_folio(swp_entry_t entry); 276 struct folio *swap_cache_get_folio(swp_entry_t entry); 277 void *swap_cache_get_shadow(swp_entry_t entry); 278 void swap_cache_del_folio(struct folio *folio); 279 struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags, 280 struct mempolicy *mpol, pgoff_t ilx, 281 bool *alloced); 282 /* Below helpers require the caller to lock and pass in the swap cluster. */ 283 void __swap_cache_add_folio(struct swap_cluster_info *ci, 284 struct folio *folio, swp_entry_t entry); 285 void __swap_cache_del_folio(struct swap_cluster_info *ci, 286 struct folio *folio, swp_entry_t entry, void *shadow); 287 void __swap_cache_replace_folio(struct swap_cluster_info *ci, 288 struct folio *old, struct folio *new); 289 void __swap_cache_clear_shadow(swp_entry_t entry, int nr_ents); 290 291 void show_swap_cache_info(void); 292 void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr); 293 struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, 294 struct vm_area_struct *vma, unsigned long addr, 295 struct swap_iocb **plug); 296 struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag, 297 struct mempolicy *mpol, pgoff_t ilx); 298 struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag, 299 struct vm_fault *vmf); 300 struct folio *swapin_folio(swp_entry_t entry, struct folio *folio); 301 void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma, 302 unsigned long addr); 303 304 static inline unsigned int folio_swap_flags(struct folio *folio) 305 { 306 return __swap_entry_to_info(folio->swap)->flags; 307 } 308 309 /* 310 * Return the count of contiguous swap entries that share the same 311 * zeromap status as the starting entry. If is_zeromap is not NULL, 312 * it will return the zeromap status of the starting entry. 313 */ 314 static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, 315 bool *is_zeromap) 316 { 317 struct swap_info_struct *sis = __swap_entry_to_info(entry); 318 unsigned long start = swp_offset(entry); 319 unsigned long end = start + max_nr; 320 bool first_bit; 321 322 first_bit = test_bit(start, sis->zeromap); 323 if (is_zeromap) 324 *is_zeromap = first_bit; 325 326 if (max_nr <= 1) 327 return max_nr; 328 if (first_bit) 329 return find_next_zero_bit(sis->zeromap, end, start) - start; 330 else 331 return find_next_bit(sis->zeromap, end, start) - start; 332 } 333 334 static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) 335 { 336 int i; 337 338 /* 339 * While allocating a large folio and doing mTHP swapin, we need to 340 * ensure all entries are not cached, otherwise, the mTHP folio will 341 * be in conflict with the folio in swap cache. 342 */ 343 for (i = 0; i < max_nr; i++) { 344 if (swap_cache_has_folio(entry)) 345 return i; 346 entry.val++; 347 } 348 349 return i; 350 } 351 352 #else /* CONFIG_SWAP */ 353 struct swap_iocb; 354 static inline struct swap_cluster_info *swap_cluster_lock( 355 struct swap_info_struct *si, pgoff_t offset, bool irq) 356 { 357 return NULL; 358 } 359 360 static inline struct swap_cluster_info *swap_cluster_get_and_lock( 361 struct folio *folio) 362 { 363 return NULL; 364 } 365 366 static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq( 367 struct folio *folio) 368 { 369 return NULL; 370 } 371 372 static inline void swap_cluster_unlock(struct swap_cluster_info *ci) 373 { 374 } 375 376 static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci) 377 { 378 } 379 380 static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry) 381 { 382 return NULL; 383 } 384 385 static inline int folio_alloc_swap(struct folio *folio) 386 { 387 return -EINVAL; 388 } 389 390 static inline int folio_dup_swap(struct folio *folio, struct page *page) 391 { 392 return -EINVAL; 393 } 394 395 static inline void folio_put_swap(struct folio *folio, struct page *page) 396 { 397 } 398 399 static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug) 400 { 401 } 402 403 static inline void swap_write_unplug(struct swap_iocb *sio) 404 { 405 } 406 407 static inline struct address_space *swap_address_space(swp_entry_t entry) 408 { 409 return NULL; 410 } 411 412 static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry) 413 { 414 return false; 415 } 416 417 static inline void show_swap_cache_info(void) 418 { 419 } 420 421 static inline struct folio *swap_cluster_readahead(swp_entry_t entry, 422 gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx) 423 { 424 return NULL; 425 } 426 427 static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask, 428 struct vm_fault *vmf) 429 { 430 return NULL; 431 } 432 433 static inline struct folio *swapin_folio(swp_entry_t entry, struct folio *folio) 434 { 435 return NULL; 436 } 437 438 static inline void swap_update_readahead(struct folio *folio, 439 struct vm_area_struct *vma, unsigned long addr) 440 { 441 } 442 443 static inline int swap_writeout(struct folio *folio, 444 struct swap_iocb **swap_plug) 445 { 446 return 0; 447 } 448 449 static inline bool swap_cache_has_folio(swp_entry_t entry) 450 { 451 return false; 452 } 453 454 static inline struct folio *swap_cache_get_folio(swp_entry_t entry) 455 { 456 return NULL; 457 } 458 459 static inline void *swap_cache_get_shadow(swp_entry_t entry) 460 { 461 return NULL; 462 } 463 464 static inline void swap_cache_del_folio(struct folio *folio) 465 { 466 } 467 468 static inline void __swap_cache_del_folio(struct swap_cluster_info *ci, 469 struct folio *folio, swp_entry_t entry, void *shadow) 470 { 471 } 472 473 static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci, 474 struct folio *old, struct folio *new) 475 { 476 } 477 478 static inline unsigned int folio_swap_flags(struct folio *folio) 479 { 480 return 0; 481 } 482 483 static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, 484 bool *has_zeromap) 485 { 486 return 0; 487 } 488 489 static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) 490 { 491 return 0; 492 } 493 #endif /* CONFIG_SWAP */ 494 #endif /* _MM_SWAP_H */ 495