/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MM_SWAP_H
#define _MM_SWAP_H

#include <linux/atomic.h> /* for atomic_long_t */
struct mempolicy;
struct swap_iocb;

extern int page_cluster;

#ifdef CONFIG_THP_SWAP
#define SWAPFILE_CLUSTER	HPAGE_PMD_NR
#define swap_entry_order(order)	(order)
#else
#define SWAPFILE_CLUSTER	256
#define swap_entry_order(order)	0
#endif

extern struct swap_info_struct *swap_info[];

/*
 * We use this to track usage of a cluster. A cluster is a block of swap disk
 * space SWAPFILE_CLUSTER pages long, naturally aligned on disk. All free
 * clusters are organized into a list. We fetch an entry from the list to
 * get a free cluster.
 *
 * The flags field determines if a cluster is free. This is
 * protected by the cluster lock.
 */
struct swap_cluster_info {
	spinlock_t lock;	/*
				 * Protect swap_cluster_info fields
				 * other than list, and swap_info_struct->swap_map
				 * elements corresponding to the swap cluster.
				 */
	u16 count;
	u8 flags;
	u8 order;
	atomic_long_t __rcu *table;	/* Swap table entries, see mm/swap_table.h */
	struct list_head list;
};

/* All on-list clusters must have a non-zero flag. */
enum swap_cluster_flags {
	CLUSTER_FLAG_NONE = 0, /* For temporary off-list clusters */
	CLUSTER_FLAG_FREE,
	CLUSTER_FLAG_NONFULL,
	CLUSTER_FLAG_FRAG,
	/* Clusters with flags above are allocatable */
	CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
	CLUSTER_FLAG_FULL,
	CLUSTER_FLAG_DISCARD,
	CLUSTER_FLAG_MAX,
};

#ifdef CONFIG_SWAP
#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */

static inline unsigned int swp_cluster_offset(swp_entry_t entry)
{
	return swp_offset(entry) % SWAPFILE_CLUSTER;
}

/*
 * Callers of all helpers below must ensure the entry, type, or offset is
 * valid, and protect the swap device with a reference count or locks.
 */
static inline struct swap_info_struct *__swap_type_to_info(int type)
{
	struct swap_info_struct *si;

	si = READ_ONCE(swap_info[type]); /* rcu_dereference() */
	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	return si;
}

static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
{
	return __swap_type_to_info(swp_type(entry));
}

static inline struct swap_cluster_info *__swap_offset_to_cluster(
		struct swap_info_struct *si, pgoff_t offset)
{
	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	VM_WARN_ON_ONCE(offset >= si->max);
	return &si->cluster_info[offset / SWAPFILE_CLUSTER];
}

static inline struct swap_cluster_info *__swap_entry_to_cluster(swp_entry_t entry)
{
	return __swap_offset_to_cluster(__swap_entry_to_info(entry),
					swp_offset(entry));
}

static __always_inline struct swap_cluster_info *__swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset, bool irq)
{
	struct swap_cluster_info *ci = __swap_offset_to_cluster(si, offset);

	/*
	 * Nothing modifies the swap cache in an IRQ context. All access to
	 * the swap cache is wrapped by swap_cache_* helpers, and swap cache
	 * writeback is handled outside of IRQs. Swapin and swapout never
	 * occur in IRQ context, and neither do in-place split or replace.
	 *
	 * Besides, modifying the swap cache requires synchronization with
	 * swap_map, which was never IRQ safe.
	 */
	VM_WARN_ON_ONCE(!in_task());
	VM_WARN_ON_ONCE(percpu_ref_is_zero(&si->users)); /* race with swapoff */
	if (irq)
		spin_lock_irq(&ci->lock);
	else
		spin_lock(&ci->lock);
	return ci;
}

/**
 * swap_cluster_lock - Lock and return the swap cluster of given offset.
 * @si: swap device the cluster belongs to.
 * @offset: the swap entry offset, pointing to a valid slot.
 *
 * Context: The caller must ensure the offset is in the valid range and
 * protect the swap device with a reference count or locks.
 */
static inline struct swap_cluster_info *swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset)
{
	return __swap_cluster_lock(si, offset, false);
}

static inline struct swap_cluster_info *__swap_cluster_get_and_lock(
		const struct folio *folio, bool irq)
{
	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
	VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio);
	return __swap_cluster_lock(__swap_entry_to_info(folio->swap),
				   swp_offset(folio->swap), irq);
}

/*
 * swap_cluster_get_and_lock - Locks the cluster that holds a folio's entries.
 * @folio: The folio.
 *
 * This locks and returns the swap cluster that contains a folio's swap
 * entries. The swap entries of a folio are always in one single cluster.
 * The folio has to be locked so its swap entries won't change and the
 * cluster won't be freed.
 *
 * Context: Caller must ensure the folio is locked and in the swap cache.
 * Return: Pointer to the swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock(
		const struct folio *folio)
{
	return __swap_cluster_get_and_lock(folio, false);
}

/*
 * swap_cluster_get_and_lock_irq - Locks the cluster that holds a folio's entries.
 * @folio: The folio.
 *
 * Same as swap_cluster_get_and_lock() but also disables IRQs.
 *
 * Context: Caller must ensure the folio is locked and in the swap cache.
 * Return: Pointer to the swap cluster.
 */
static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
		const struct folio *folio)
{
	return __swap_cluster_get_and_lock(folio, true);
}

static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
	spin_unlock(&ci->lock);
}

static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
{
	spin_unlock_irq(&ci->lock);
}
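
/*
 * Illustrative usage sketch only (not a helper defined here), assuming the
 * caller has already stabilized @si, e.g. with get_swap_device():
 *
 *	struct swap_cluster_info *ci;
 *
 *	ci = swap_cluster_lock(si, swp_offset(entry));
 *	... inspect or update cluster state (ci->count, ci->flags, swap_map) ...
 *	swap_cluster_unlock(ci);
 *
 * A caller holding a locked swap cache folio can use
 * swap_cluster_get_and_lock(folio) instead, which derives the cluster
 * from folio->swap.
 */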

/* linux/mm/page_io.c */
int sio_pool_init(void);
struct swap_iocb;
void swap_read_folio(struct folio *folio, struct swap_iocb **plug);
void __swap_read_unplug(struct swap_iocb *plug);
static inline void swap_read_unplug(struct swap_iocb *plug)
{
	if (unlikely(plug))
		__swap_read_unplug(plug);
}
void swap_write_unplug(struct swap_iocb *sio);
int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug);
void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug);

/* linux/mm/swap_state.c */
extern struct address_space swap_space __ro_after_init;
static inline struct address_space *swap_address_space(swp_entry_t entry)
{
	return &swap_space;
}

/*
 * Return the swap device position of the swap entry.
 */
static inline loff_t swap_dev_pos(swp_entry_t entry)
{
	return ((loff_t)swp_offset(entry)) << PAGE_SHIFT;
}

/**
 * folio_matches_swap_entry - Check if a folio matches a given swap entry.
 * @folio: The folio.
 * @entry: The swap entry to check against.
 *
 * Context: The caller should have the folio locked to ensure it's stable
 * and nothing will move it in or out of the swap cache.
 * Return: true or false.
 */
static inline bool folio_matches_swap_entry(const struct folio *folio,
					    swp_entry_t entry)
{
	swp_entry_t folio_entry = folio->swap;
	long nr_pages = folio_nr_pages(folio);

	VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
	if (!folio_test_swapcache(folio))
		return false;
	VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio_entry.val, nr_pages), folio);
	return folio_entry.val == round_down(entry.val, nr_pages);
}

/*
 * All swap cache helpers below require the caller to ensure the swap entries
 * used are valid and to stabilize the device in any of the following ways:
 * - Hold a reference by get_swap_device(): this ensures a single entry is
 *   valid and increases the swap device's refcount.
 * - Locking a folio in the swap cache: this ensures the folio's swap entries
 *   are valid and pinned, and also implies a reference to the device.
 * - Locking anything referencing the swap entry: e.g. the PTL that protects
 *   swap entries in the page table, similar to locking a swap cache folio.
 * - See the comment of get_swap_device() for more complex usage.
 */
struct folio *swap_cache_get_folio(swp_entry_t entry);
void *swap_cache_get_shadow(swp_entry_t entry);
void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadow);
void swap_cache_del_folio(struct folio *folio);
/* Below helpers require the caller to lock and pass in the swap cluster. */
void __swap_cache_del_folio(struct swap_cluster_info *ci,
			    struct folio *folio, swp_entry_t entry, void *shadow);
void __swap_cache_replace_folio(struct swap_cluster_info *ci,
				struct folio *old, struct folio *new);
void __swap_cache_clear_shadow(swp_entry_t entry, int nr_ents);
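
/*
 * Illustrative lookup sketch only, assuming the entry is stabilized with
 * get_swap_device() and that swap_cache_get_folio() returns the folio with
 * a reference held (drop it with folio_put() when done):
 *
 *	si = get_swap_device(entry);
 *	if (!si)
 *		return ...;	(entry is no longer valid, e.g. after swapoff)
 *	folio = swap_cache_get_folio(entry);
 *	if (folio) {
 *		folio_lock(folio);
 *		if (folio_matches_swap_entry(folio, entry))
 *			... use the folio ...
 *		folio_unlock(folio);
 *		folio_put(folio);
 *	}
 *	put_swap_device(si);
 */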

void show_swap_cache_info(void);
void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr);
struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
		struct vm_area_struct *vma, unsigned long addr,
		struct swap_iocb **plug);
struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_flags,
		struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated,
		bool skip_if_exists);
struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
		struct mempolicy *mpol, pgoff_t ilx);
struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
		struct vm_fault *vmf);
void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
		unsigned long addr);

static inline unsigned int folio_swap_flags(struct folio *folio)
{
	return __swap_entry_to_info(folio->swap)->flags;
}

/*
 * Return the count of contiguous swap entries that share the same
 * zeromap status as the starting entry. If is_zeromap is not NULL,
 * it is set to the zeromap status of the starting entry.
 */
static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
		bool *is_zeromap)
{
	struct swap_info_struct *sis = __swap_entry_to_info(entry);
	unsigned long start = swp_offset(entry);
	unsigned long end = start + max_nr;
	bool first_bit;

	first_bit = test_bit(start, sis->zeromap);
	if (is_zeromap)
		*is_zeromap = first_bit;

	if (max_nr <= 1)
		return max_nr;
	if (first_bit)
		return find_next_zero_bit(sis->zeromap, end, start) - start;
	else
		return find_next_bit(sis->zeromap, end, start) - start;
}

static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	struct swap_info_struct *si = __swap_entry_to_info(entry);
	pgoff_t offset = swp_offset(entry);
	int i;

	/*
	 * When allocating a large folio for mTHP swapin, we need to ensure
	 * that none of the entries is already cached; otherwise the mTHP
	 * folio will conflict with the folio in the swap cache.
	 */
	for (i = 0; i < max_nr; i++) {
		if ((si->swap_map[offset + i] & SWAP_HAS_CACHE))
			return i;
	}

	return i;
}
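
/*
 * Illustrative sketch only: a swapin path sizing a large-folio batch might
 * clamp the batch so all entries share the zeromap state of the first entry
 * and none of them is already in the swap cache (variable names below are
 * hypothetical):
 *
 *	bool is_zeromap;
 *	int nr;
 *
 *	nr = swap_zeromap_batch(entry, max_nr, &is_zeromap);
 *	nr = non_swapcache_batch(entry, nr);
 *	if (nr < max_nr)
 *		... fall back to a smaller folio order ...
 */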

#else /* CONFIG_SWAP */
struct swap_iocb;
static inline struct swap_cluster_info *swap_cluster_lock(
		struct swap_info_struct *si, unsigned long offset)
{
	return NULL;
}

static inline struct swap_cluster_info *swap_cluster_get_and_lock(
		struct folio *folio)
{
	return NULL;
}

static inline struct swap_cluster_info *swap_cluster_get_and_lock_irq(
		struct folio *folio)
{
	return NULL;
}

static inline void swap_cluster_unlock(struct swap_cluster_info *ci)
{
}

static inline void swap_cluster_unlock_irq(struct swap_cluster_info *ci)
{
}

static inline struct swap_info_struct *__swap_entry_to_info(swp_entry_t entry)
{
	return NULL;
}

static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
}
static inline void swap_write_unplug(struct swap_iocb *sio)
{
}

static inline struct address_space *swap_address_space(swp_entry_t entry)
{
	return NULL;
}

static inline bool folio_matches_swap_entry(const struct folio *folio, swp_entry_t entry)
{
	return false;
}

static inline void show_swap_cache_info(void)
{
}

static inline struct folio *swap_cluster_readahead(swp_entry_t entry,
		gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx)
{
	return NULL;
}

static inline struct folio *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
		struct vm_fault *vmf)
{
	return NULL;
}

static inline void swap_update_readahead(struct folio *folio,
		struct vm_area_struct *vma, unsigned long addr)
{
}

static inline int swap_writeout(struct folio *folio,
		struct swap_iocb **swap_plug)
{
	return 0;
}

static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry, int nr)
{
}

static inline struct folio *swap_cache_get_folio(swp_entry_t entry)
{
	return NULL;
}

static inline void *swap_cache_get_shadow(swp_entry_t entry)
{
	return NULL;
}

static inline void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadow)
{
}

static inline void swap_cache_del_folio(struct folio *folio)
{
}

static inline void __swap_cache_del_folio(struct swap_cluster_info *ci,
		struct folio *folio, swp_entry_t entry, void *shadow)
{
}

static inline void __swap_cache_replace_folio(struct swap_cluster_info *ci,
		struct folio *old, struct folio *new)
{
}

static inline unsigned int folio_swap_flags(struct folio *folio)
{
	return 0;
}

static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr,
		bool *is_zeromap)
{
	return 0;
}

static inline int non_swapcache_batch(swp_entry_t entry, int max_nr)
{
	return 0;
}
#endif /* CONFIG_SWAP */

/**
 * folio_index - File index of a folio.
 * @folio: The folio.
 *
 * For a folio which is either in the page cache or the swap cache,
 * return its index within the address_space it belongs to. If you know
 * the folio is definitely in the page cache, you can look at the folio's
 * index directly.
 *
 * Return: The index (offset in units of pages) of a folio in its file.
 */
static inline pgoff_t folio_index(struct folio *folio)
{
#ifdef CONFIG_SWAP
	if (unlikely(folio_test_swapcache(folio)))
		return swp_offset(folio->swap);
#endif
	return folio->index;
}
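
/*
 * Illustrative sketch only: for a swap cache folio the returned index is the
 * swap offset, so generic code can, for example, derive the backing position
 * the same way swap_dev_pos() does for a bare entry (assumption made here
 * purely for illustration):
 *
 *	loff_t pos = (loff_t)folio_index(folio) << PAGE_SHIFT;
 */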

#endif /* _MM_SWAP_H */