Lines Matching +full:lock +full:- +full:offset

1 // SPDX-License-Identifier: GPL-2.0-only
22 #include <linux/blk-cgroup.h>
31 #include <linux/backing-dev.h>
63 unsigned long offset);
75 /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
77 static int least_priority = -1;
85 static const char Bad_offset[] = "Bad swap offset entry ";
86 static const char Unused_offset[] = "Unused swap offset entry ";
100 * This uses its own lock instead of swap_lock because when a
101 * swap_info_struct changes between not-full/full, it needs to
102 * add/remove itself to/from this list, but the swap_info_struct->lock
104 * before any swap_info_struct->lock.
139 * off-list bit in the atomic counter, updates no longer need any lock
145 #define SWAP_USAGE_OFFLIST_BIT (1UL << (BITS_PER_TYPE(atomic_t) - 2))
149 return atomic_long_read(&si->inuse_pages) & SWAP_USAGE_COUNTER_MASK; in swap_usage_in_pages()
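
A minimal userspace sketch of the idea behind SWAP_USAGE_OFFLIST_BIT: the in-use count and an
"off-list" flag share one atomic word, so both can be read and updated without taking a lock.
The bit position, names, and C11 atomics below are illustrative, not the kernel's.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define OFFLIST_BIT   (1UL << (sizeof(unsigned long) * 8 - 2))  /* one high bit: "off the list" */
#define COUNTER_MASK  (OFFLIST_BIT - 1)                         /* low bits: in-use page count */

static atomic_ulong inuse;                 /* stand-in for si->inuse_pages */

static unsigned long usage_in_pages(void)
{
	return atomic_load(&inuse) & COUNTER_MASK;
}

static bool is_offlist(void)
{
	return atomic_load(&inuse) & OFFLIST_BIT;
}

int main(void)
{
	atomic_fetch_add(&inuse, 3);           /* three slots allocated */
	atomic_fetch_or(&inuse, OFFLIST_BIT);  /* device taken off the avail list */
	printf("%lu pages in use, off-list=%d\n", usage_in_pages(), (int)is_offlist());
	return 0;
}
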
161 /* Reclaim directly, bypass the slot cache and don't touch device lock */
165 unsigned long offset, int nr_pages) in swap_is_has_cache() argument
167 unsigned char *map = si->swap_map + offset; in swap_is_has_cache()
180 unsigned long offset, int nr_pages, bool *has_cache) in swap_is_last_map() argument
182 unsigned char *map = si->swap_map + offset; in swap_is_last_map()
204 unsigned long offset, unsigned long flags) in __try_to_reclaim_swap() argument
206 swp_entry_t entry = swp_entry(si->type, offset); in __try_to_reclaim_swap()
218 ret = -nr_pages; in __try_to_reclaim_swap()
222 * called by vmscan.c at reclaiming folios. So we hold a folio lock in __try_to_reclaim_swap()
230 /* offset could point to the middle of a large folio */ in __try_to_reclaim_swap()
231 entry = folio->swap; in __try_to_reclaim_swap()
232 offset = swp_offset(entry); in __try_to_reclaim_swap()
245 ci = lock_cluster(si, offset); in __try_to_reclaim_swap()
246 need_reclaim = swap_is_has_cache(si, offset, nr_pages); in __try_to_reclaim_swap()
259 xa_lock_irq(&address_space->i_pages); in __try_to_reclaim_swap()
261 xa_unlock_irq(&address_space->i_pages); in __try_to_reclaim_swap()
265 ci = lock_cluster(si, offset); in __try_to_reclaim_swap()
278 struct rb_node *rb = rb_first(&sis->swap_extent_root); in first_se()
284 struct rb_node *rb = rb_next(&se->rb_node); in next_se()
290 * to allow the swap device to optimize its wear-levelling.
301 start_block = (se->start_block + 1) << (PAGE_SHIFT - 9); in discard_swap()
302 nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); in discard_swap()
304 err = blkdev_issue_discard(si->bdev, start_block, in discard_swap()
312 start_block = se->start_block << (PAGE_SHIFT - 9); in discard_swap()
313 nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); in discard_swap()
315 err = blkdev_issue_discard(si->bdev, start_block, in discard_swap()
322 return err; /* That will often be -EOPNOTSUPP */ in discard_swap()
326 offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) in offset_to_swap_extent() argument
331 rb = sis->swap_extent_root.rb_node; in offset_to_swap_extent()
334 if (offset < se->start_page) in offset_to_swap_extent()
335 rb = rb->rb_left; in offset_to_swap_extent()
336 else if (offset >= se->start_page + se->nr_pages) in offset_to_swap_extent()
337 rb = rb->rb_right; in offset_to_swap_extent()
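
offset_to_swap_extent() above walks the extent rbtree with the usual interval test: go left when
the offset is below the extent's first page, right when it is at or past the extent's end,
otherwise the extent contains the offset. A sketch of the same test over a sorted array
(hypothetical extent type; a plain binary search stands in for the rbtree):

#include <stddef.h>
#include <stdio.h>

struct extent { unsigned long start_page, nr_pages, start_block; };

static const struct extent *lookup(const struct extent *ext, size_t n, unsigned long offset)
{
	size_t lo = 0, hi = n;

	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;

		if (offset < ext[mid].start_page)
			hi = mid;                               /* rb_left */
		else if (offset >= ext[mid].start_page + ext[mid].nr_pages)
			lo = mid + 1;                           /* rb_right */
		else
			return &ext[mid];                       /* offset falls inside this extent */
	}
	return NULL;
}

int main(void)
{
	const struct extent map[] = { { 0, 100, 1000 }, { 100, 50, 5000 } };
	const struct extent *se = lookup(map, 2, 120);

	if (se)
		printf("block %lu\n", se->start_block + (120 - se->start_page));
	return 0;
}
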
347 struct swap_info_struct *sis = swp_swap_info(folio->swap); in swap_folio_sector()
350 pgoff_t offset; in swap_folio_sector() local
352 offset = swp_offset(folio->swap); in swap_folio_sector()
353 se = offset_to_swap_extent(sis, offset); in swap_folio_sector()
354 sector = se->start_block + (offset - se->start_page); in swap_folio_sector()
355 return sector << (PAGE_SHIFT - 9); in swap_folio_sector()
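
The recurring "<< (PAGE_SHIFT - 9)" shifts convert page-sized block numbers into 512-byte sector
numbers for the block layer. A stand-alone illustration, assuming 4 KiB pages (PAGE_SHIFT = 12):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT   12                    /* assumed 4 KiB pages */
#define SECTOR_SHIFT 9                     /* 512-byte sectors */

int main(void)
{
	uint64_t start_block = 1000;           /* extent start, in pages */
	uint64_t offset_in_extent = 5;         /* page offset inside the extent */

	/* page number -> sector number: one page is 1 << (12 - 9) = 8 sectors */
	uint64_t sector = (start_block + offset_in_extent) << (PAGE_SHIFT - SECTOR_SHIFT);

	printf("page %llu -> sector %llu\n",
	       (unsigned long long)(start_block + offset_in_extent),
	       (unsigned long long)sector);
	return 0;
}
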
360 * to allow the swap device to optimize its wear-levelling.
368 pgoff_t offset = start_page - se->start_page; in discard_swap_cluster() local
369 sector_t start_block = se->start_block + offset; in discard_swap_cluster()
370 sector_t nr_blocks = se->nr_pages - offset; in discard_swap_cluster()
375 nr_pages -= nr_blocks; in discard_swap_cluster()
377 start_block <<= PAGE_SHIFT - 9; in discard_swap_cluster()
378 nr_blocks <<= PAGE_SHIFT - 9; in discard_swap_cluster()
379 if (blkdev_issue_discard(si->bdev, start_block, in discard_swap_cluster()
404 return info->count == 0; in cluster_is_empty()
409 return info->flags == CLUSTER_FLAG_DISCARD; in cluster_is_discard()
414 if (unlikely(ci->flags > CLUSTER_FLAG_USABLE)) in cluster_is_usable()
418 return cluster_is_empty(ci) || order == ci->order; in cluster_is_usable()
424 return ci - si->cluster_info; in cluster_index()
428 unsigned long offset) in offset_to_cluster() argument
430 return &si->cluster_info[offset / SWAPFILE_CLUSTER]; in offset_to_cluster()
440 unsigned long offset) in lock_cluster() argument
444 ci = offset_to_cluster(si, offset); in lock_cluster()
445 spin_lock(&ci->lock); in lock_cluster()
452 spin_unlock(&ci->lock); in unlock_cluster()
459 VM_WARN_ON(ci->flags == new_flags); in move_cluster()
461 BUILD_BUG_ON(1 << sizeof(ci->flags) * BITS_PER_BYTE < CLUSTER_FLAG_MAX); in move_cluster()
462 lockdep_assert_held(&ci->lock); in move_cluster()
464 spin_lock(&si->lock); in move_cluster()
465 if (ci->flags == CLUSTER_FLAG_NONE) in move_cluster()
466 list_add_tail(&ci->list, list); in move_cluster()
468 list_move_tail(&ci->list, list); in move_cluster()
469 spin_unlock(&si->lock); in move_cluster()
471 if (ci->flags == CLUSTER_FLAG_FRAG) in move_cluster()
472 atomic_long_dec(&si->frag_cluster_nr[ci->order]); in move_cluster()
474 atomic_long_inc(&si->frag_cluster_nr[ci->order]); in move_cluster()
475 ci->flags = new_flags; in move_cluster()
485 * si->swap_map directly. To make sure the discarding cluster isn't in swap_cluster_schedule_discard()
489 memset(si->swap_map + idx * SWAPFILE_CLUSTER, in swap_cluster_schedule_discard()
491 VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE); in swap_cluster_schedule_discard()
492 move_cluster(si, ci, &si->discard_clusters, CLUSTER_FLAG_DISCARD); in swap_cluster_schedule_discard()
493 schedule_work(&si->discard_work); in swap_cluster_schedule_discard()
498 lockdep_assert_held(&ci->lock); in __free_cluster()
499 move_cluster(si, ci, &si->free_clusters, CLUSTER_FLAG_FREE); in __free_cluster()
500 ci->order = 0; in __free_cluster()
504 * Isolate and lock the first cluster that is not contended on a list,
505 * clean its flag before it is taken off-list. Cluster flag must be in sync
507 * list status without touching si lock.
510 * this returns NULL for a non-empty list.
517 spin_lock(&si->lock); in isolate_lock_cluster()
519 if (unlikely(!(si->flags & SWP_WRITEOK))) in isolate_lock_cluster()
523 if (!spin_trylock(&ci->lock)) in isolate_lock_cluster()
527 VM_BUG_ON(!ci->flags); in isolate_lock_cluster()
528 VM_BUG_ON(ci->flags > CLUSTER_FLAG_USABLE && in isolate_lock_cluster()
529 ci->flags != CLUSTER_FLAG_FULL); in isolate_lock_cluster()
531 list_del(&ci->list); in isolate_lock_cluster()
532 ci->flags = CLUSTER_FLAG_NONE; in isolate_lock_cluster()
537 spin_unlock(&si->lock); in isolate_lock_cluster()
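
isolate_lock_cluster() above shows the pattern: hold the list lock, try-lock each entry, skip
entries whose lock is contended, and take the first one that locks cleanly off the list so
further work needs only its own lock. A compressed pthread sketch of that pattern (hypothetical
node type; pthread mutexes stand in for the spinlocks, an array for the cluster lists):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	pthread_mutex_t lock;
	bool on_list;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns a node locked and already removed from the list, or NULL. */
static struct node *isolate_first_uncontended(struct node *nodes, size_t n)
{
	struct node *found = NULL;

	pthread_mutex_lock(&list_lock);
	for (size_t i = 0; i < n; i++) {
		if (!nodes[i].on_list)
			continue;
		if (pthread_mutex_trylock(&nodes[i].lock) != 0)
			continue;                       /* contended: leave it for someone else */
		nodes[i].on_list = false;               /* taken off-list while both locks are held */
		found = &nodes[i];
		break;
	}
	pthread_mutex_unlock(&list_lock);
	return found;                                   /* caller unlocks found->lock when done */
}

int main(void)
{
	struct node nodes[2] = {
		{ PTHREAD_MUTEX_INITIALIZER, true },
		{ PTHREAD_MUTEX_INITIALIZER, true },
	};
	struct node *got = isolate_first_uncontended(nodes, 2);

	if (got)
		pthread_mutex_unlock(&got->lock);
	return 0;
}
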
546 * CLUSTER_FLAG_DISCARD must remain off-list or on the discard list.
554 spin_lock(&si->lock); in swap_do_scheduled_discard()
555 while (!list_empty(&si->discard_clusters)) { in swap_do_scheduled_discard()
556 ci = list_first_entry(&si->discard_clusters, struct swap_cluster_info, list); in swap_do_scheduled_discard()
562 list_del(&ci->list); in swap_do_scheduled_discard()
564 spin_unlock(&si->lock); in swap_do_scheduled_discard()
568 spin_lock(&ci->lock); in swap_do_scheduled_discard()
570 * Discard is done, clear its flags as it's off-list, then in swap_do_scheduled_discard()
573 ci->flags = CLUSTER_FLAG_NONE; in swap_do_scheduled_discard()
574 memset(si->swap_map + idx * SWAPFILE_CLUSTER, in swap_do_scheduled_discard()
577 spin_unlock(&ci->lock); in swap_do_scheduled_discard()
579 spin_lock(&si->lock); in swap_do_scheduled_discard()
581 spin_unlock(&si->lock); in swap_do_scheduled_discard()
599 complete(&si->comp); in swap_users_ref_free()
603 * Must be called after freeing if ci->count == 0, moves the cluster to free
608 VM_BUG_ON(ci->count != 0); in free_cluster()
609 VM_BUG_ON(ci->flags == CLUSTER_FLAG_FREE); in free_cluster()
610 lockdep_assert_held(&ci->lock); in free_cluster()
617 if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == in free_cluster()
627 * Must be called after freeing if ci->count != 0, moves the cluster to
633 VM_BUG_ON(!ci->count || ci->count == SWAPFILE_CLUSTER); in partial_free_cluster()
634 lockdep_assert_held(&ci->lock); in partial_free_cluster()
636 if (ci->flags != CLUSTER_FLAG_NONFULL) in partial_free_cluster()
637 move_cluster(si, ci, &si->nonfull_clusters[ci->order], in partial_free_cluster()
643 * Note: allocation doesn't acquire si lock, and may drop the ci lock for
649 lockdep_assert_held(&ci->lock); in relocate_cluster()
651 /* Discard cluster must remain off-list or on discard list */ in relocate_cluster()
655 if (!ci->count) { in relocate_cluster()
656 if (ci->flags != CLUSTER_FLAG_FREE) in relocate_cluster()
658 } else if (ci->count != SWAPFILE_CLUSTER) { in relocate_cluster()
659 if (ci->flags != CLUSTER_FLAG_FRAG) in relocate_cluster()
660 move_cluster(si, ci, &si->frag_clusters[ci->order], in relocate_cluster()
663 if (ci->flags != CLUSTER_FLAG_FULL) in relocate_cluster()
664 move_cluster(si, ci, &si->full_clusters, in relocate_cluster()
681 ci->count++; in inc_cluster_info_page()
683 VM_BUG_ON(ci->count > SWAPFILE_CLUSTER); in inc_cluster_info_page()
684 VM_BUG_ON(ci->flags); in inc_cluster_info_page()
691 unsigned char *map = si->swap_map; in cluster_reclaim_range()
692 unsigned long offset = start; in cluster_reclaim_range() local
695 spin_unlock(&ci->lock); in cluster_reclaim_range()
697 switch (READ_ONCE(map[offset])) { in cluster_reclaim_range()
699 offset++; in cluster_reclaim_range()
702 nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY | TTRS_DIRECT); in cluster_reclaim_range()
704 offset += nr_reclaim; in cluster_reclaim_range()
711 } while (offset < end); in cluster_reclaim_range()
713 spin_lock(&ci->lock); in cluster_reclaim_range()
716 * could have been freed while we are not holding the lock. in cluster_reclaim_range()
718 for (offset = start; offset < end; offset++) in cluster_reclaim_range()
719 if (READ_ONCE(map[offset])) in cluster_reclaim_range()
730 unsigned long offset, end = start + nr_pages; in cluster_scan_range() local
731 unsigned char *map = si->swap_map; in cluster_scan_range()
733 for (offset = start; offset < end; offset++) { in cluster_scan_range()
734 switch (READ_ONCE(map[offset])) { in cluster_scan_range()
756 lockdep_assert_held(&ci->lock); in cluster_alloc_range()
758 if (!(si->flags & SWP_WRITEOK)) in cluster_alloc_range()
766 ci->order = order; in cluster_alloc_range()
768 memset(si->swap_map + start, usage, nr_pages); in cluster_alloc_range()
770 ci->count += nr_pages; in cluster_alloc_range()
778 unsigned long offset, in alloc_swap_scan_cluster() argument
783 unsigned long start = ALIGN_DOWN(offset, SWAPFILE_CLUSTER); in alloc_swap_scan_cluster()
784 unsigned long end = min(start + SWAPFILE_CLUSTER, si->max); in alloc_swap_scan_cluster()
788 lockdep_assert_held(&ci->lock); in alloc_swap_scan_cluster()
790 if (end < nr_pages || ci->count + nr_pages > SWAPFILE_CLUSTER) in alloc_swap_scan_cluster()
793 for (end -= nr_pages; offset <= end; offset += nr_pages) { in alloc_swap_scan_cluster()
795 if (!cluster_scan_range(si, ci, offset, nr_pages, &need_reclaim)) in alloc_swap_scan_cluster()
798 ret = cluster_reclaim_range(si, ci, offset, offset + nr_pages); in alloc_swap_scan_cluster()
800 * Reclaim drops ci->lock and cluster could be used in alloc_swap_scan_cluster()
801 * by another order. Not checking flag as off-list in alloc_swap_scan_cluster()
808 offset = start; in alloc_swap_scan_cluster()
813 if (!cluster_alloc_range(si, ci, offset, usage, order)) in alloc_swap_scan_cluster()
815 found = offset; in alloc_swap_scan_cluster()
816 offset += nr_pages; in alloc_swap_scan_cluster()
817 if (ci->count < SWAPFILE_CLUSTER && offset <= end) in alloc_swap_scan_cluster()
818 next = offset; in alloc_swap_scan_cluster()
824 if (si->flags & SWP_SOLIDSTATE) in alloc_swap_scan_cluster()
825 __this_cpu_write(si->percpu_cluster->next[order], next); in alloc_swap_scan_cluster()
827 si->global_cluster->next[order] = next; in alloc_swap_scan_cluster()
835 unsigned long offset, end; in swap_reclaim_full_clusters() local
837 unsigned char *map = si->swap_map; in swap_reclaim_full_clusters()
843 while ((ci = isolate_lock_cluster(si, &si->full_clusters))) { in swap_reclaim_full_clusters()
844 offset = cluster_offset(si, ci); in swap_reclaim_full_clusters()
845 end = min(si->max, offset + SWAPFILE_CLUSTER); in swap_reclaim_full_clusters()
846 to_scan--; in swap_reclaim_full_clusters()
848 while (offset < end) { in swap_reclaim_full_clusters()
849 if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) { in swap_reclaim_full_clusters()
850 spin_unlock(&ci->lock); in swap_reclaim_full_clusters()
851 nr_reclaim = __try_to_reclaim_swap(si, offset, in swap_reclaim_full_clusters()
853 spin_lock(&ci->lock); in swap_reclaim_full_clusters()
855 offset += abs(nr_reclaim); in swap_reclaim_full_clusters()
859 offset++; in swap_reclaim_full_clusters()
863 if (ci->flags == CLUSTER_FLAG_NONE) in swap_reclaim_full_clusters()
890 unsigned int offset, found = 0; in cluster_alloc_swap_entry() local
892 if (si->flags & SWP_SOLIDSTATE) { in cluster_alloc_swap_entry()
894 local_lock(&si->percpu_cluster->lock); in cluster_alloc_swap_entry()
895 offset = __this_cpu_read(si->percpu_cluster->next[order]); in cluster_alloc_swap_entry()
898 spin_lock(&si->global_cluster_lock); in cluster_alloc_swap_entry()
899 offset = si->global_cluster->next[order]; in cluster_alloc_swap_entry()
902 if (offset) { in cluster_alloc_swap_entry()
903 ci = lock_cluster(si, offset); in cluster_alloc_swap_entry()
907 offset = cluster_offset(si, ci); in cluster_alloc_swap_entry()
908 found = alloc_swap_scan_cluster(si, ci, offset, in cluster_alloc_swap_entry()
918 ci = isolate_lock_cluster(si, &si->free_clusters); in cluster_alloc_swap_entry()
933 while ((ci = isolate_lock_cluster(si, &si->nonfull_clusters[order]))) { in cluster_alloc_swap_entry()
942 frags_existing = atomic_long_read(&si->frag_cluster_nr[order]); in cluster_alloc_swap_entry()
944 (ci = isolate_lock_cluster(si, &si->frag_clusters[order]))) { in cluster_alloc_swap_entry()
945 atomic_long_dec(&si->frag_cluster_nr[order]); in cluster_alloc_swap_entry()
949 * per-CPU usage, but they could contain newly released in cluster_alloc_swap_entry()
950 * reclaimable (e.g. lazy-freed swap cache) slots. in cluster_alloc_swap_entry()
963 * reread cluster_next_cpu since we dropped si->lock in cluster_alloc_swap_entry()
965 if ((si->flags & SWP_PAGE_DISCARD) && swap_do_scheduled_discard(si)) in cluster_alloc_swap_entry()
975 * allocation, but reclaim may drop si->lock and race with another user. in cluster_alloc_swap_entry()
977 while ((ci = isolate_lock_cluster(si, &si->frag_clusters[o]))) { in cluster_alloc_swap_entry()
978 atomic_long_dec(&si->frag_cluster_nr[o]); in cluster_alloc_swap_entry()
985 while ((ci = isolate_lock_cluster(si, &si->nonfull_clusters[o]))) { in cluster_alloc_swap_entry()
993 if (si->flags & SWP_SOLIDSTATE) in cluster_alloc_swap_entry()
994 local_unlock(&si->percpu_cluster->lock); in cluster_alloc_swap_entry()
996 spin_unlock(&si->global_cluster_lock); in cluster_alloc_swap_entry()
1011 * swapoff here so it's synchronized by both si->lock and in del_from_avail_list()
1015 lockdep_assert_held(&si->lock); in del_from_avail_list()
1016 si->flags &= ~SWP_WRITEOK; in del_from_avail_list()
1017 atomic_long_or(SWAP_USAGE_OFFLIST_BIT, &si->inuse_pages); in del_from_avail_list()
1020 * If not called by swapoff, take it off-list only if it's in del_from_avail_list()
1022 * si->inuse_pages == pages), any concurrent slot freeing, in del_from_avail_list()
1026 pages = si->pages; in del_from_avail_list()
1027 if (!atomic_long_try_cmpxchg(&si->inuse_pages, &pages, in del_from_avail_list()
1033 plist_del(&si->avail_lists[nid], &swap_avail_heads[nid]); in del_from_avail_list()
1050 lockdep_assert_held(&si->lock); in add_to_avail_list()
1051 si->flags |= SWP_WRITEOK; in add_to_avail_list()
1053 if (!(READ_ONCE(si->flags) & SWP_WRITEOK)) in add_to_avail_list()
1057 if (!(atomic_long_read(&si->inuse_pages) & SWAP_USAGE_OFFLIST_BIT)) in add_to_avail_list()
1060 val = atomic_long_fetch_and_relaxed(~SWAP_USAGE_OFFLIST_BIT, &si->inuse_pages); in add_to_avail_list()
1064 * see (inuse_pages == si->pages) and will call del_from_avail_list. If in add_to_avail_list()
1067 pages = si->pages; in add_to_avail_list()
1070 if (atomic_long_try_cmpxchg(&si->inuse_pages, &pages, in add_to_avail_list()
1076 plist_add(&si->avail_lists[nid], &swap_avail_heads[nid]); in add_to_avail_list()
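
del_from_avail_list() and add_to_avail_list() flip the off-list bit with a compare-and-swap
against the expected page count, so a racing allocation or free makes the cmpxchg fail and the
decision is re-checked rather than leaving the device on the wrong list. A minimal C11-atomics
sketch of that check (constants and helper names are illustrative, not the kernel's):

#include <stdatomic.h>
#include <stdbool.h>

#define OFFLIST_BIT  (1UL << (sizeof(unsigned long) * 8 - 2))
#define DEVICE_PAGES 1024UL                     /* assumed total slot count (si->pages) */

static atomic_ulong inuse;                      /* stand-in for si->inuse_pages */

/* Take the device off-list only if it is still completely full. */
static bool try_mark_full_offlist(void)
{
	unsigned long expected = DEVICE_PAGES;  /* full, off-list bit clear */

	return atomic_compare_exchange_strong(&inuse, &expected,
					      DEVICE_PAGES | OFFLIST_BIT);
}

/* Put it back on-list only if it is still off-list and no longer full. */
static bool try_put_back_onlist(void)
{
	unsigned long cur = atomic_load(&inuse);

	if (!(cur & OFFLIST_BIT) || (cur & ~OFFLIST_BIT) == DEVICE_PAGES)
		return false;
	return atomic_compare_exchange_strong(&inuse, &cur, cur & ~OFFLIST_BIT);
}

int main(void)
{
	atomic_store(&inuse, DEVICE_PAGES);     /* pretend every slot is allocated */
	try_mark_full_offlist();
	atomic_fetch_sub(&inuse, 1);            /* one slot freed by a concurrent path */
	try_put_back_onlist();
	return 0;
}
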
1083 * swap_usage_add / swap_usage_sub of each slot are serialized by ci->lock
1089 long val = atomic_long_add_return_relaxed(nr_entries, &si->inuse_pages); in swap_usage_add()
1095 if (unlikely(val == si->pages)) { in swap_usage_add()
1105 long val = atomic_long_sub_return_relaxed(nr_entries, &si->inuse_pages); in swap_usage_sub()
1120 schedule_work(&si->reclaim_work); in swap_range_alloc()
1124 static void swap_range_free(struct swap_info_struct *si, unsigned long offset, in swap_range_free() argument
1127 unsigned long begin = offset; in swap_range_free()
1128 unsigned long end = offset + nr_entries - 1; in swap_range_free()
1133 * Use atomic clear_bit operations only on zeromap instead of non-atomic in swap_range_free()
1137 clear_bit(offset + i, si->zeromap); in swap_range_free()
1138 zswap_invalidate(swp_entry(si->type, offset + i)); in swap_range_free()
1141 if (si->flags & SWP_BLKDEV) in swap_range_free()
1143 si->bdev->bd_disk->fops->swap_slot_free_notify; in swap_range_free()
1146 while (offset <= end) { in swap_range_free()
1147 arch_swap_invalidate_page(si->type, offset); in swap_range_free()
1149 swap_slot_free_notify(si->bdev, offset); in swap_range_free()
1150 offset++; in swap_range_free()
1152 clear_shadow_from_swap_cache(si->type, begin, end); in swap_range_free()
1155 * Make sure that try_to_unuse() observes si->inuse_pages reaching 0 in swap_range_free()
1170 unsigned long offset = cluster_alloc_swap_entry(si, order, usage); in cluster_alloc_swap() local
1172 if (!offset) in cluster_alloc_swap()
1174 slots[n_ret++] = swp_entry(si->type, offset); in cluster_alloc_swap()
1189 * way, however, we resort to first-free allocation, starting in scan_swap_map_slots()
1192 * overall disk seek times between swap pages. -- sct in scan_swap_map_slots()
1193 * But we do now try to find an empty cluster. -Andrea in scan_swap_map_slots()
1211 if (!(si->flags & SWP_BLKDEV)) in scan_swap_map_slots()
1220 if (!percpu_ref_tryget_live(&si->users)) in get_swap_device_info()
1223 * Guarantee the si->users are checked before accessing other in get_swap_device_info()
1224 * fields of swap_info_struct, and si->flags (SWP_WRITEOK) is in get_swap_device_info()
1258 /* requeue si to after same-priority siblings */ in get_swap_pages()
1259 plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]); in get_swap_pages()
1272 * and since scan_swap_map_slots() can drop the si->lock, in get_swap_pages()
1276 * si->lock. Since we dropped the swap_avail_lock, the in get_swap_pages()
1281 if (plist_node_empty(&next->avail_lists[node])) in get_swap_pages()
1289 atomic_long_add((long)(n_goal - n_ret) * size, in get_swap_pages()
1298 unsigned long offset; in _swap_info_get() local
1305 if (data_race(!(si->flags & SWP_USED))) in _swap_info_get()
1307 offset = swp_offset(entry); in _swap_info_get()
1308 if (offset >= si->max) in _swap_info_get()
1310 if (data_race(!si->swap_map[swp_offset(entry)])) in _swap_info_get()
1330 unsigned long offset, in __swap_entry_free_locked() argument
1336 count = si->swap_map[offset]; in __swap_entry_free_locked()
1352 if (swap_count_continued(si, offset, count)) in __swap_entry_free_locked()
1357 count--; in __swap_entry_free_locked()
1362 WRITE_ONCE(si->swap_map[offset], usage); in __swap_entry_free_locked()
1364 WRITE_ONCE(si->swap_map[offset], SWAP_HAS_CACHE); in __swap_entry_free_locked()
1377 * RCU reader side lock (including any spinlock) is sufficient to
1411 unsigned long offset; in get_swap_device() local
1420 offset = swp_offset(entry); in get_swap_device()
1421 if (offset >= si->max) in get_swap_device()
1431 percpu_ref_put(&si->users); in get_swap_device()
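
get_swap_device() relies on the try-get-while-live pattern: take a reference only if the count
has not already dropped to zero, and put it back if the later validity checks fail. A
plain-atomics sketch of the idea (not the kernel's percpu_ref implementation):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_ulong refs = 1;                   /* 1 while the device is live */

static bool tryget_live(void)
{
	unsigned long old = atomic_load(&refs);

	while (old != 0) {                      /* already dead: refuse new references */
		if (atomic_compare_exchange_weak(&refs, &old, old + 1))
			return true;
	}
	return false;
}

static void put(void)
{
	atomic_fetch_sub(&refs, 1);             /* the last put would trigger teardown */
}

int main(void)
{
	if (tryget_live())
		put();
	return 0;
}
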
1439 unsigned long offset = swp_offset(entry); in __swap_entry_free() local
1442 ci = lock_cluster(si, offset); in __swap_entry_free()
1443 usage = __swap_entry_free_locked(si, offset, 1); in __swap_entry_free()
1445 swap_entry_range_free(si, ci, swp_entry(si->type, offset), 1); in __swap_entry_free()
1454 unsigned long offset = swp_offset(entry); in __swap_entries_free() local
1461 if (nr <= 1 || swap_count(data_race(si->swap_map[offset])) != 1) in __swap_entries_free()
1464 if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER) in __swap_entries_free()
1467 ci = lock_cluster(si, offset); in __swap_entries_free()
1468 if (!swap_is_last_map(si, offset, nr, &has_cache)) { in __swap_entries_free()
1473 WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE); in __swap_entries_free()
1482 if (data_race(si->swap_map[offset + i])) { in __swap_entries_free()
1483 count = __swap_entry_free(si, swp_entry(type, offset + i)); in __swap_entries_free()
1501 unsigned long offset = swp_offset(entry); in swap_entry_range_free() local
1502 unsigned char *map = si->swap_map + offset; in swap_entry_range_free()
1506 VM_BUG_ON(ci != offset_to_cluster(si, offset + nr_pages - 1)); in swap_entry_range_free()
1508 VM_BUG_ON(ci->count < nr_pages); in swap_entry_range_free()
1510 ci->count -= nr_pages; in swap_entry_range_free()
1517 swap_range_free(si, offset, nr_pages); in swap_entry_range_free()
1519 if (!ci->count) in swap_entry_range_free()
1526 unsigned long offset, int nr_pages, in cluster_swap_free_nr() argument
1530 unsigned long end = offset + nr_pages; in cluster_swap_free_nr()
1532 ci = lock_cluster(si, offset); in cluster_swap_free_nr()
1534 if (!__swap_entry_free_locked(si, offset, usage)) in cluster_swap_free_nr()
1535 swap_entry_range_free(si, ci, swp_entry(si->type, offset), 1); in cluster_swap_free_nr()
1536 } while (++offset < end); in cluster_swap_free_nr()
1548 unsigned long offset = swp_offset(entry); in swap_free_nr() local
1555 nr = min_t(int, nr_pages, SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER); in swap_free_nr()
1556 cluster_swap_free_nr(sis, offset, nr, 1); in swap_free_nr()
1557 offset += nr; in swap_free_nr()
1558 nr_pages -= nr; in swap_free_nr()
1567 unsigned long offset = swp_offset(entry); in put_swap_folio() local
1576 ci = lock_cluster(si, offset); in put_swap_folio()
1577 if (swap_is_has_cache(si, offset, size)) in put_swap_folio()
1581 if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) in put_swap_folio()
1610 pgoff_t offset = swp_offset(entry); in __swap_count() local
1612 return swap_count(si->swap_map[offset]); in __swap_count()
1622 pgoff_t offset = swp_offset(entry); in swap_swapcount() local
1626 ci = lock_cluster(si, offset); in swap_swapcount()
1627 count = swap_count(si->swap_map[offset]); in swap_swapcount()
1642 pgoff_t offset; in swp_swapcount() local
1649 offset = swp_offset(entry); in swp_swapcount()
1651 ci = lock_cluster(si, offset); in swp_swapcount()
1653 count = swap_count(si->swap_map[offset]); in swp_swapcount()
1660 page = vmalloc_to_page(si->swap_map + offset); in swp_swapcount()
1661 offset &= ~PAGE_MASK; in swp_swapcount()
1667 tmp_count = map[offset]; in swp_swapcount()
1682 unsigned char *map = si->swap_map; in swap_page_trans_huge_swapped()
1685 unsigned long offset = round_down(roffset, nr_pages); in swap_page_trans_huge_swapped() local
1689 ci = lock_cluster(si, offset); in swap_page_trans_huge_swapped()
1696 if (swap_count(map[offset + i])) { in swap_page_trans_huge_swapped()
1708 swp_entry_t entry = folio->swap; in folio_swapped()
1732 * - most probably a call from __try_to_reclaim_swap() while in folio_swapcache_freeable()
1734 * but conceivably even a call from memory reclaim - will free in folio_swapcache_freeable()
1751 * folio_free_swap() - Free the swap space used for this folio.
1772 * free_swap_and_cache_nr() - Release reference on range of swap entries and
1779 * offset range is defined by [entry.offset, entry.offset + nr).
1787 unsigned long offset; in free_swap_and_cache_nr() local
1796 if (WARN_ON(end_offset > si->max)) in free_swap_and_cache_nr()
1805 * Short-circuit the below loop if none of the entries had their in free_swap_and_cache_nr()
1816 * latter will get a reference and lock the folio for every individual in free_swap_and_cache_nr()
1820 for (offset = start_offset; offset < end_offset; offset += nr) { in free_swap_and_cache_nr()
1822 if (READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) { in free_swap_and_cache_nr()
1831 nr = __try_to_reclaim_swap(si, offset, in free_swap_and_cache_nr()
1836 nr = -nr; in free_swap_and_cache_nr()
1837 nr = ALIGN(offset + 1, nr) - offset; in free_swap_and_cache_nr()
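
When __try_to_reclaim_swap() fails on a large folio it returns the folio size negated, and the
loop above rounds the next offset up to that folio's boundary so the remaining slots of the same
folio are not retried one by one. The arithmetic, stand-alone (ALIGN assumes a power-of-two
alignment):

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))   /* a must be a power of two */

int main(void)
{
	unsigned long offset = 21;   /* slot where reclaim failed */
	unsigned long nr = 16;       /* folio size in slots (returned as -16) */

	/* step to the first slot past the 16-slot folio containing 'offset' */
	unsigned long step = ALIGN(offset + 1, nr) - offset;

	printf("skip %lu slots, next offset %lu\n", step, offset + step);  /* 11, 32 */
	return 0;
}
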
1857 if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0)) in get_swap_page_of_type()
1868 * @offset - number of the PAGE_SIZE-sized block of the device, starting
1873 int swap_type_of(dev_t device, sector_t offset) in swap_type_of() argument
1878 return -1; in swap_type_of()
1884 if (!(sis->flags & SWP_WRITEOK)) in swap_type_of()
1887 if (device == sis->bdev->bd_dev) { in swap_type_of()
1890 if (se->start_block == offset) { in swap_type_of()
1897 return -ENODEV; in swap_type_of()
1908 if (!(sis->flags & SWP_WRITEOK)) in find_first_swap()
1910 *device = sis->bdev->bd_dev; in find_first_swap()
1915 return -ENODEV; in find_first_swap()
1919 * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev
1922 sector_t swapdev_block(int type, pgoff_t offset) in swapdev_block() argument
1927 if (!si || !(si->flags & SWP_WRITEOK)) in swapdev_block()
1929 se = offset_to_swap_extent(si, offset); in swapdev_block()
1930 return se->start_block + (offset - se->start_page); in swapdev_block()
1947 spin_lock(&sis->lock); in count_swap_pages()
1948 if (sis->flags & SWP_WRITEOK) { in count_swap_pages()
1949 n = sis->pages; in count_swap_pages()
1951 n -= swap_usage_in_pages(sis); in count_swap_pages()
1953 spin_unlock(&sis->lock); in count_swap_pages()
1967 * just let do_wp_page work it out if a write is requested later - to
1983 return -ENOMEM; in unuse_pte()
1984 else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { in unuse_pte()
1993 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); in unuse_pte()
2005 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); in unuse_pte()
2023 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); in unuse_pte()
2024 inc_mm_counter(vma->vm_mm, MM_ANONPAGES); in unuse_pte()
2053 new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot)); in unuse_pte()
2059 set_pte_at(vma->vm_mm, addr, pte, new_pte); in unuse_pte()
2081 unsigned long offset; in unuse_pte_range() local
2102 offset = swp_offset(entry); in unuse_pte_range()
2119 swp_count = READ_ONCE(si->swap_map[offset]); in unuse_pte_range()
2122 return -ENOMEM; in unuse_pte_range()
2209 addr = vma->vm_start; in unuse_vma()
2210 end = vma->vm_end; in unuse_vma()
2212 pgd = pgd_offset(vma->vm_mm, addr); in unuse_vma()
2232 if (vma->anon_vma && !is_vm_hugetlb_page(vma)) { in unuse_mm()
2261 for (i = prev + 1; i < si->max; i++) { in find_next_to_unuse()
2262 count = READ_ONCE(si->swap_map[i]); in find_next_to_unuse()
2269 if (i == si->max) in find_next_to_unuse()
2301 (p = p->next) != &init_mm.mmlist) { in try_to_unuse()
2338 * swap cache just before we acquired the page lock. The folio in try_to_unuse()
2360 * and robust (though cpu-intensive) just to keep retrying. in try_to_unuse()
2365 return -EINTR; in try_to_unuse()
2371 * after swap_range_free() reduces si->inuse_pages to 0. in try_to_unuse()
2381 * added to the mmlist just after page_duplicate - before would be racy.
2402 while (!RB_EMPTY_ROOT(&sis->swap_extent_root)) { in destroy_swap_extents()
2403 struct rb_node *rb = sis->swap_extent_root.rb_node; in destroy_swap_extents()
2406 rb_erase(rb, &sis->swap_extent_root); in destroy_swap_extents()
2410 if (sis->flags & SWP_ACTIVATED) { in destroy_swap_extents()
2411 struct file *swap_file = sis->swap_file; in destroy_swap_extents()
2412 struct address_space *mapping = swap_file->f_mapping; in destroy_swap_extents()
2414 sis->flags &= ~SWP_ACTIVATED; in destroy_swap_extents()
2415 if (mapping->a_ops->swap_deactivate) in destroy_swap_extents()
2416 mapping->a_ops->swap_deactivate(swap_file); in destroy_swap_extents()
2430 struct rb_node **link = &sis->swap_extent_root.rb_node, *parent = NULL; in add_swap_extent()
2440 link = &parent->rb_right; in add_swap_extent()
2445 BUG_ON(se->start_page + se->nr_pages != start_page); in add_swap_extent()
2446 if (se->start_block + se->nr_pages == start_block) { in add_swap_extent()
2448 se->nr_pages += nr_pages; in add_swap_extent()
2456 return -ENOMEM; in add_swap_extent()
2457 new_se->start_page = start_page; in add_swap_extent()
2458 new_se->nr_pages = nr_pages; in add_swap_extent()
2459 new_se->start_block = start_block; in add_swap_extent()
2461 rb_link_node(&new_se->rb_node, parent, link); in add_swap_extent()
2462 rb_insert_color(&new_se->rb_node, &sis->swap_extent_root); in add_swap_extent()
2484 * requirements, they are simply tossed out - we will never use those blocks
2491 * Typically it is in the 1-4 megabyte range. So we can have hundreds of
2492 * extents in the rbtree. - akpm.
2496 struct file *swap_file = sis->swap_file; in setup_swap_extents()
2497 struct address_space *mapping = swap_file->f_mapping; in setup_swap_extents()
2498 struct inode *inode = mapping->host; in setup_swap_extents()
2501 if (S_ISBLK(inode->i_mode)) { in setup_swap_extents()
2502 ret = add_swap_extent(sis, 0, sis->max, 0); in setup_swap_extents()
2503 *span = sis->pages; in setup_swap_extents()
2507 if (mapping->a_ops->swap_activate) { in setup_swap_extents()
2508 ret = mapping->a_ops->swap_activate(sis, swap_file, span); in setup_swap_extents()
2511 sis->flags |= SWP_ACTIVATED; in setup_swap_extents()
2512 if ((sis->flags & SWP_FS_OPS) && in setup_swap_extents()
2515 return -ENOMEM; in setup_swap_extents()
2527 if (si->bdev) in swap_node()
2528 bdev = si->bdev; in swap_node()
2530 bdev = si->swap_file->f_inode->i_sb->s_bdev; in swap_node()
2532 return bdev ? bdev->bd_disk->node_id : NUMA_NO_NODE; in swap_node()
2543 si->prio = prio; in setup_swap_info()
2545 si->prio = --least_priority; in setup_swap_info()
2548 * low-to-high, while swap ordering is high-to-low in setup_swap_info()
2550 si->list.prio = -si->prio; in setup_swap_info()
2552 if (si->prio >= 0) in setup_swap_info()
2553 si->avail_lists[i].prio = -si->prio; in setup_swap_info()
2556 si->avail_lists[i].prio = 1; in setup_swap_info()
2558 si->avail_lists[i].prio = -si->prio; in setup_swap_info()
2561 si->swap_map = swap_map; in setup_swap_info()
2562 si->cluster_info = cluster_info; in setup_swap_info()
2563 si->zeromap = zeromap; in setup_swap_info()
2568 atomic_long_add(si->pages, &nr_swap_pages); in _enable_swap_info()
2569 total_swap_pages += si->pages; in _enable_swap_info()
2575 * which on removal of any swap_info_struct with an auto-assigned in _enable_swap_info()
2576 * (i.e. negative) priority increments the auto-assigned priority in _enable_swap_info()
2577 * of any lower-priority swap_info_structs. in _enable_swap_info()
2582 plist_add(&si->list, &swap_active_head); in _enable_swap_info()
2594 spin_lock(&si->lock); in enable_swap_info()
2596 spin_unlock(&si->lock); in enable_swap_info()
2601 percpu_ref_resurrect(&si->users); in enable_swap_info()
2603 spin_lock(&si->lock); in enable_swap_info()
2605 spin_unlock(&si->lock); in enable_swap_info()
2612 spin_lock(&si->lock); in reinsert_swap_info()
2613 setup_swap_info(si, si->prio, si->swap_map, si->cluster_info, si->zeromap); in reinsert_swap_info()
2615 spin_unlock(&si->lock); in reinsert_swap_info()
2640 unsigned long offset; in wait_for_allocation() local
2641 unsigned long end = ALIGN(si->max, SWAPFILE_CLUSTER); in wait_for_allocation()
2644 BUG_ON(si->flags & SWP_WRITEOK); in wait_for_allocation()
2646 for (offset = 0; offset < end; offset += SWAPFILE_CLUSTER) { in wait_for_allocation()
2647 ci = lock_cluster(si, offset); in wait_for_allocation()
2665 return -EPERM; in SYSCALL_DEFINE1()
2667 BUG_ON(!current->mm); in SYSCALL_DEFINE1()
2678 mapping = victim->f_mapping; in SYSCALL_DEFINE1()
2681 if (p->flags & SWP_WRITEOK) { in SYSCALL_DEFINE1()
2682 if (p->swap_file->f_mapping == mapping) { in SYSCALL_DEFINE1()
2689 err = -EINVAL; in SYSCALL_DEFINE1()
2693 if (!security_vm_enough_memory_mm(current->mm, p->pages)) in SYSCALL_DEFINE1()
2694 vm_unacct_memory(p->pages); in SYSCALL_DEFINE1()
2696 err = -ENOMEM; in SYSCALL_DEFINE1()
2700 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2702 if (p->prio < 0) { in SYSCALL_DEFINE1()
2707 si->prio++; in SYSCALL_DEFINE1()
2708 si->list.prio--; in SYSCALL_DEFINE1()
2710 if (si->avail_lists[nid].prio != 1) in SYSCALL_DEFINE1()
2711 si->avail_lists[nid].prio--; in SYSCALL_DEFINE1()
2716 plist_del(&p->list, &swap_active_head); in SYSCALL_DEFINE1()
2717 atomic_long_sub(p->pages, &nr_swap_pages); in SYSCALL_DEFINE1()
2718 total_swap_pages -= p->pages; in SYSCALL_DEFINE1()
2719 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2727 err = try_to_unuse(p->type); in SYSCALL_DEFINE1()
2731 /* re-insert swap space back into swap_list */ in SYSCALL_DEFINE1()
2742 * operations protected by RCU reader side lock (including any in SYSCALL_DEFINE1()
2747 percpu_ref_kill(&p->users); in SYSCALL_DEFINE1()
2749 wait_for_completion(&p->comp); in SYSCALL_DEFINE1()
2751 flush_work(&p->discard_work); in SYSCALL_DEFINE1()
2752 flush_work(&p->reclaim_work); in SYSCALL_DEFINE1()
2755 if (p->flags & SWP_CONTINUED) in SYSCALL_DEFINE1()
2758 if (!p->bdev || !bdev_nonrot(p->bdev)) in SYSCALL_DEFINE1()
2763 spin_lock(&p->lock); in SYSCALL_DEFINE1()
2766 swap_file = p->swap_file; in SYSCALL_DEFINE1()
2767 p->swap_file = NULL; in SYSCALL_DEFINE1()
2768 p->max = 0; in SYSCALL_DEFINE1()
2769 swap_map = p->swap_map; in SYSCALL_DEFINE1()
2770 p->swap_map = NULL; in SYSCALL_DEFINE1()
2771 zeromap = p->zeromap; in SYSCALL_DEFINE1()
2772 p->zeromap = NULL; in SYSCALL_DEFINE1()
2773 cluster_info = p->cluster_info; in SYSCALL_DEFINE1()
2774 p->cluster_info = NULL; in SYSCALL_DEFINE1()
2775 spin_unlock(&p->lock); in SYSCALL_DEFINE1()
2777 arch_swap_invalidate_area(p->type); in SYSCALL_DEFINE1()
2778 zswap_swapoff(p->type); in SYSCALL_DEFINE1()
2780 free_percpu(p->percpu_cluster); in SYSCALL_DEFINE1()
2781 p->percpu_cluster = NULL; in SYSCALL_DEFINE1()
2782 kfree(p->global_cluster); in SYSCALL_DEFINE1()
2783 p->global_cluster = NULL; in SYSCALL_DEFINE1()
2788 swap_cgroup_swapoff(p->type); in SYSCALL_DEFINE1()
2789 exit_swap_address_space(p->type); in SYSCALL_DEFINE1()
2791 inode = mapping->host; in SYSCALL_DEFINE1()
2794 inode->i_flags &= ~S_SWAPFILE; in SYSCALL_DEFINE1()
2801 * not hold p->lock after we cleared its SWP_WRITEOK. in SYSCALL_DEFINE1()
2804 p->flags = 0; in SYSCALL_DEFINE1()
2821 struct seq_file *seq = file->private_data; in swaps_poll()
2825 if (seq->poll_event != atomic_read(&proc_poll_event)) { in swaps_poll()
2826 seq->poll_event = atomic_read(&proc_poll_event); in swaps_poll()
2846 if (!(si->flags & SWP_USED) || !si->swap_map) in swap_start()
2848 if (!--l) in swap_start()
2863 type = si->type + 1; in swap_next()
2867 if (!(si->flags & SWP_USED) || !si->swap_map) in swap_next()
2892 bytes = K(si->pages); in swap_show()
2895 file = si->swap_file; in swap_show()
2898 len < 40 ? 40 - len : 1, " ", in swap_show()
2899 S_ISBLK(file_inode(file)->i_mode) ? in swap_show()
2903 si->prio); in swap_show()
2923 seq = file->private_data; in swaps_open()
2924 seq->poll_event = atomic_read(&proc_poll_event); in swaps_open()
2963 return ERR_PTR(-ENOMEM); in alloc_swap_info()
2965 if (percpu_ref_init(&p->users, swap_users_ref_free, in alloc_swap_info()
2968 return ERR_PTR(-ENOMEM); in alloc_swap_info()
2973 if (!(swap_info[type]->flags & SWP_USED)) in alloc_swap_info()
2978 percpu_ref_exit(&p->users); in alloc_swap_info()
2980 return ERR_PTR(-EPERM); in alloc_swap_info()
2983 p->type = type; in alloc_swap_info()
2995 * would be relying on p->type to remain valid. in alloc_swap_info()
2998 p->swap_extent_root = RB_ROOT; in alloc_swap_info()
2999 plist_node_init(&p->list, 0); in alloc_swap_info()
3001 plist_node_init(&p->avail_lists[i], 0); in alloc_swap_info()
3002 p->flags = SWP_USED; in alloc_swap_info()
3005 percpu_ref_exit(&defer->users); in alloc_swap_info()
3008 spin_lock_init(&p->lock); in alloc_swap_info()
3009 spin_lock_init(&p->cont_lock); in alloc_swap_info()
3010 atomic_long_set(&p->inuse_pages, SWAP_USAGE_OFFLIST_BIT); in alloc_swap_info()
3011 init_completion(&p->comp); in alloc_swap_info()
3018 if (S_ISBLK(inode->i_mode)) { in claim_swapfile()
3019 si->bdev = I_BDEV(inode); in claim_swapfile()
3025 if (bdev_is_zoned(si->bdev)) in claim_swapfile()
3026 return -EINVAL; in claim_swapfile()
3027 si->flags |= SWP_BLKDEV; in claim_swapfile()
3028 } else if (S_ISREG(inode->i_mode)) { in claim_swapfile()
3029 si->bdev = inode->i_sb->s_bdev; in claim_swapfile()
3039 * 1) the number of bits for the swap offset in the swp_entry_t type, and
3044 * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
3045 * decoded to a swp_entry_t again, and finally the swap offset is
3073 if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) { in read_swap_header()
3074 pr_err("Unable to find swap-space signature\n"); in read_swap_header()
3079 if (swab32(swap_header->info.version) == 1) { in read_swap_header()
3080 swab32s(&swap_header->info.version); in read_swap_header()
3081 swab32s(&swap_header->info.last_page); in read_swap_header()
3082 swab32s(&swap_header->info.nr_badpages); in read_swap_header()
3083 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) in read_swap_header()
3085 for (i = 0; i < swap_header->info.nr_badpages; i++) in read_swap_header()
3086 swab32s(&swap_header->info.badpages[i]); in read_swap_header()
3088 /* Check the swap header's sub-version */ in read_swap_header()
3089 if (swap_header->info.version != 1) { in read_swap_header()
3091 swap_header->info.version); in read_swap_header()
3096 last_page = swap_header->info.last_page; in read_swap_header()
3098 pr_warn("Empty swap-file\n"); in read_swap_header()
3107 /* p->max is an unsigned int: don't overflow it */ in read_swap_header()
3119 if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode)) in read_swap_header()
3121 if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES) in read_swap_header()
3144 nr_good_pages = maxpages - 1; /* omit header page */ in setup_swap_map_and_extents()
3146 for (i = 0; i < swap_header->info.nr_badpages; i++) { in setup_swap_map_and_extents()
3147 unsigned int page_nr = swap_header->info.badpages[i]; in setup_swap_map_and_extents()
3148 if (page_nr == 0 || page_nr > swap_header->info.last_page) in setup_swap_map_and_extents()
3149 return -EINVAL; in setup_swap_map_and_extents()
3152 nr_good_pages--; in setup_swap_map_and_extents()
3158 si->max = maxpages; in setup_swap_map_and_extents()
3159 si->pages = nr_good_pages; in setup_swap_map_and_extents()
3163 nr_good_pages = si->pages; in setup_swap_map_and_extents()
3166 pr_warn("Empty swap-file\n"); in setup_swap_map_and_extents()
3167 return -EINVAL; in setup_swap_map_and_extents()
3180 int cpu, err = -ENOMEM; in setup_clusters()
3187 spin_lock_init(&cluster_info[i].lock); in setup_clusters()
3189 if (si->flags & SWP_SOLIDSTATE) { in setup_clusters()
3190 si->percpu_cluster = alloc_percpu(struct percpu_cluster); in setup_clusters()
3191 if (!si->percpu_cluster) in setup_clusters()
3197 cluster = per_cpu_ptr(si->percpu_cluster, cpu); in setup_clusters()
3199 cluster->next[i] = SWAP_ENTRY_INVALID; in setup_clusters()
3200 local_lock_init(&cluster->lock); in setup_clusters()
3203 si->global_cluster = kmalloc(sizeof(*si->global_cluster), in setup_clusters()
3205 if (!si->global_cluster) in setup_clusters()
3208 si->global_cluster->next[i] = SWAP_ENTRY_INVALID; in setup_clusters()
3209 spin_lock_init(&si->global_cluster_lock); in setup_clusters()
3220 for (i = 0; i < swap_header->info.nr_badpages; i++) in setup_clusters()
3222 swap_header->info.badpages[i]); in setup_clusters()
3226 INIT_LIST_HEAD(&si->free_clusters); in setup_clusters()
3227 INIT_LIST_HEAD(&si->full_clusters); in setup_clusters()
3228 INIT_LIST_HEAD(&si->discard_clusters); in setup_clusters()
3231 INIT_LIST_HEAD(&si->nonfull_clusters[i]); in setup_clusters()
3232 INIT_LIST_HEAD(&si->frag_clusters[i]); in setup_clusters()
3233 atomic_long_set(&si->frag_cluster_nr[i], 0); in setup_clusters()
3248 if (ci->count) { in setup_clusters()
3249 ci->flags = CLUSTER_FLAG_NONFULL; in setup_clusters()
3250 list_add_tail(&ci->list, &si->nonfull_clusters[0]); in setup_clusters()
3253 ci->flags = CLUSTER_FLAG_FREE; in setup_clusters()
3254 list_add_tail(&ci->list, &si->free_clusters); in setup_clusters()
3287 return -EINVAL; in SYSCALL_DEFINE2()
3290 return -EPERM; in SYSCALL_DEFINE2()
3293 return -ENOMEM; in SYSCALL_DEFINE2()
3299 INIT_WORK(&si->discard_work, swap_discard_work); in SYSCALL_DEFINE2()
3300 INIT_WORK(&si->reclaim_work, swap_reclaim_work); in SYSCALL_DEFINE2()
3315 si->swap_file = swap_file; in SYSCALL_DEFINE2()
3316 mapping = swap_file->f_mapping; in SYSCALL_DEFINE2()
3317 dentry = swap_file->f_path.dentry; in SYSCALL_DEFINE2()
3318 inode = mapping->host; in SYSCALL_DEFINE2()
3326 error = -ENOENT; in SYSCALL_DEFINE2()
3330 error = -EBUSY; in SYSCALL_DEFINE2()
3337 if (!mapping->a_ops->read_folio) { in SYSCALL_DEFINE2()
3338 error = -EINVAL; in SYSCALL_DEFINE2()
3350 error = -EINVAL; in SYSCALL_DEFINE2()
3357 error = -ENOMEM; in SYSCALL_DEFINE2()
3361 error = swap_cgroup_swapon(si->type, maxpages); in SYSCALL_DEFINE2()
3379 error = -ENOMEM; in SYSCALL_DEFINE2()
3383 if (si->bdev && bdev_stable_writes(si->bdev)) in SYSCALL_DEFINE2()
3384 si->flags |= SWP_STABLE_WRITES; in SYSCALL_DEFINE2()
3386 if (si->bdev && bdev_synchronous(si->bdev)) in SYSCALL_DEFINE2()
3387 si->flags |= SWP_SYNCHRONOUS_IO; in SYSCALL_DEFINE2()
3389 if (si->bdev && bdev_nonrot(si->bdev)) { in SYSCALL_DEFINE2()
3390 si->flags |= SWP_SOLIDSTATE; in SYSCALL_DEFINE2()
3404 si->bdev && bdev_max_discard_sectors(si->bdev)) { in SYSCALL_DEFINE2()
3411 si->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD | in SYSCALL_DEFINE2()
3416 * either do single-time area discards only, or to just in SYSCALL_DEFINE2()
3417 * perform discards for released swap page-clusters. in SYSCALL_DEFINE2()
3418 * Now it's time to adjust the p->flags accordingly. in SYSCALL_DEFINE2()
3421 si->flags &= ~SWP_PAGE_DISCARD; in SYSCALL_DEFINE2()
3423 si->flags &= ~SWP_AREA_DISCARD; in SYSCALL_DEFINE2()
3425 /* issue a swapon-time discard if it's still required */ in SYSCALL_DEFINE2()
3426 if (si->flags & SWP_AREA_DISCARD) { in SYSCALL_DEFINE2()
3434 error = init_swap_address_space(si->type, maxpages); in SYSCALL_DEFINE2()
3438 error = zswap_swapon(si->type, maxpages); in SYSCALL_DEFINE2()
3446 inode->i_flags |= S_SWAPFILE; in SYSCALL_DEFINE2()
3449 inode->i_flags &= ~S_SWAPFILE; in SYSCALL_DEFINE2()
3454 prio = -1; in SYSCALL_DEFINE2()
3461 K(si->pages), name->name, si->prio, nr_extents, in SYSCALL_DEFINE2()
3463 (si->flags & SWP_SOLIDSTATE) ? "SS" : "", in SYSCALL_DEFINE2()
3464 (si->flags & SWP_DISCARDABLE) ? "D" : "", in SYSCALL_DEFINE2()
3465 (si->flags & SWP_AREA_DISCARD) ? "s" : "", in SYSCALL_DEFINE2()
3466 (si->flags & SWP_PAGE_DISCARD) ? "c" : ""); in SYSCALL_DEFINE2()
3475 zswap_swapoff(si->type); in SYSCALL_DEFINE2()
3477 exit_swap_address_space(si->type); in SYSCALL_DEFINE2()
3481 free_percpu(si->percpu_cluster); in SYSCALL_DEFINE2()
3482 si->percpu_cluster = NULL; in SYSCALL_DEFINE2()
3483 kfree(si->global_cluster); in SYSCALL_DEFINE2()
3484 si->global_cluster = NULL; in SYSCALL_DEFINE2()
3487 swap_cgroup_swapoff(si->type); in SYSCALL_DEFINE2()
3489 si->swap_file = NULL; in SYSCALL_DEFINE2()
3490 si->flags = 0; in SYSCALL_DEFINE2()
3520 if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK)) in si_swapinfo()
3523 val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused; in si_swapinfo()
3524 val->totalswap = total_swap_pages + nr_to_be_unused; in si_swapinfo()
3532 * - success -> 0
3533 * - swp_entry is invalid -> EINVAL
3534 * - swp_entry is migration entry -> EINVAL
3535 * - swap-cache reference is requested but there is already one. -> EEXIST
3536 * - swap-cache reference is requested but the entry is not used. -> ENOENT
3537 * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
3543 unsigned long offset; in __swap_duplicate() local
3551 return -EINVAL; in __swap_duplicate()
3554 offset = swp_offset(entry); in __swap_duplicate()
3555 VM_WARN_ON(nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER); in __swap_duplicate()
3557 ci = lock_cluster(si, offset); in __swap_duplicate()
3561 count = si->swap_map[offset + i]; in __swap_duplicate()
3565 * swap entry could be SWAP_MAP_BAD. Check here with lock held. in __swap_duplicate()
3568 err = -ENOENT; in __swap_duplicate()
3576 err = -ENOENT; in __swap_duplicate()
3579 err = -EEXIST; in __swap_duplicate()
3581 err = -EINVAL; in __swap_duplicate()
3589 count = si->swap_map[offset + i]; in __swap_duplicate()
3597 else if (swap_count_continued(si, offset + i, count)) in __swap_duplicate()
3604 err = -ENOMEM; in __swap_duplicate()
3608 WRITE_ONCE(si->swap_map[offset + i], count | has_cache); in __swap_duplicate()
3627 * Returns 0 for success, or -ENOMEM if a swap_count_continuation is required
3629 * if __swap_duplicate() fails for another reason (-EINVAL or -ENOENT), which
3636 while (!err && __swap_duplicate(entry, 1, 1) == -ENOMEM) in swap_duplicate()
3646 * -EEXIST means there is a swap cache.
3656 unsigned long offset = swp_offset(entry); in swapcache_clear() local
3658 cluster_swap_free_nr(si, offset, nr, SWAP_HAS_CACHE); in swapcache_clear()
3667 * out-of-line methods to avoid include hell.
3671 return swp_swap_info(folio->swap)->swap_file->f_mapping; in swapcache_mapping()
3677 return swap_cache_index(folio->swap); in __folio_swap_cache_index()
3682 * add_swap_count_continuation - called when a swap count is duplicated
3703 pgoff_t offset; in add_swap_count_continuation() local
3722 offset = swp_offset(entry); in add_swap_count_continuation()
3724 ci = lock_cluster(si, offset); in add_swap_count_continuation()
3726 count = swap_count(si->swap_map[offset]); in add_swap_count_continuation()
3732 * over-provisioning. in add_swap_count_continuation()
3738 ret = -ENOMEM; in add_swap_count_continuation()
3742 head = vmalloc_to_page(si->swap_map + offset); in add_swap_count_continuation()
3743 offset &= ~PAGE_MASK; in add_swap_count_continuation()
3745 spin_lock(&si->cont_lock); in add_swap_count_continuation()
3752 INIT_LIST_HEAD(&head->lru); in add_swap_count_continuation()
3754 si->flags |= SWP_CONTINUED; in add_swap_count_continuation()
3757 list_for_each_entry(list_page, &head->lru, lru) { in add_swap_count_continuation()
3767 map = kmap_local_page(list_page) + offset; in add_swap_count_continuation()
3779 list_add_tail(&page->lru, &head->lru); in add_swap_count_continuation()
3782 spin_unlock(&si->cont_lock); in add_swap_count_continuation()
3793 * swap_count_continued - when the original swap_map count is incremented
3799 * lock.
3802 pgoff_t offset, unsigned char count) in swap_count_continued() argument
3809 head = vmalloc_to_page(si->swap_map + offset); in swap_count_continued()
3815 spin_lock(&si->cont_lock); in swap_count_continued()
3816 offset &= ~PAGE_MASK; in swap_count_continued()
3818 map = kmap_local_page(page) + offset; in swap_count_continued()
3831 map = kmap_local_page(page) + offset; in swap_count_continued()
3840 map = kmap_local_page(page) + offset; in swap_count_continued()
3846 map = kmap_local_page(page) + offset; in swap_count_continued()
3861 map = kmap_local_page(page) + offset; in swap_count_continued()
3864 *map -= 1; in swap_count_continued()
3869 map = kmap_local_page(page) + offset; in swap_count_continued()
3877 spin_unlock(&si->cont_lock); in swap_count_continued()
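
add_swap_count_continuation() and swap_count_continued() implement a carry chain: the per-slot
byte in swap_map can only count so far, so overflow is carried into extra bytes kept on
continuation pages, one byte per slot per page. A much-simplified sketch of that carry scheme
(flat array instead of a page list; the limits are illustrative, not the kernel's encoding):

#include <stdbool.h>
#include <stdio.h>

#define BASE_MAX 0x3f     /* assumed cap of the primary counter */
#define CONT_MAX 0x7f     /* assumed cap of each continuation byte */
#define MAX_CONT 4        /* sketch: fixed number of continuation levels */

struct slot_count {
	unsigned char base;
	unsigned char cont[MAX_CONT];           /* carry digits, least significant first */
};

static bool count_inc(struct slot_count *s)
{
	if (s->base < BASE_MAX) {
		s->base++;
		return true;
	}
	for (int i = 0; i < MAX_CONT; i++) {    /* propagate the carry */
		if (s->cont[i] < CONT_MAX) {
			s->cont[i]++;
			return true;
		}
		s->cont[i] = 0;
	}
	return false;                           /* out of continuation space */
}

int main(void)
{
	struct slot_count s = { 0 };

	for (int i = 0; i < 1000; i++)
		count_inc(&s);
	printf("base=%u cont0=%u cont1=%u\n", s.base, s.cont[0], s.cont[1]);
	return 0;
}
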
3882 * free_swap_count_continuations - swapoff frees all the continuation pages
3887 pgoff_t offset; in free_swap_count_continuations() local
3889 for (offset = 0; offset < si->max; offset += PAGE_SIZE) { in free_swap_count_continuations()
3891 head = vmalloc_to_page(si->swap_map + offset); in free_swap_count_continuations()
3895 list_for_each_entry_safe(page, next, &head->lru, lru) { in free_swap_count_continuations()
3896 list_del(&page->lru); in free_swap_count_continuations()
3920 * lock. in __folio_throttle_swaprate()
3922 if (current->throttle_disk) in __folio_throttle_swaprate()
3928 if (si->bdev) { in __folio_throttle_swaprate()
3929 blkcg_schedule_throttle(si->bdev->bd_disk, true); in __folio_throttle_swaprate()
3945 return -ENOMEM; in swapfile_init()