/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MM_SWAP_TABLE_H
#define _MM_SWAP_TABLE_H

#include <linux/rcupdate.h>
#include <linux/atomic.h>
#include "swap.h"

/* A flat array in each cluster, serving as its swap table */
struct swap_table {
	atomic_long_t entries[SWAPFILE_CLUSTER];
};

#define SWP_TABLE_USE_PAGE (sizeof(struct swap_table) == PAGE_SIZE)

/*
 * A swap table entry represents the status of a swap slot on a swap
 * (physical or virtual) device. The swap table in each cluster is a
 * 1:1 map of the swap slots in this cluster.
 *
 * Swap table entry types and bit layouts:
 *
 * NULL:    |---------------- 0 ---------------| - Free slot
 * Shadow:  | SWAP_COUNT |---- SHADOW_VAL ---|1| - Swapped out slot
 * PFN:     | SWAP_COUNT |------ PFN -------|10| - Cached slot
 * Pointer: |----------- Pointer ----------|100| - (Unused)
 * Bad:     |------------- 1 -------------|1000| - Bad slot
 *
 * SWAP_COUNT is `SWP_TB_COUNT_BITS` wide; each entry is an atomic long.
 *
 * Usages:
 *
 * - NULL: Swap slot is unused and can be allocated.
 *
 * - Shadow: Swap slot is in use but not cached (usually swapped out). It
 *   reuses the XA_VALUE format to stay compatible with workingset shadows.
 *   The SHADOW_VAL part may be all 0 if the workingset shadow info is
 *   absent; in that case the shadow format is still kept as a placeholder.
 *
 *   The memcg ID is embedded in SHADOW_VAL.
 *
 * - PFN: Swap slot is in use and cached. Memcg info is recorded in the
 *   page struct.
 *
 * - Pointer: Not used yet. `0b100` is reserved for potential pointer usage:
 *   pointers are at least 8-byte aligned, so only the lower three bits are
 *   available as a type marker.
 *
 * - Bad: Swap slot is reserved, protecting the swap header or holes on the
 *   swap device.
 */

#if defined(MAX_POSSIBLE_PHYSMEM_BITS)
#define SWAP_CACHE_PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
#elif defined(MAX_PHYSMEM_BITS)
#define SWAP_CACHE_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
#else
#define SWAP_CACHE_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT)
#endif

/* NULL entry, all 0 */
#define SWP_TB_NULL 0UL

/* Swapped out: shadow */
#define SWP_TB_SHADOW_MARK 0b1UL

/* Cached: PFN */
#define SWP_TB_PFN_MARK_BITS 2
#define SWP_TB_PFN_MARK 0b10UL
#define SWP_TB_PFN_MARK_MASK (BIT(SWP_TB_PFN_MARK_BITS) - 1)
#define SWP_TB_PFN_BITS (SWAP_CACHE_PFN_BITS + SWP_TB_PFN_MARK_BITS)

/* SWAP_COUNT part for PFN or shadow entries; the width can be shrunk or extended */
#define SWP_TB_COUNT_BITS min(4, BITS_PER_LONG - SWP_TB_PFN_BITS)
#define SWP_TB_COUNT_MASK (~((~0UL) >> SWP_TB_COUNT_BITS))
#define SWP_TB_COUNT_SHIFT (BITS_PER_LONG - SWP_TB_COUNT_BITS)
#define SWP_TB_COUNT_MAX ((1 << SWP_TB_COUNT_BITS) - 1)

/* Bad slot: ends with 0b1000, and the rest of the bits are all 1 */
#define SWP_TB_BAD ((~0UL) << 3)

/* Macro for shadow offset calculation */
#define SWAP_COUNT_SHIFT SWP_TB_COUNT_BITS
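/*
 * Illustrative sketch only (not part of the interface above, compiled out
 * by a hypothetical guard): how the macros pack a cached slot on a 64-bit
 * kernel where SWP_TB_COUNT_BITS evaluates to 4. The PFN and count values
 * are made up for the example.
 */
#ifdef SWAP_TABLE_LAYOUT_EXAMPLE
static inline void swap_table_layout_example(void)
{
	/* PFN 0x1234 with count 3: count on top, PFN above the 0b10 mark */
	unsigned long swp_tb = (3UL << SWP_TB_COUNT_SHIFT) |
			       (0x1234UL << SWP_TB_PFN_MARK_BITS) |
			       SWP_TB_PFN_MARK;

	/* The low bits identify the type: 0b10 means a cached (PFN) entry */
	VM_WARN_ON((swp_tb & SWP_TB_PFN_MARK_MASK) != SWP_TB_PFN_MARK);
}
#endif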
/*
 * Helpers for casting one type of info into a swap table entry.
 */
static inline unsigned long null_to_swp_tb(void)
{
	BUILD_BUG_ON(sizeof(unsigned long) != sizeof(atomic_long_t));
	return 0;
}

static inline unsigned long __count_to_swp_tb(unsigned char count)
{
	/*
	 * At least three values are needed to distinguish free (0),
	 * used (count > 0 && count < SWP_TB_COUNT_MAX), and
	 * overflow (count == SWP_TB_COUNT_MAX).
	 */
	BUILD_BUG_ON(SWP_TB_COUNT_MAX < 2 || SWP_TB_COUNT_BITS < 2);
	VM_WARN_ON(count > SWP_TB_COUNT_MAX);
	return ((unsigned long)count) << SWP_TB_COUNT_SHIFT;
}

static inline unsigned long pfn_to_swp_tb(unsigned long pfn, unsigned int count)
{
	unsigned long swp_tb;

	BUILD_BUG_ON(sizeof(unsigned long) != sizeof(void *));
	BUILD_BUG_ON(SWAP_CACHE_PFN_BITS >
		     (BITS_PER_LONG - SWP_TB_PFN_MARK_BITS - SWP_TB_COUNT_BITS));

	swp_tb = (pfn << SWP_TB_PFN_MARK_BITS) | SWP_TB_PFN_MARK;
	VM_WARN_ON_ONCE(swp_tb & SWP_TB_COUNT_MASK);

	return swp_tb | __count_to_swp_tb(count);
}

static inline unsigned long folio_to_swp_tb(struct folio *folio, unsigned int count)
{
	return pfn_to_swp_tb(folio_pfn(folio), count);
}

static inline unsigned long shadow_to_swp_tb(void *shadow, unsigned int count)
{
	BUILD_BUG_ON((BITS_PER_XA_VALUE + 1) !=
		     BITS_PER_BYTE * sizeof(unsigned long));
	BUILD_BUG_ON((unsigned long)xa_mk_value(0) != SWP_TB_SHADOW_MARK);

	VM_WARN_ON_ONCE(shadow && !xa_is_value(shadow));
	VM_WARN_ON_ONCE(shadow && ((unsigned long)shadow & SWP_TB_COUNT_MASK));

	return (unsigned long)shadow | __count_to_swp_tb(count) | SWP_TB_SHADOW_MARK;
}

/*
 * Helpers for swap table entry type checking.
 */
static inline bool swp_tb_is_null(unsigned long swp_tb)
{
	return !swp_tb;
}

static inline bool swp_tb_is_folio(unsigned long swp_tb)
{
	return ((swp_tb & SWP_TB_PFN_MARK_MASK) == SWP_TB_PFN_MARK);
}

static inline bool swp_tb_is_shadow(unsigned long swp_tb)
{
	return xa_is_value((void *)swp_tb);
}

static inline bool swp_tb_is_bad(unsigned long swp_tb)
{
	return swp_tb == SWP_TB_BAD;
}

static inline bool swp_tb_is_countable(unsigned long swp_tb)
{
	return (swp_tb_is_shadow(swp_tb) || swp_tb_is_folio(swp_tb) ||
		swp_tb_is_null(swp_tb));
}

/*
 * Helpers for retrieving info from swap table entries.
 */
static inline struct folio *swp_tb_to_folio(unsigned long swp_tb)
{
	VM_WARN_ON(!swp_tb_is_folio(swp_tb));
	return pfn_folio((swp_tb & ~SWP_TB_COUNT_MASK) >> SWP_TB_PFN_MARK_BITS);
}

static inline void *swp_tb_to_shadow(unsigned long swp_tb)
{
	VM_WARN_ON(!swp_tb_is_shadow(swp_tb));
	/* No shift needed; the xa_value is stored as-is in the lower bits. */
	return (void *)(swp_tb & ~SWP_TB_COUNT_MASK);
}

static inline unsigned char __swp_tb_get_count(unsigned long swp_tb)
{
	VM_WARN_ON(!swp_tb_is_countable(swp_tb));
	return ((swp_tb & SWP_TB_COUNT_MASK) >> SWP_TB_COUNT_SHIFT);
}

static inline int swp_tb_get_count(unsigned long swp_tb)
{
	if (swp_tb_is_countable(swp_tb))
		return __swp_tb_get_count(swp_tb);
	return -EINVAL;
}

static inline unsigned long __swp_tb_mk_count(unsigned long swp_tb, int count)
{
	return ((swp_tb & ~SWP_TB_COUNT_MASK) | __count_to_swp_tb(count));
}
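/*
 * A minimal usage sketch (illustration only, behind a hypothetical guard;
 * the caller and folio here are made up): encode a cached folio into an
 * entry, then check the type and read the fields back with the helpers
 * above. Rewriting the count leaves the PFN part untouched.
 */
#ifdef SWAP_TABLE_USAGE_EXAMPLE
static inline void swap_table_cast_example(struct folio *folio)
{
	unsigned long swp_tb = folio_to_swp_tb(folio, 1);

	VM_WARN_ON(!swp_tb_is_folio(swp_tb));
	VM_WARN_ON(swp_tb_to_folio(swp_tb) != folio);
	VM_WARN_ON(swp_tb_get_count(swp_tb) != 1);

	/* Bumping the count rewrites only the SWAP_COUNT bits */
	swp_tb = __swp_tb_mk_count(swp_tb, 2);
	VM_WARN_ON(swp_tb_to_folio(swp_tb) != folio);
	VM_WARN_ON(swp_tb_get_count(swp_tb) != 2);
}
#endif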
/*
 * Helpers for accessing or modifying the swap table of a cluster.
 * The cluster lock must be held, except for swap_table_get(), which
 * reads the table locklessly under RCU.
 */
static inline void __swap_table_set(struct swap_cluster_info *ci,
				    unsigned int off, unsigned long swp_tb)
{
	atomic_long_t *table = rcu_dereference_protected(ci->table, true);

	lockdep_assert_held(&ci->lock);
	VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
	atomic_long_set(&table[off], swp_tb);
}

static inline unsigned long __swap_table_xchg(struct swap_cluster_info *ci,
					      unsigned int off, unsigned long swp_tb)
{
	atomic_long_t *table = rcu_dereference_protected(ci->table, true);

	lockdep_assert_held(&ci->lock);
	VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
	/* Ordering is guaranteed by the cluster lock; relaxed xchg is enough */
	return atomic_long_xchg_relaxed(&table[off], swp_tb);
}

static inline unsigned long __swap_table_get(struct swap_cluster_info *ci,
					     unsigned int off)
{
	atomic_long_t *table;

	VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);
	table = rcu_dereference_check(ci->table, lockdep_is_held(&ci->lock));

	return atomic_long_read(&table[off]);
}

static inline unsigned long swap_table_get(struct swap_cluster_info *ci,
					   unsigned int off)
{
	atomic_long_t *table;
	unsigned long swp_tb;

	VM_WARN_ON_ONCE(off >= SWAPFILE_CLUSTER);

	rcu_read_lock();
	table = rcu_dereference(ci->table);
	swp_tb = table ? atomic_long_read(&table[off]) : null_to_swp_tb();
	rcu_read_unlock();

	return swp_tb;
}
#endif