// SPDX-License-Identifier: GPL-2.0
#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/huge_mm.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
#include <linux/memremap.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/kernel-page-flags.h>
#include <linux/uaccess.h>
#include "internal.h"

#define KPMSIZE sizeof(u64)
#define KPMMASK (KPMSIZE - 1)

enum kpage_operation {
	KPAGE_FLAGS,
	KPAGE_COUNT,
	KPAGE_CGROUP,
};

static inline unsigned long get_max_dump_pfn(void)
{
#ifdef CONFIG_SPARSEMEM
	/*
	 * The memmap of early sections is completely populated and marked
	 * online even if max_pfn does not fall on a section boundary -
	 * pfn_to_online_page() will succeed on all pages. Allow inspecting
	 * these memmaps.
	 */
	return round_up(max_pfn, PAGES_PER_SECTION);
#else
	return max_pfn;
#endif
}

static u64 get_kpage_count(const struct page *page)
{
	struct page_snapshot ps;
	u64 ret;

	snapshot_page(&ps, page);

	if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
		ret = folio_precise_page_mapcount(&ps.folio_snapshot,
						  &ps.page_snapshot);
	else
		ret = folio_average_page_mapcount(&ps.folio_snapshot);

	return ret;
}

static ssize_t kpage_read(struct file *file, char __user *buf,
			  size_t count, loff_t *ppos,
			  enum kpage_operation op)
{
	const unsigned long max_dump_pfn = get_max_dump_pfn();
	u64 __user *out = (u64 __user *)buf;
	struct page *page;
	unsigned long src = *ppos;
	unsigned long pfn;
	ssize_t ret = 0;
	u64 info;

	pfn = src / KPMSIZE;
	if (src & KPMMASK || count & KPMMASK)
		return -EINVAL;
	if (src >= max_dump_pfn * KPMSIZE)
		return 0;
	count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);

	while (count > 0) {
		/*
		 * TODO: ZONE_DEVICE support requires to identify
		 * memmaps that were actually initialized.
		 */
		page = pfn_to_online_page(pfn);

		if (page) {
			switch (op) {
			case KPAGE_FLAGS:
				info = stable_page_flags(page);
				break;
			case KPAGE_COUNT:
				info = get_kpage_count(page);
				break;
			case KPAGE_CGROUP:
				info = page_cgroup_ino(page);
				break;
			default:
				info = 0;
				break;
			}
		} else
			info = 0;

		if (put_user(info, out)) {
			ret = -EFAULT;
			break;
		}

		pfn++;
		out++;
		count -= KPMSIZE;

		cond_resched();
	}

	*ppos += (char __user *)out - buf;
	if (!ret)
		ret = (char __user *)out - buf;
	return ret;
}

/* /proc/kpagecount - an array exposing page mapcounts
 *
 * Each entry is a u64 representing the corresponding
 * physical page mapcount.
 */
static ssize_t kpagecount_read(struct file *file, char __user *buf,
			       size_t count, loff_t *ppos)
{
	return kpage_read(file, buf, count, ppos, KPAGE_COUNT);
}

static const struct proc_ops kpagecount_proc_ops = {
	.proc_flags	= PROC_ENTRY_PERMANENT,
	.proc_lseek	= mem_lseek,
	.proc_read	= kpagecount_read,
};
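
/*
 * Illustrative userspace sketch (not part of the kernel build; the helper
 * name is hypothetical): each entry in /proc/kpagecount is a u64 indexed by
 * PFN, so the value for a given pfn sits at byte offset pfn * sizeof(u64)
 * and can be fetched with a single pread().
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *
 *	static uint64_t read_kpagecount(unsigned long pfn)
 *	{
 *		uint64_t mapcount = 0;
 *		int fd = open("/proc/kpagecount", O_RDONLY);
 *
 *		if (fd >= 0) {
 *			pread(fd, &mapcount, sizeof(mapcount),
 *			      (off_t)pfn * sizeof(mapcount));
 *			close(fd);
 *		}
 *		return mapcount;
 *	}
 */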

static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
	return ((kflags >> kbit) & 1) << ubit;
}

u64 stable_page_flags(const struct page *page)
{
	const struct folio *folio;
	struct page_snapshot ps;
	unsigned long k;
	unsigned long mapping;
	bool is_anon;
	u64 u = 0;

	/*
	 * pseudo flag: KPF_NOPAGE
	 * it differentiates a memory hole from a page with no flags
	 */
	if (!page)
		return 1 << KPF_NOPAGE;

	snapshot_page(&ps, page);
	folio = &ps.folio_snapshot;

	k = folio->flags.f;
	mapping = (unsigned long)folio->mapping;
	is_anon = mapping & FOLIO_MAPPING_ANON;

	/*
	 * pseudo flags for the well known (anonymous) memory mapped pages
	 */
	if (folio_mapped(folio))
		u |= 1 << KPF_MMAP;
	if (is_anon) {
		u |= 1 << KPF_ANON;
		if (mapping & FOLIO_MAPPING_KSM)
			u |= 1 << KPF_KSM;
	}

	/*
	 * compound pages: export both head/tail info
	 * they together define a compound page's start/end pos and order
	 */
	if (ps.idx == 0)
		u |= kpf_copy_bit(k, KPF_COMPOUND_HEAD, PG_head);
	else
		u |= 1 << KPF_COMPOUND_TAIL;
	if (folio_test_hugetlb(folio))
		u |= 1 << KPF_HUGE;
	else if (folio_test_large(folio) &&
		 folio_test_large_rmappable(folio)) {
		/* Note: we indicate any THPs here, not just PMD-sized ones */
		u |= 1 << KPF_THP;
	} else if (is_huge_zero_pfn(ps.pfn)) {
		u |= 1 << KPF_ZERO_PAGE;
		u |= 1 << KPF_THP;
	} else if (is_zero_pfn(ps.pfn)) {
		u |= 1 << KPF_ZERO_PAGE;
	}

	if (ps.flags & PAGE_SNAPSHOT_PG_BUDDY)
		u |= 1 << KPF_BUDDY;

	if (folio_test_offline(folio))
		u |= 1 << KPF_OFFLINE;
	if (folio_test_pgtable(folio))
		u |= 1 << KPF_PGTABLE;
	if (folio_test_slab(folio))
		u |= 1 << KPF_SLAB;

#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
	u |= kpf_copy_bit(k, KPF_IDLE, PG_idle);
#else
	if (ps.flags & PAGE_SNAPSHOT_PG_IDLE)
		u |= 1 << KPF_IDLE;
#endif

	u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
	u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
	u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate);
	u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback);

	u |= kpf_copy_bit(k, KPF_LRU, PG_lru);
	u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced);
	u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active);
	u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim);

#define SWAPCACHE ((1 << PG_swapbacked) | (1 << PG_swapcache))
	if ((k & SWAPCACHE) == SWAPCACHE)
		u |= 1 << KPF_SWAPCACHE;
	u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked);

	u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable);
	u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked);

#ifdef CONFIG_MEMORY_FAILURE
	if (u & (1 << KPF_HUGE))
		u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
	else
		u |= kpf_copy_bit(ps.page_snapshot.flags.f, KPF_HWPOISON, PG_hwpoison);
#endif

	u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved);
	u |= kpf_copy_bit(k, KPF_OWNER_2, PG_owner_2);
	u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private);
	u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2);
	u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1);
	u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1);
#ifdef CONFIG_ARCH_USES_PG_ARCH_2
	u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2);
#endif
#ifdef CONFIG_ARCH_USES_PG_ARCH_3
	u |= kpf_copy_bit(k, KPF_ARCH_3, PG_arch_3);
#endif

	return u;
}
EXPORT_SYMBOL_GPL(stable_page_flags);

/* /proc/kpageflags - an array exposing page flags
 *
 * Each entry is a u64 representing the corresponding
 * physical page flags.
 */
static ssize_t kpageflags_read(struct file *file, char __user *buf,
			       size_t count, loff_t *ppos)
{
	return kpage_read(file, buf, count, ppos, KPAGE_FLAGS);
}

static const struct proc_ops kpageflags_proc_ops = {
	.proc_flags	= PROC_ENTRY_PERMANENT,
	.proc_lseek	= mem_lseek,
	.proc_read	= kpageflags_read,
};
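
/*
 * Illustrative userspace sketch (not part of the kernel build; the helper
 * name is hypothetical): a /proc/kpageflags entry is a u64 bitmask whose bit
 * positions are the KPF_* constants from <linux/kernel-page-flags.h>, so a
 * caller can test e.g. whether the page backing a PFN belongs to a THP.
 *
 *	#include <fcntl.h>
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *	#include <linux/kernel-page-flags.h>
 *
 *	static bool pfn_is_thp(unsigned long pfn)
 *	{
 *		uint64_t flags = 0;
 *		int fd = open("/proc/kpageflags", O_RDONLY);
 *
 *		if (fd >= 0) {
 *			pread(fd, &flags, sizeof(flags),
 *			      (off_t)pfn * sizeof(flags));
 *			close(fd);
 *		}
 *		return flags & (1ULL << KPF_THP);
 *	}
 */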

#ifdef CONFIG_MEMCG
static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	return kpage_read(file, buf, count, ppos, KPAGE_CGROUP);
}
static const struct proc_ops kpagecgroup_proc_ops = {
	.proc_flags	= PROC_ENTRY_PERMANENT,
	.proc_lseek	= mem_lseek,
	.proc_read	= kpagecgroup_read,
};
#endif /* CONFIG_MEMCG */

static int __init proc_page_init(void)
{
	proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops);
	proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops);
#ifdef CONFIG_MEMCG
	proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops);
#endif
	return 0;
}
fs_initcall(proc_page_init);