// SPDX-License-Identifier: GPL-2.0
#include <linux/memblock.h>
#include <linux/compiler.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/huge_mm.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
#include <linux/memremap.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/kernel-page-flags.h>
#include <linux/uaccess.h>
#include "internal.h"

#define KPMSIZE sizeof(u64)
#define KPMMASK (KPMSIZE - 1)
#define KPMBITS (KPMSIZE * BITS_PER_BYTE)

enum kpage_operation {
        KPAGE_FLAGS,
        KPAGE_COUNT,
        KPAGE_CGROUP,
};

static inline unsigned long get_max_dump_pfn(void)
{
#ifdef CONFIG_SPARSEMEM
        /*
         * The memmap of early sections is completely populated and marked
         * online even if max_pfn does not fall on a section boundary -
         * pfn_to_online_page() will succeed on all pages. Allow inspecting
         * these memmaps.
         */
        return round_up(max_pfn, PAGES_PER_SECTION);
#else
        return max_pfn;
#endif
}

static u64 get_kpage_count(const struct page *page)
{
        struct page_snapshot ps;
        u64 ret;

        snapshot_page(&ps, page);

        if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
                ret = folio_precise_page_mapcount(&ps.folio_snapshot,
                                                  &ps.page_snapshot);
        else
                ret = folio_average_page_mapcount(&ps.folio_snapshot);

        return ret;
}

static ssize_t kpage_read(struct file *file, char __user *buf,
                          size_t count, loff_t *ppos,
                          enum kpage_operation op)
{
        const unsigned long max_dump_pfn = get_max_dump_pfn();
        u64 __user *out = (u64 __user *)buf;
        struct page *page;
        unsigned long src = *ppos;
        unsigned long pfn;
        ssize_t ret = 0;
        u64 info;

        pfn = src / KPMSIZE;
        if (src & KPMMASK || count & KPMMASK)
                return -EINVAL;
        if (src >= max_dump_pfn * KPMSIZE)
                return 0;
        count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);

        while (count > 0) {
                /*
                 * TODO: ZONE_DEVICE support requires identifying
                 * memmaps that were actually initialized.
                 */
                page = pfn_to_online_page(pfn);

                if (page) {
                        switch (op) {
                        case KPAGE_FLAGS:
                                info = stable_page_flags(page);
                                break;
                        case KPAGE_COUNT:
                                info = get_kpage_count(page);
                                break;
                        case KPAGE_CGROUP:
                                info = page_cgroup_ino(page);
                                break;
                        default:
                                info = 0;
                                break;
                        }
                } else
                        info = 0;

                if (put_user(info, out)) {
                        ret = -EFAULT;
                        break;
                }

                pfn++;
                out++;
                count -= KPMSIZE;

                cond_resched();
        }

        *ppos += (char __user *)out - buf;
        if (!ret)
                ret = (char __user *)out - buf;
        return ret;
}

/* /proc/kpagecount - an array exposing page mapcounts
 *
 * Each entry is a u64 representing the corresponding
 * physical page mapcount.
 */
static ssize_t kpagecount_read(struct file *file, char __user *buf,
                               size_t count, loff_t *ppos)
{
        return kpage_read(file, buf, count, ppos, KPAGE_COUNT);
}

static const struct proc_ops kpagecount_proc_ops = {
        .proc_flags = PROC_ENTRY_PERMANENT,
        .proc_lseek = mem_lseek,
        .proc_read = kpagecount_read,
};
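
/*
 * Illustrative only (not part of the kernel build): a minimal userspace
 * sketch of how /proc/kpagecount is typically consumed. Entry N is
 * KPMSIZE (8) bytes at byte offset N * 8, so a single pread() at
 * pfn * sizeof(u64) returns that pfn's mapcount. The "pfn" variable and
 * the lack of error handling are assumptions of this sketch, not part
 * of this file.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	unsigned long pfn = 0;	// pfn of interest, e.g. from /proc/self/pagemap
 *	uint64_t cnt;
 *	int fd = open("/proc/kpagecount", O_RDONLY);
 *
 *	if (fd >= 0 && pread(fd, &cnt, sizeof(cnt),
 *			     pfn * sizeof(cnt)) == sizeof(cnt))
 *		printf("pfn %lu mapped %llu time(s)\n", pfn,
 *		       (unsigned long long)cnt);
 */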

static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
        return ((kflags >> kbit) & 1) << ubit;
}

u64 stable_page_flags(const struct page *page)
{
        const struct folio *folio;
        struct page_snapshot ps;
        unsigned long k;
        unsigned long mapping;
        bool is_anon;
        u64 u = 0;

        /*
         * pseudo flag: KPF_NOPAGE
         * it differentiates a memory hole from a page with no flags
         */
        if (!page)
                return 1 << KPF_NOPAGE;

        snapshot_page(&ps, page);
        folio = &ps.folio_snapshot;

        k = folio->flags;
        mapping = (unsigned long)folio->mapping;
        is_anon = mapping & FOLIO_MAPPING_ANON;

        /*
         * pseudo flags for the well known (anonymous) memory mapped pages
         */
        if (folio_mapped(folio))
                u |= 1 << KPF_MMAP;
        if (is_anon) {
                u |= 1 << KPF_ANON;
                if (mapping & FOLIO_MAPPING_KSM)
                        u |= 1 << KPF_KSM;
        }

        /*
         * compound pages: export both head/tail info
         * they together define a compound page's start/end pos and order
         */
        if (ps.idx == 0)
                u |= kpf_copy_bit(k, KPF_COMPOUND_HEAD, PG_head);
        else
                u |= 1 << KPF_COMPOUND_TAIL;
        if (folio_test_hugetlb(folio))
                u |= 1 << KPF_HUGE;
        else if (folio_test_large(folio) &&
                 folio_test_large_rmappable(folio)) {
                /* Note: we indicate any THPs here, not just PMD-sized ones */
                u |= 1 << KPF_THP;
        } else if (is_huge_zero_pfn(ps.pfn)) {
                u |= 1 << KPF_ZERO_PAGE;
                u |= 1 << KPF_THP;
        } else if (is_zero_pfn(ps.pfn)) {
                u |= 1 << KPF_ZERO_PAGE;
        }

        if (ps.flags & PAGE_SNAPSHOT_PG_BUDDY)
                u |= 1 << KPF_BUDDY;

        if (folio_test_offline(folio))
                u |= 1 << KPF_OFFLINE;
        if (folio_test_pgtable(folio))
                u |= 1 << KPF_PGTABLE;
        if (folio_test_slab(folio))
                u |= 1 << KPF_SLAB;

#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
        u |= kpf_copy_bit(k, KPF_IDLE, PG_idle);
#else
        if (ps.flags & PAGE_SNAPSHOT_PG_IDLE)
                u |= 1 << KPF_IDLE;
#endif

        u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
        u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
        u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate);
        u |= kpf_copy_bit(k, KPF_WRITEBACK, PG_writeback);

        u |= kpf_copy_bit(k, KPF_LRU, PG_lru);
        u |= kpf_copy_bit(k, KPF_REFERENCED, PG_referenced);
        u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active);
        u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim);

#define SWAPCACHE ((1 << PG_swapbacked) | (1 << PG_swapcache))
        if ((k & SWAPCACHE) == SWAPCACHE)
                u |= 1 << KPF_SWAPCACHE;
        u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked);

        u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable);
        u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked);

#ifdef CONFIG_MEMORY_FAILURE
        if (u & (1 << KPF_HUGE))
                u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
        else
                u |= kpf_copy_bit(ps.page_snapshot.flags, KPF_HWPOISON, PG_hwpoison);
#endif

        u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved);
        u |= kpf_copy_bit(k, KPF_OWNER_2, PG_owner_2);
        u |= kpf_copy_bit(k, KPF_PRIVATE, PG_private);
        u |= kpf_copy_bit(k, KPF_PRIVATE_2, PG_private_2);
        u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE, PG_owner_priv_1);
        u |= kpf_copy_bit(k, KPF_ARCH, PG_arch_1);
#ifdef CONFIG_ARCH_USES_PG_ARCH_2
        u |= kpf_copy_bit(k, KPF_ARCH_2, PG_arch_2);
#endif
#ifdef CONFIG_ARCH_USES_PG_ARCH_3
        u |= kpf_copy_bit(k, KPF_ARCH_3, PG_arch_3);
#endif

        return u;
}

/* /proc/kpageflags - an array exposing page flags
 *
 * Each entry is a u64 representing the corresponding
 * physical page flags.
 */
static ssize_t kpageflags_read(struct file *file, char __user *buf,
                               size_t count, loff_t *ppos)
{
        return kpage_read(file, buf, count, ppos, KPAGE_FLAGS);
}

static const struct proc_ops kpageflags_proc_ops = {
        .proc_flags = PROC_ENTRY_PERMANENT,
        .proc_lseek = mem_lseek,
        .proc_read = kpageflags_read,
};
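
/*
 * Illustrative only (not part of the kernel build): a userspace sketch
 * that tests some of the bits exported above for a given pfn. The KPF_*
 * numbering comes from the uapi header <linux/kernel-page-flags.h>;
 * "pfn" is an assumption of this sketch (in practice it is usually
 * derived from /proc/<pid>/pagemap).
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *	#include <linux/kernel-page-flags.h>
 *
 *	unsigned long pfn = 0;	// pfn of interest
 *	uint64_t flags;
 *	int fd = open("/proc/kpageflags", O_RDONLY);
 *
 *	if (fd >= 0 && pread(fd, &flags, sizeof(flags),
 *			     pfn * sizeof(flags)) == sizeof(flags)) {
 *		if (flags & (1ULL << KPF_THP))
 *			printf("pfn %lu belongs to a THP\n", pfn);
 *		if (flags & (1ULL << KPF_NOPAGE))
 *			printf("pfn %lu is a memory hole\n", pfn);
 *	}
 */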

#ifdef CONFIG_MEMCG
static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
                                size_t count, loff_t *ppos)
{
        return kpage_read(file, buf, count, ppos, KPAGE_CGROUP);
}

static const struct proc_ops kpagecgroup_proc_ops = {
        .proc_flags = PROC_ENTRY_PERMANENT,
        .proc_lseek = mem_lseek,
        .proc_read = kpagecgroup_read,
};
#endif /* CONFIG_MEMCG */

static int __init proc_page_init(void)
{
        proc_create("kpagecount", S_IRUSR, NULL, &kpagecount_proc_ops);
        proc_create("kpageflags", S_IRUSR, NULL, &kpageflags_proc_ops);
#ifdef CONFIG_MEMCG
        proc_create("kpagecgroup", S_IRUSR, NULL, &kpagecgroup_proc_ops);
#endif
        return 0;
}
fs_initcall(proc_page_init);
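
/*
 * Illustrative only (not part of the kernel build): with CONFIG_MEMCG,
 * /proc/kpagecgroup uses the same per-pfn indexing and each u64 entry is
 * the inode number of the memory cgroup the page is charged to. A
 * userspace sketch, using the same headers as the sketches above and
 * again assuming "pfn" comes from elsewhere (typically
 * /proc/<pid>/pagemap):
 *
 *	uint64_t ino;
 *	int fd = open("/proc/kpagecgroup", O_RDONLY);
 *
 *	if (fd >= 0 && pread(fd, &ino, sizeof(ino),
 *			     pfn * sizeof(ino)) == sizeof(ino))
 *		printf("pfn %lu charged to memcg inode %llu\n", pfn,
 *		       (unsigned long long)ino);
 */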