1 /* 2 * Debug helper to dump the current kernel pagetables of the system 3 * so that we can see what the various memory ranges are set to. 4 * 5 * (C) Copyright 2008 Intel Corporation 6 * 7 * Author: Arjan van de Ven <arjan@linux.intel.com> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; version 2 12 * of the License. 13 */ 14 15 #include <linux/debugfs.h> 16 #include <linux/mm.h> 17 #include <linux/module.h> 18 #include <linux/seq_file.h> 19 20 #include <asm/pgtable.h> 21 22 /* 23 * The dumper groups pagetable entries of the same type into one, and for 24 * that it needs to keep some state when walking, and flush this state 25 * when a "break" in the continuity is found. 26 */ 27 struct pg_state { 28 int level; 29 pgprot_t current_prot; 30 unsigned long start_address; 31 unsigned long current_address; 32 const struct addr_marker *marker; 33 }; 34 35 struct addr_marker { 36 unsigned long start_address; 37 const char *name; 38 }; 39 40 /* Address space markers hints */ 41 static struct addr_marker address_markers[] = { 42 { 0, "User Space" }, 43 #ifdef CONFIG_X86_64 44 { 0x8000000000000000UL, "Kernel Space" }, 45 { PAGE_OFFSET, "Low Kernel Mapping" }, 46 { VMALLOC_START, "vmalloc() Area" }, 47 { VMEMMAP_START, "Vmemmap" }, 48 { __START_KERNEL_map, "High Kernel Mapping" }, 49 { MODULES_VADDR, "Modules" }, 50 { MODULES_END, "End Modules" }, 51 #else 52 { PAGE_OFFSET, "Kernel Mapping" }, 53 { 0/* VMALLOC_START */, "vmalloc() Area" }, 54 { 0/*VMALLOC_END*/, "vmalloc() End" }, 55 # ifdef CONFIG_HIGHMEM 56 { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, 57 # endif 58 { 0/*FIXADDR_START*/, "Fixmap Area" }, 59 #endif 60 { -1, NULL } /* End of list */ 61 }; 62 63 /* Multipliers for offsets within the PTEs */ 64 #define PTE_LEVEL_MULT (PAGE_SIZE) 65 #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) 66 #define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT) 67 #define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT) 68 69 /* 70 * Print a readable form of a pgprot_t to the seq_file 71 */ 72 static void printk_prot(struct seq_file *m, pgprot_t prot, int level) 73 { 74 pgprotval_t pr = pgprot_val(prot); 75 static const char * const level_name[] = 76 { "cr3", "pgd", "pud", "pmd", "pte" }; 77 78 if (!pgprot_val(prot)) { 79 /* Not present */ 80 seq_printf(m, " "); 81 } else { 82 if (pr & _PAGE_USER) 83 seq_printf(m, "USR "); 84 else 85 seq_printf(m, " "); 86 if (pr & _PAGE_RW) 87 seq_printf(m, "RW "); 88 else 89 seq_printf(m, "ro "); 90 if (pr & _PAGE_PWT) 91 seq_printf(m, "PWT "); 92 else 93 seq_printf(m, " "); 94 if (pr & _PAGE_PCD) 95 seq_printf(m, "PCD "); 96 else 97 seq_printf(m, " "); 98 99 /* Bit 9 has a different meaning on level 3 vs 4 */ 100 if (level <= 3) { 101 if (pr & _PAGE_PSE) 102 seq_printf(m, "PSE "); 103 else 104 seq_printf(m, " "); 105 } else { 106 if (pr & _PAGE_PAT) 107 seq_printf(m, "pat "); 108 else 109 seq_printf(m, " "); 110 } 111 if (pr & _PAGE_GLOBAL) 112 seq_printf(m, "GLB "); 113 else 114 seq_printf(m, " "); 115 if (pr & _PAGE_NX) 116 seq_printf(m, "NX "); 117 else 118 seq_printf(m, "x "); 119 } 120 seq_printf(m, "%s\n", level_name[level]); 121 } 122 123 /* 124 * On 64 bits, sign-extend the 48 bit address to 64 bit 125 */ 126 static unsigned long normalize_addr(unsigned long u) 127 { 128 #ifdef CONFIG_X86_64 129 return (signed long)(u << 16) >> 16; 130 #else 131 return u; 132 #endif 133 } 134 135 /* 136 * This function gets called on a break in a continuous series 137 * of PTE entries; the next one is different so we need to 138 * print what we collected so far. 139 */ 140 static void note_page(struct seq_file *m, struct pg_state *st, 141 pgprot_t new_prot, int level) 142 { 143 pgprotval_t prot, cur; 144 static const char units[] = "KMGTPE"; 145 146 /* 147 * If we have a "break" in the series, we need to flush the state that 148 * we have now. "break" is either changing perms, levels or 149 * address space marker. 150 */ 151 prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; 152 cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; 153 154 if (!st->level) { 155 /* First entry */ 156 st->current_prot = new_prot; 157 st->level = level; 158 st->marker = address_markers; 159 seq_printf(m, "---[ %s ]---\n", st->marker->name); 160 } else if (prot != cur || level != st->level || 161 st->current_address >= st->marker[1].start_address) { 162 const char *unit = units; 163 unsigned long delta; 164 int width = sizeof(unsigned long) * 2; 165 166 /* 167 * Now print the actual finished series 168 */ 169 seq_printf(m, "0x%0*lx-0x%0*lx ", 170 width, st->start_address, 171 width, st->current_address); 172 173 delta = (st->current_address - st->start_address) >> 10; 174 while (!(delta & 1023) && unit[1]) { 175 delta >>= 10; 176 unit++; 177 } 178 seq_printf(m, "%9lu%c ", delta, *unit); 179 printk_prot(m, st->current_prot, st->level); 180 181 /* 182 * We print markers for special areas of address space, 183 * such as the start of vmalloc space etc. 184 * This helps in the interpretation. 185 */ 186 if (st->current_address >= st->marker[1].start_address) { 187 st->marker++; 188 seq_printf(m, "---[ %s ]---\n", st->marker->name); 189 } 190 191 st->start_address = st->current_address; 192 st->current_prot = new_prot; 193 st->level = level; 194 } 195 } 196 197 static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, 198 unsigned long P) 199 { 200 int i; 201 pte_t *start; 202 203 start = (pte_t *) pmd_page_vaddr(addr); 204 for (i = 0; i < PTRS_PER_PTE; i++) { 205 pgprot_t prot = pte_pgprot(*start); 206 207 st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); 208 note_page(m, st, prot, 4); 209 start++; 210 } 211 } 212 213 #if PTRS_PER_PMD > 1 214 215 static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, 216 unsigned long P) 217 { 218 int i; 219 pmd_t *start; 220 221 start = (pmd_t *) pud_page_vaddr(addr); 222 for (i = 0; i < PTRS_PER_PMD; i++) { 223 st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); 224 if (!pmd_none(*start)) { 225 pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK; 226 227 if (pmd_large(*start) || !pmd_present(*start)) 228 note_page(m, st, __pgprot(prot), 3); 229 else 230 walk_pte_level(m, st, *start, 231 P + i * PMD_LEVEL_MULT); 232 } else 233 note_page(m, st, __pgprot(0), 3); 234 start++; 235 } 236 } 237 238 #else 239 #define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p) 240 #define pud_large(a) pmd_large(__pmd(pud_val(a))) 241 #define pud_none(a) pmd_none(__pmd(pud_val(a))) 242 #endif 243 244 #if PTRS_PER_PUD > 1 245 246 static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, 247 unsigned long P) 248 { 249 int i; 250 pud_t *start; 251 252 start = (pud_t *) pgd_page_vaddr(addr); 253 254 for (i = 0; i < PTRS_PER_PUD; i++) { 255 st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); 256 if (!pud_none(*start)) { 257 pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK; 258 259 if (pud_large(*start) || !pud_present(*start)) 260 note_page(m, st, __pgprot(prot), 2); 261 else 262 walk_pmd_level(m, st, *start, 263 P + i * PUD_LEVEL_MULT); 264 } else 265 note_page(m, st, __pgprot(0), 2); 266 267 start++; 268 } 269 } 270 271 #else 272 #define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p) 273 #define pgd_large(a) pud_large(__pud(pgd_val(a))) 274 #define pgd_none(a) pud_none(__pud(pgd_val(a))) 275 #endif 276 277 static void walk_pgd_level(struct seq_file *m) 278 { 279 #ifdef CONFIG_X86_64 280 pgd_t *start = (pgd_t *) &init_level4_pgt; 281 #else 282 pgd_t *start = swapper_pg_dir; 283 #endif 284 int i; 285 struct pg_state st; 286 287 memset(&st, 0, sizeof(st)); 288 289 for (i = 0; i < PTRS_PER_PGD; i++) { 290 st.current_address = normalize_addr(i * PGD_LEVEL_MULT); 291 if (!pgd_none(*start)) { 292 pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; 293 294 if (pgd_large(*start) || !pgd_present(*start)) 295 note_page(m, &st, __pgprot(prot), 1); 296 else 297 walk_pud_level(m, &st, *start, 298 i * PGD_LEVEL_MULT); 299 } else 300 note_page(m, &st, __pgprot(0), 1); 301 302 start++; 303 } 304 305 /* Flush out the last page */ 306 st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); 307 note_page(m, &st, __pgprot(0), 0); 308 } 309 310 static int ptdump_show(struct seq_file *m, void *v) 311 { 312 walk_pgd_level(m); 313 return 0; 314 } 315 316 static int ptdump_open(struct inode *inode, struct file *filp) 317 { 318 return single_open(filp, ptdump_show, NULL); 319 } 320 321 static const struct file_operations ptdump_fops = { 322 .open = ptdump_open, 323 .read = seq_read, 324 .llseek = seq_lseek, 325 .release = single_release, 326 }; 327 328 static int pt_dump_init(void) 329 { 330 struct dentry *pe; 331 332 #ifdef CONFIG_X86_32 333 /* Not a compile-time constant on x86-32 */ 334 address_markers[2].start_address = VMALLOC_START; 335 address_markers[3].start_address = VMALLOC_END; 336 # ifdef CONFIG_HIGHMEM 337 address_markers[4].start_address = PKMAP_BASE; 338 address_markers[5].start_address = FIXADDR_START; 339 # else 340 address_markers[4].start_address = FIXADDR_START; 341 # endif 342 #endif 343 344 pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, 345 &ptdump_fops); 346 if (!pe) 347 return -ENOMEM; 348 349 return 0; 350 } 351 352 __initcall(pt_dump_init); 353 MODULE_LICENSE("GPL"); 354 MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); 355 MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); 356