/*
 * Debug helper to dump the current kernel pagetables of the system
 * so that we can see what the various memory ranges are set to.
 *
 * (C) Copyright 2008 Intel Corporation
 *
 * Author: Arjan van de Ven <arjan@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/seq_file.h>

#include <asm/pgtable.h>

/*
 * The dumper groups pagetable entries of the same type into one, and for
 * that it needs to keep some state when walking, and flush this state
 * when a "break" in the continuity is found.
 */
struct pg_state {
	int level;			/* level of the current run: 1=pgd..4=pte, 0 = nothing seen yet */
	pgprot_t current_prot;		/* protection bits shared by the current run */
	unsigned long start_address;	/* first virtual address of the current run */
	unsigned long current_address;	/* virtual address of the entry being visited */
	const struct addr_marker *marker; /* marker whose region the walk is currently inside */
};

/* A named boundary in the virtual address space, printed as a separator line */
struct addr_marker {
	unsigned long start_address;
	const char *name;
};

/* Address space markers hints */
static struct addr_marker address_markers[] = {
	{ 0, "User Space" },
#ifdef CONFIG_X86_64
	{ 0x8000000000000000UL, "Kernel Space" },
	{ 0xffff810000000000UL, "Low Kernel Mapping" },
	{ VMALLOC_START, "vmalloc() Area" },
	{ MODULES_VADDR, "Modules" },
	{ MODULES_END, "End Modules" },
	{ VMEMMAP_START, "Vmemmap" },
	{ __START_KERNEL_map, "High Kernel Mapping" },
#else
	{ PAGE_OFFSET, "Kernel Mapping" },
	/*
	 * The zeroed entries below are placeholders: on 32-bit these
	 * boundaries are not compile-time constants, so they are filled
	 * in at runtime by pt_dump_init().  Keep the order here in sync
	 * with the index assignments made there.
	 */
	{ 0/* VMALLOC_START */, "vmalloc() Area" },
	{ 0/*VMALLOC_END*/, "vmalloc() End" },
# ifdef CONFIG_HIGHMEM
	{ 0/*PKMAP_BASE*/, "Persisent kmap() Area" },
# endif
	{ 0/*FIXADDR_START*/, "Fixmap Area" },
#endif
	{ -1, NULL } /* End of list */
};

/* Multipliers for offsets within the PTEs */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)

/*
 * Print a readable form of a pgprot_t to the seq_file
 *
 * Each recognized protection bit prints a fixed-width field (either its
 * mnemonic or padding spaces) so that the columns of successive output
 * lines stay aligned.  The line ends with the name of the pagetable
 * level the entry was found at.
 */
static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
{
	pgprotval_t pr = pgprot_val(prot);
	/* Indexed by 'level'; 0 is the pseudo-level used for the final flush */
	static const char * const level_name[] =
		{ "cr3", "pgd", "pud", "pmd", "pte" };

	if (!pgprot_val(prot)) {
		/* Not present */
		seq_printf(m, " ");
	} else {
		if (pr & _PAGE_USER)
			seq_printf(m, "USR ");
		else
			seq_printf(m, " ");
		if (pr & _PAGE_RW)
			seq_printf(m, "RW ");
		else
			seq_printf(m, "ro ");
		if (pr & _PAGE_PWT)
			seq_printf(m, "PWT ");
		else
			seq_printf(m, " ");
		if (pr & _PAGE_PCD)
			seq_printf(m, "PCD ");
		else
			seq_printf(m, " ");

		/* Bit 9 has a different meaning on level 3 vs 4 */
		if (level <= 3) {
			if (pr & _PAGE_PSE)
				seq_printf(m, "PSE ");
			else
				seq_printf(m, " ");
		} else {
			if (pr & _PAGE_PAT)
				seq_printf(m, "pat ");
			else
				seq_printf(m, " ");
		}
		if (pr & _PAGE_GLOBAL)
			seq_printf(m, "GLB ");
		else
			seq_printf(m, " ");
		if (pr & _PAGE_NX)
			seq_printf(m, "NX ");
		else
			seq_printf(m, "x ");
	}
	seq_printf(m, "%s\n", level_name[level]);
}

/*
 * On 64 bits, sign-extend the 48 bit address to 64 bit
 *
 * Walk offsets are computed as plain multiples of the table index, so
 * the upper 16 bits are zero; shifting up and arithmetically back down
 * restores the canonical (sign-extended) form of the address.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
	return (signed long)(u << 16) >> 16;
#else
	return u;
#endif
}

/*
 * This function gets called on a break in a continuous series
 * of PTE entries; the next one is different so we need to
 * print what we collected so far.
 */
static void note_page(struct seq_file *m, struct pg_state *st,
							pgprot_t new_prot, int level)
{
	pgprotval_t prot, cur;
	static const char units[] = "KMGTPE";

	/*
	 * If we have a "break" in the series, we need to flush the state that
	 * we have now. "break" is either changing perms, levels or
	 * address space marker.
	 */
	/* Mask off the physical-address part, keeping only the flag bits */
	prot = pgprot_val(new_prot) & ~(PTE_MASK);
	cur = pgprot_val(st->current_prot) & ~(PTE_MASK);

	if (!st->level) {
		/* First entry */
		st->current_prot = new_prot;
		st->level = level;
		st->marker = address_markers;
		seq_printf(m, "---[ %s ]---\n", st->marker->name);
	} else if (prot != cur || level != st->level ||
			st->current_address >= st->marker[1].start_address) {
		const char *unit = units;
		unsigned long delta;

		/*
		 * Now print the actual finished series
		 */
		seq_printf(m, "0x%p-0x%p ",
			   (void *)st->start_address,
			   (void *)st->current_address);

		/*
		 * Scale the size of the run to the largest unit that
		 * divides it evenly, starting from KB.
		 */
		delta = (st->current_address - st->start_address) >> 10;
		while (!(delta & 1023) && unit[1]) {
			delta >>= 10;
			unit++;
		}
		seq_printf(m, "%9lu%c ", delta, *unit);
		printk_prot(m, st->current_prot, st->level);

		/*
		 * We print markers for special areas of address space,
		 * such as the start of vmalloc space etc.
		 * This helps in the interpretation.
		 */
		if (st->current_address >= st->marker[1].start_address) {
			st->marker++;
			seq_printf(m, "---[ %s ]---\n", st->marker->name);
		}

		/* Start a new run at the current address */
		st->start_address = st->current_address;
		st->current_prot = new_prot;
		st->level = level;
	}
}

/*
 * Walk one page table (lowest level) and hand each PTE to note_page().
 *
 * @addr: the PMD entry referencing the page table to walk
 * @P:    virtual address covered by the first entry of that table
 */
static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
							unsigned long P)
{
	int i;
	pte_t *start;

	start = (pte_t *) pmd_page_vaddr(addr);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		pgprot_t prot = pte_pgprot(*start);

		st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
		note_page(m, st, prot, 4);
		start++;
	}
}

#if PTRS_PER_PMD > 1

/*
 * Walk one PMD-level table.  Large (2M/4M) and non-present entries are
 * reported directly; normal entries are descended into via
 * walk_pte_level().  @P is the virtual address covered by the first
 * entry of the table.
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
							unsigned long P)
{
	int i;
	pmd_t *start;

	start = (pmd_t *) pud_page_vaddr(addr);
	for (i = 0; i < PTRS_PER_PMD; i++) {
		st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
		if (!pmd_none(*start)) {
			pgprotval_t prot = pmd_val(*start) & ~PTE_MASK;

			if (pmd_large(*start) || !pmd_present(*start))
				note_page(m, st, __pgprot(prot), 3);
			else
				walk_pte_level(m, st, *start,
					       P + i * PMD_LEVEL_MULT);
		} else
			note_page(m, st, __pgprot(0), 3);
		start++;
	}
}

#else
/* Folded PMD level (2-level paging): forward straight to the PTE walker */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a) pmd_none(__pmd(pud_val(a)))
#endif

#if PTRS_PER_PUD > 1

/*
 * Walk one PUD-level table, reporting large/non-present entries and
 * descending into the rest via walk_pmd_level().  @P is the virtual
 * address covered by the first entry of the table.
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
							unsigned long P)
{
	int i;
	pud_t *start;

	start = (pud_t *) pgd_page_vaddr(addr);

	for (i = 0; i < PTRS_PER_PUD; i++) {
		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
		if (!pud_none(*start)) {
			pgprotval_t prot = pud_val(*start) & ~PTE_MASK;

			if (pud_large(*start) || !pud_present(*start))
				note_page(m, st, __pgprot(prot), 2);
			else
				walk_pmd_level(m, st, *start,
					       P + i * PUD_LEVEL_MULT);
		} else
			note_page(m, st, __pgprot(0), 2);

		start++;
	}
}

#else
/* Folded PUD level: forward straight to the PMD walker */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif

/*
 * Top-level walk: iterate over the kernel's reference page directory
 * and recurse down through all present levels, emitting the grouped
 * ranges into the seq_file.
 */
static void walk_pgd_level(struct seq_file *m)
{
#ifdef CONFIG_X86_64
	pgd_t *start = (pgd_t *) &init_level4_pgt;
#else
	pgd_t *start = swapper_pg_dir;
#endif
	int i;
	struct pg_state st;

	memset(&st, 0, sizeof(st));

	for (i = 0; i < PTRS_PER_PGD; i++) {
		st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
		if (!pgd_none(*start)) {
			pgprotval_t prot = pgd_val(*start) & ~PTE_MASK;

			if (pgd_large(*start) || !pgd_present(*start))
				note_page(m, &st, __pgprot(prot), 1);
			else
				walk_pud_level(m, &st, *start,
					       i * PGD_LEVEL_MULT);
		} else
			note_page(m, &st, __pgprot(0), 1);

		start++;
	}

	/* Flush out the last page */
	st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
	note_page(m, &st, __pgprot(0), 0);
}

/* seq_file show callback: dump the whole pagetable in one go */
static int ptdump_show(struct seq_file *m, void *v)
{
	walk_pgd_level(m);
	return 0;
}

static int ptdump_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, ptdump_show, NULL);
}

static const struct file_operations ptdump_fops = {
	.open		= ptdump_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/*
 * Register the debugfs file.  Also patches the runtime-only address
 * markers on 32-bit (see the placeholder comment in address_markers[]).
 *
 * Returns 0 on success, -ENOMEM if the debugfs file could not be
 * created.
 */
int pt_dump_init(void)
{
	struct dentry *pe;

#ifdef CONFIG_X86_32
	/* Not a compile-time constant on x86-32 */
	address_markers[2].start_address = VMALLOC_START;
	address_markers[3].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM
	address_markers[4].start_address = PKMAP_BASE;
	address_markers[5].start_address = FIXADDR_START;
# else
	address_markers[4].start_address = FIXADDR_START;
# endif
#endif

	pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
				 &ptdump_fops);
	if (!pe)
		return -ENOMEM;

	return 0;
}

__initcall(pt_dump_init);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");