1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright 2016, Rashmica Gupta, IBM Corp. 4 * 5 * This traverses the kernel virtual memory and dumps the pages that are in 6 * the hash pagetable, along with their flags to 7 * /sys/kernel/debug/kernel_hash_pagetable. 8 * 9 * If radix is enabled then there is no hash page table and so no debugfs file 10 * is generated. 11 */ 12 #include <linux/debugfs.h> 13 #include <linux/fs.h> 14 #include <linux/io.h> 15 #include <linux/mm.h> 16 #include <linux/sched.h> 17 #include <linux/seq_file.h> 18 #include <linux/const.h> 19 #include <asm/page.h> 20 #include <asm/plpar_wrappers.h> 21 #include <linux/memblock.h> 22 #include <asm/firmware.h> 23 #include <asm/pgalloc.h> 24 25 struct pg_state { 26 struct seq_file *seq; 27 const struct addr_marker *marker; 28 unsigned long start_address; 29 unsigned int level; 30 u64 current_flags; 31 }; 32 33 struct addr_marker { 34 unsigned long start_address; 35 const char *name; 36 }; 37 38 static struct addr_marker address_markers[] = { 39 { 0, "Start of kernel VM" }, 40 { 0, "vmalloc() Area" }, 41 { 0, "vmalloc() End" }, 42 { 0, "isa I/O start" }, 43 { 0, "isa I/O end" }, 44 { 0, "phb I/O start" }, 45 { 0, "phb I/O end" }, 46 { 0, "I/O remap start" }, 47 { 0, "I/O remap end" }, 48 { 0, "vmemmap start" }, 49 { -1, NULL }, 50 }; 51 52 struct flag_info { 53 u64 mask; 54 u64 val; 55 const char *set; 56 const char *clear; 57 bool is_val; 58 int shift; 59 }; 60 61 static const struct flag_info v_flag_array[] = { 62 { 63 .mask = SLB_VSID_B, 64 .val = SLB_VSID_B_256M, 65 .set = "ssize: 256M", 66 .clear = "ssize: 1T ", 67 }, { 68 .mask = HPTE_V_SECONDARY, 69 .val = HPTE_V_SECONDARY, 70 .set = "secondary", 71 .clear = "primary ", 72 }, { 73 .mask = HPTE_V_VALID, 74 .val = HPTE_V_VALID, 75 .set = "valid ", 76 .clear = "invalid", 77 }, { 78 .mask = HPTE_V_BOLTED, 79 .val = HPTE_V_BOLTED, 80 .set = "bolted", 81 .clear = "", 82 } 83 }; 84 85 static const struct flag_info r_flag_array[] = { 86 { 87 .mask = HPTE_R_PP0 | HPTE_R_PP, 88 .val = PP_RWXX, 89 .set = "prot:RW--", 90 }, { 91 .mask = HPTE_R_PP0 | HPTE_R_PP, 92 .val = PP_RWRX, 93 .set = "prot:RWR-", 94 }, { 95 .mask = HPTE_R_PP0 | HPTE_R_PP, 96 .val = PP_RWRW, 97 .set = "prot:RWRW", 98 }, { 99 .mask = HPTE_R_PP0 | HPTE_R_PP, 100 .val = PP_RXRX, 101 .set = "prot:R-R-", 102 }, { 103 .mask = HPTE_R_PP0 | HPTE_R_PP, 104 .val = PP_RXXX, 105 .set = "prot:R---", 106 }, { 107 .mask = HPTE_R_KEY_HI | HPTE_R_KEY_LO, 108 .val = HPTE_R_KEY_HI | HPTE_R_KEY_LO, 109 .set = "key", 110 .clear = "", 111 .is_val = true, 112 }, { 113 .mask = HPTE_R_R, 114 .val = HPTE_R_R, 115 .set = "ref", 116 .clear = " ", 117 }, { 118 .mask = HPTE_R_C, 119 .val = HPTE_R_C, 120 .set = "changed", 121 .clear = " ", 122 }, { 123 .mask = HPTE_R_N, 124 .val = HPTE_R_N, 125 .set = "no execute", 126 }, { 127 .mask = HPTE_R_WIMG, 128 .val = HPTE_R_W, 129 .set = "writethru", 130 }, { 131 .mask = HPTE_R_WIMG, 132 .val = HPTE_R_I, 133 .set = "no cache", 134 }, { 135 .mask = HPTE_R_WIMG, 136 .val = HPTE_R_G, 137 .set = "guarded", 138 } 139 }; 140 141 static int calculate_pagesize(struct pg_state *st, int ps, char s[]) 142 { 143 static const char units[] = "BKMGTPE"; 144 const char *unit = units; 145 146 while (ps > 9 && unit[1]) { 147 ps -= 10; 148 unit++; 149 } 150 seq_printf(st->seq, " %s_ps: %i%c\t", s, 1<<ps, *unit); 151 return ps; 152 } 153 154 static void dump_flag_info(struct pg_state *st, const struct flag_info 155 *flag, u64 pte, int num) 156 { 157 unsigned int i; 158 159 for (i = 0; i < num; i++, flag++) { 160 const char *s = NULL; 161 u64 val; 162 163 /* flag not defined so don't check it */ 164 if (flag->mask == 0) 165 continue; 166 /* Some 'flags' are actually values */ 167 if (flag->is_val) { 168 val = pte & flag->val; 169 if (flag->shift) 170 val = val >> flag->shift; 171 seq_printf(st->seq, " %s:%llx", flag->set, val); 172 } else { 173 if ((pte & flag->mask) == flag->val) 174 s = flag->set; 175 else 176 s = flag->clear; 177 if (s) 178 seq_printf(st->seq, " %s", s); 179 } 180 } 181 } 182 183 static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r, 184 unsigned long rpn, int bps, int aps, unsigned long lp) 185 { 186 int aps_index; 187 188 while (ea >= st->marker[1].start_address) { 189 st->marker++; 190 seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); 191 } 192 seq_printf(st->seq, "0x%lx:\t", ea); 193 seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v)); 194 dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array)); 195 seq_printf(st->seq, " rpn: %lx\t", rpn); 196 dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array)); 197 198 calculate_pagesize(st, bps, "base"); 199 aps_index = calculate_pagesize(st, aps, "actual"); 200 if (aps_index != 2) 201 seq_printf(st->seq, "LP enc: %lx", lp); 202 seq_putc(st->seq, '\n'); 203 } 204 205 206 static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 207 *r) 208 { 209 struct hash_pte *hptep; 210 unsigned long hash, vsid, vpn, hpte_group, want_v, hpte_v; 211 int i, ssize = mmu_kernel_ssize; 212 unsigned long shift = mmu_psize_defs[psize].shift; 213 214 /* calculate hash */ 215 vsid = get_kernel_vsid(ea, ssize); 216 vpn = hpt_vpn(ea, vsid, ssize); 217 hash = hpt_hash(vpn, shift, ssize); 218 want_v = hpte_encode_avpn(vpn, psize, ssize); 219 if (cpu_has_feature(CPU_FTR_ARCH_300)) 220 want_v = hpte_old_to_new_v(want_v); 221 222 /* to check in the secondary hash table, we invert the hash */ 223 if (!primary) 224 hash = ~hash; 225 hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; 226 for (i = 0; i < HPTES_PER_GROUP; i++) { 227 hptep = htab_address + hpte_group; 228 hpte_v = be64_to_cpu(hptep->v); 229 230 if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { 231 /* HPTE matches */ 232 *v = be64_to_cpu(hptep->v); 233 *r = be64_to_cpu(hptep->r); 234 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 235 *v = hpte_new_to_old_v(*v, *r); 236 *r = hpte_new_to_old_r(*r); 237 } 238 return 0; 239 } 240 ++hpte_group; 241 } 242 return -1; 243 } 244 245 static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) 246 { 247 struct { 248 unsigned long v; 249 unsigned long r; 250 } ptes[4]; 251 unsigned long vsid, vpn, hash, hpte_group, want_v; 252 int i, j, ssize = mmu_kernel_ssize; 253 long lpar_rc = 0; 254 unsigned long shift = mmu_psize_defs[psize].shift; 255 256 /* calculate hash */ 257 vsid = get_kernel_vsid(ea, ssize); 258 vpn = hpt_vpn(ea, vsid, ssize); 259 hash = hpt_hash(vpn, shift, ssize); 260 want_v = hpte_encode_avpn(vpn, psize, ssize); 261 262 /* to check in the secondary hash table, we invert the hash */ 263 if (!primary) 264 hash = ~hash; 265 hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; 266 /* see if we can find an entry in the hpte with this hash */ 267 for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { 268 lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); 269 270 if (lpar_rc) 271 continue; 272 for (j = 0; j < 4; j++) { 273 if (HPTE_V_COMPARE(ptes[j].v, want_v) && 274 (ptes[j].v & HPTE_V_VALID)) { 275 /* HPTE matches */ 276 *v = ptes[j].v; 277 *r = ptes[j].r; 278 return 0; 279 } 280 } 281 } 282 return -1; 283 } 284 285 static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps, 286 unsigned long *lp_bits) 287 { 288 struct mmu_psize_def entry; 289 unsigned long arpn, mask, lp; 290 int penc = -2, idx = 0, shift; 291 292 /*. 293 * The LP field has 8 bits. Depending on the actual page size, some of 294 * these bits are concatenated with the APRN to get the RPN. The rest 295 * of the bits in the LP field is the LP value and is an encoding for 296 * the base page size and the actual page size. 297 * 298 * - find the mmu entry for our base page size 299 * - go through all page encodings and use the associated mask to 300 * find an encoding that matches our encoding in the LP field. 301 */ 302 arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; 303 lp = arpn & 0xff; 304 305 entry = mmu_psize_defs[bps]; 306 while (idx < MMU_PAGE_COUNT) { 307 penc = entry.penc[idx]; 308 if ((penc != -1) && (mmu_psize_defs[idx].shift)) { 309 shift = mmu_psize_defs[idx].shift - HPTE_R_RPN_SHIFT; 310 mask = (0x1 << (shift)) - 1; 311 if ((lp & mask) == penc) { 312 *aps = mmu_psize_to_shift(idx); 313 *lp_bits = lp & mask; 314 *rpn = arpn >> shift; 315 return; 316 } 317 } 318 idx++; 319 } 320 } 321 322 static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v, 323 u64 *r) 324 { 325 if (IS_ENABLED(CONFIG_PPC_PSERIES) && firmware_has_feature(FW_FEATURE_LPAR)) 326 return pseries_find(ea, psize, primary, v, r); 327 328 return native_find(ea, psize, primary, v, r); 329 } 330 331 static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize) 332 { 333 unsigned long slot; 334 u64 v = 0, r = 0; 335 unsigned long rpn, lp_bits; 336 int base_psize = 0, actual_psize = 0; 337 338 if (ea < PAGE_OFFSET) 339 return -1; 340 341 /* Look in primary table */ 342 slot = base_hpte_find(ea, psize, true, &v, &r); 343 344 /* Look in secondary table */ 345 if (slot == -1) 346 slot = base_hpte_find(ea, psize, false, &v, &r); 347 348 /* No entry found */ 349 if (slot == -1) 350 return -1; 351 352 /* 353 * We found an entry in the hash page table: 354 * - check that this has the same base page 355 * - find the actual page size 356 * - find the RPN 357 */ 358 base_psize = mmu_psize_to_shift(psize); 359 360 if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) { 361 decode_r(psize, r, &rpn, &actual_psize, &lp_bits); 362 } else { 363 /* 4K actual page size */ 364 actual_psize = 12; 365 rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; 366 /* In this case there are no LP bits */ 367 lp_bits = -1; 368 } 369 /* 370 * We didn't find a matching encoding, so the PTE we found isn't for 371 * this address. 372 */ 373 if (actual_psize == -1) 374 return -1; 375 376 dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits); 377 return 0; 378 } 379 380 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) 381 { 382 pte_t *pte = pte_offset_kernel(pmd, 0); 383 unsigned long addr, pteval, psize; 384 int i, status; 385 386 for (i = 0; i < PTRS_PER_PTE; i++, pte++) { 387 addr = start + i * PAGE_SIZE; 388 pteval = pte_val(*pte); 389 390 if (addr < VMALLOC_END) 391 psize = mmu_vmalloc_psize; 392 else 393 psize = mmu_io_psize; 394 395 /* check for secret 4K mappings */ 396 if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && 397 ((pteval & H_PAGE_COMBO) == H_PAGE_COMBO || 398 (pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN)) 399 psize = mmu_io_psize; 400 401 /* check for hashpte */ 402 status = hpte_find(st, addr, psize); 403 404 if (((pteval & H_PAGE_HASHPTE) != H_PAGE_HASHPTE) 405 && (status != -1)) { 406 /* found a hpte that is not in the linux page tables */ 407 seq_printf(st->seq, "page probably bolted before linux" 408 " pagetables were set: addr:%lx, pteval:%lx\n", 409 addr, pteval); 410 } 411 } 412 } 413 414 static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) 415 { 416 pmd_t *pmd = pmd_offset(pud, 0); 417 unsigned long addr; 418 unsigned int i; 419 420 for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { 421 addr = start + i * PMD_SIZE; 422 if (!pmd_none(*pmd)) 423 /* pmd exists */ 424 walk_pte(st, pmd, addr); 425 } 426 } 427 428 static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) 429 { 430 pud_t *pud = pud_offset(p4d, 0); 431 unsigned long addr; 432 unsigned int i; 433 434 for (i = 0; i < PTRS_PER_PUD; i++, pud++) { 435 addr = start + i * PUD_SIZE; 436 if (!pud_none(*pud)) 437 /* pud exists */ 438 walk_pmd(st, pud, addr); 439 } 440 } 441 442 static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start) 443 { 444 p4d_t *p4d = p4d_offset(pgd, 0); 445 unsigned long addr; 446 unsigned int i; 447 448 for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { 449 addr = start + i * P4D_SIZE; 450 if (!p4d_none(*p4d)) 451 /* p4d exists */ 452 walk_pud(st, p4d, addr); 453 } 454 } 455 456 static void walk_pagetables(struct pg_state *st) 457 { 458 pgd_t *pgd = pgd_offset_k(0UL); 459 unsigned int i; 460 unsigned long addr; 461 462 /* 463 * Traverse the linux pagetable structure and dump pages that are in 464 * the hash pagetable. 465 */ 466 for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { 467 addr = KERN_VIRT_START + i * PGDIR_SIZE; 468 if (!pgd_none(*pgd)) 469 /* pgd exists */ 470 walk_p4d(st, pgd, addr); 471 } 472 } 473 474 475 static void walk_linearmapping(struct pg_state *st) 476 { 477 unsigned long addr; 478 479 /* 480 * Traverse the linear mapping section of virtual memory and dump pages 481 * that are in the hash pagetable. 482 */ 483 unsigned long psize = 1 << mmu_psize_defs[mmu_linear_psize].shift; 484 485 for (addr = PAGE_OFFSET; addr < PAGE_OFFSET + 486 memblock_end_of_DRAM(); addr += psize) 487 hpte_find(st, addr, mmu_linear_psize); 488 } 489 490 static void walk_vmemmap(struct pg_state *st) 491 { 492 struct vmemmap_backing *ptr = vmemmap_list; 493 494 if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) 495 return; 496 /* 497 * Traverse the vmemmaped memory and dump pages that are in the hash 498 * pagetable. 499 */ 500 while (ptr) { 501 hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize); 502 ptr = ptr->list; 503 } 504 seq_puts(st->seq, "---[ vmemmap end ]---\n"); 505 } 506 507 static void populate_markers(void) 508 { 509 address_markers[0].start_address = PAGE_OFFSET; 510 address_markers[1].start_address = VMALLOC_START; 511 address_markers[2].start_address = VMALLOC_END; 512 address_markers[3].start_address = ISA_IO_BASE; 513 address_markers[4].start_address = ISA_IO_END; 514 address_markers[5].start_address = PHB_IO_BASE; 515 address_markers[6].start_address = PHB_IO_END; 516 address_markers[7].start_address = IOREMAP_BASE; 517 address_markers[8].start_address = IOREMAP_END; 518 address_markers[9].start_address = H_VMEMMAP_START; 519 } 520 521 static int ptdump_show(struct seq_file *m, void *v) 522 { 523 struct pg_state st = { 524 .seq = m, 525 .start_address = PAGE_OFFSET, 526 .marker = address_markers, 527 }; 528 /* 529 * Traverse the 0xc, 0xd and 0xf areas of the kernel virtual memory and 530 * dump pages that are in the hash pagetable. 531 */ 532 walk_linearmapping(&st); 533 walk_pagetables(&st); 534 walk_vmemmap(&st); 535 return 0; 536 } 537 538 DEFINE_SHOW_ATTRIBUTE(ptdump); 539 540 static int ptdump_init(void) 541 { 542 if (!radix_enabled()) { 543 populate_markers(); 544 debugfs_create_file("kernel_hash_pagetable", 0400, NULL, NULL, 545 &ptdump_fops); 546 } 547 return 0; 548 } 549 device_initcall(ptdump_init); 550