1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * This kernel test validates architecture page table helpers and 4 * accessors and helps in verifying their continued compliance with 5 * expected generic MM semantics. 6 * 7 * Copyright (C) 2019 ARM Ltd. 8 * 9 * Author: Anshuman Khandual <anshuman.khandual@arm.com> 10 */ 11 #define pr_fmt(fmt) "debug_vm_pgtable: [%-25s]: " fmt, __func__ 12 13 #include <linux/gfp.h> 14 #include <linux/highmem.h> 15 #include <linux/hugetlb.h> 16 #include <linux/kernel.h> 17 #include <linux/kconfig.h> 18 #include <linux/mm.h> 19 #include <linux/mman.h> 20 #include <linux/mm_types.h> 21 #include <linux/module.h> 22 #include <linux/pfn_t.h> 23 #include <linux/printk.h> 24 #include <linux/pgtable.h> 25 #include <linux/random.h> 26 #include <linux/spinlock.h> 27 #include <linux/swap.h> 28 #include <linux/swapops.h> 29 #include <linux/start_kernel.h> 30 #include <linux/sched/mm.h> 31 #include <linux/io.h> 32 #include <asm/pgalloc.h> 33 #include <asm/tlbflush.h> 34 35 /* 36 * Please refer Documentation/vm/arch_pgtable_helpers.rst for the semantics 37 * expectations that are being validated here. All future changes in here 38 * or the documentation need to be in sync. 39 */ 40 41 #define VMFLAGS (VM_READ|VM_WRITE|VM_EXEC) 42 43 /* 44 * On s390 platform, the lower 4 bits are used to identify given page table 45 * entry type. But these bits might affect the ability to clear entries with 46 * pxx_clear() because of how dynamic page table folding works on s390. So 47 * while loading up the entries do not change the lower 4 bits. It does not 48 * have affect any other platform. Also avoid the 62nd bit on ppc64 that is 49 * used to mark a pte entry. 50 */ 51 #define S390_SKIP_MASK GENMASK(3, 0) 52 #if __BITS_PER_LONG == 64 53 #define PPC64_SKIP_MASK GENMASK(62, 62) 54 #else 55 #define PPC64_SKIP_MASK 0x0 56 #endif 57 #define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK) 58 #define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK) 59 #define RANDOM_NZVALUE GENMASK(7, 0) 60 61 static void __init pte_basic_tests(unsigned long pfn, int idx) 62 { 63 pgprot_t prot = protection_map[idx]; 64 pte_t pte = pfn_pte(pfn, prot); 65 unsigned long val = idx, *ptr = &val; 66 67 pr_debug("Validating PTE basic (%pGv)\n", ptr); 68 69 /* 70 * This test needs to be executed after the given page table entry 71 * is created with pfn_pte() to make sure that protection_map[idx] 72 * does not have the dirty bit enabled from the beginning. This is 73 * important for platforms like arm64 where (!PTE_RDONLY) indicate 74 * dirty bit being set. 75 */ 76 WARN_ON(pte_dirty(pte_wrprotect(pte))); 77 78 WARN_ON(!pte_same(pte, pte)); 79 WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte)))); 80 WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte)))); 81 WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte)))); 82 WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte)))); 83 WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte)))); 84 WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte)))); 85 WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte)))); 86 WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte)))); 87 } 88 89 static void __init pte_advanced_tests(struct mm_struct *mm, 90 struct vm_area_struct *vma, pte_t *ptep, 91 unsigned long pfn, unsigned long vaddr, 92 pgprot_t prot) 93 { 94 pte_t pte = pfn_pte(pfn, prot); 95 96 /* 97 * Architectures optimize set_pte_at by avoiding TLB flush. 98 * This requires set_pte_at to be not used to update an 99 * existing pte entry. Clear pte before we do set_pte_at 100 */ 101 102 pr_debug("Validating PTE advanced\n"); 103 pte = pfn_pte(pfn, prot); 104 set_pte_at(mm, vaddr, ptep, pte); 105 ptep_set_wrprotect(mm, vaddr, ptep); 106 pte = ptep_get(ptep); 107 WARN_ON(pte_write(pte)); 108 ptep_get_and_clear(mm, vaddr, ptep); 109 pte = ptep_get(ptep); 110 WARN_ON(!pte_none(pte)); 111 112 pte = pfn_pte(pfn, prot); 113 pte = pte_wrprotect(pte); 114 pte = pte_mkclean(pte); 115 set_pte_at(mm, vaddr, ptep, pte); 116 pte = pte_mkwrite(pte); 117 pte = pte_mkdirty(pte); 118 ptep_set_access_flags(vma, vaddr, ptep, pte, 1); 119 pte = ptep_get(ptep); 120 WARN_ON(!(pte_write(pte) && pte_dirty(pte))); 121 ptep_get_and_clear_full(mm, vaddr, ptep, 1); 122 pte = ptep_get(ptep); 123 WARN_ON(!pte_none(pte)); 124 125 pte = pfn_pte(pfn, prot); 126 pte = pte_mkyoung(pte); 127 set_pte_at(mm, vaddr, ptep, pte); 128 ptep_test_and_clear_young(vma, vaddr, ptep); 129 pte = ptep_get(ptep); 130 WARN_ON(pte_young(pte)); 131 } 132 133 static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot) 134 { 135 pte_t pte = pfn_pte(pfn, prot); 136 137 if (!IS_ENABLED(CONFIG_NUMA_BALANCING)) 138 return; 139 140 pr_debug("Validating PTE saved write\n"); 141 WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte)))); 142 WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte)))); 143 } 144 145 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 146 static void __init pmd_basic_tests(unsigned long pfn, int idx) 147 { 148 pgprot_t prot = protection_map[idx]; 149 pmd_t pmd = pfn_pmd(pfn, prot); 150 unsigned long val = idx, *ptr = &val; 151 152 if (!has_transparent_hugepage()) 153 return; 154 155 pr_debug("Validating PMD basic (%pGv)\n", ptr); 156 157 /* 158 * This test needs to be executed after the given page table entry 159 * is created with pfn_pmd() to make sure that protection_map[idx] 160 * does not have the dirty bit enabled from the beginning. This is 161 * important for platforms like arm64 where (!PTE_RDONLY) indicate 162 * dirty bit being set. 163 */ 164 WARN_ON(pmd_dirty(pmd_wrprotect(pmd))); 165 166 167 WARN_ON(!pmd_same(pmd, pmd)); 168 WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd)))); 169 WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd)))); 170 WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd)))); 171 WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd)))); 172 WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd)))); 173 WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd)))); 174 WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd)))); 175 WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd)))); 176 /* 177 * A huge page does not point to next level page table 178 * entry. Hence this must qualify as pmd_bad(). 179 */ 180 WARN_ON(!pmd_bad(pmd_mkhuge(pmd))); 181 } 182 183 static void __init pmd_advanced_tests(struct mm_struct *mm, 184 struct vm_area_struct *vma, pmd_t *pmdp, 185 unsigned long pfn, unsigned long vaddr, 186 pgprot_t prot, pgtable_t pgtable) 187 { 188 pmd_t pmd = pfn_pmd(pfn, prot); 189 190 if (!has_transparent_hugepage()) 191 return; 192 193 pr_debug("Validating PMD advanced\n"); 194 /* Align the address wrt HPAGE_PMD_SIZE */ 195 vaddr = (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE; 196 197 pgtable_trans_huge_deposit(mm, pmdp, pgtable); 198 199 pmd = pfn_pmd(pfn, prot); 200 set_pmd_at(mm, vaddr, pmdp, pmd); 201 pmdp_set_wrprotect(mm, vaddr, pmdp); 202 pmd = READ_ONCE(*pmdp); 203 WARN_ON(pmd_write(pmd)); 204 pmdp_huge_get_and_clear(mm, vaddr, pmdp); 205 pmd = READ_ONCE(*pmdp); 206 WARN_ON(!pmd_none(pmd)); 207 208 pmd = pfn_pmd(pfn, prot); 209 pmd = pmd_wrprotect(pmd); 210 pmd = pmd_mkclean(pmd); 211 set_pmd_at(mm, vaddr, pmdp, pmd); 212 pmd = pmd_mkwrite(pmd); 213 pmd = pmd_mkdirty(pmd); 214 pmdp_set_access_flags(vma, vaddr, pmdp, pmd, 1); 215 pmd = READ_ONCE(*pmdp); 216 WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd))); 217 pmdp_huge_get_and_clear_full(vma, vaddr, pmdp, 1); 218 pmd = READ_ONCE(*pmdp); 219 WARN_ON(!pmd_none(pmd)); 220 221 pmd = pmd_mkhuge(pfn_pmd(pfn, prot)); 222 pmd = pmd_mkyoung(pmd); 223 set_pmd_at(mm, vaddr, pmdp, pmd); 224 pmdp_test_and_clear_young(vma, vaddr, pmdp); 225 pmd = READ_ONCE(*pmdp); 226 WARN_ON(pmd_young(pmd)); 227 228 /* Clear the pte entries */ 229 pmdp_huge_get_and_clear(mm, vaddr, pmdp); 230 pgtable = pgtable_trans_huge_withdraw(mm, pmdp); 231 } 232 233 static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot) 234 { 235 pmd_t pmd = pfn_pmd(pfn, prot); 236 237 pr_debug("Validating PMD leaf\n"); 238 /* 239 * PMD based THP is a leaf entry. 240 */ 241 pmd = pmd_mkhuge(pmd); 242 WARN_ON(!pmd_leaf(pmd)); 243 } 244 245 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP 246 static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) 247 { 248 pmd_t pmd; 249 250 if (!arch_vmap_pmd_supported(prot)) 251 return; 252 253 pr_debug("Validating PMD huge\n"); 254 /* 255 * X86 defined pmd_set_huge() verifies that the given 256 * PMD is not a populated non-leaf entry. 257 */ 258 WRITE_ONCE(*pmdp, __pmd(0)); 259 WARN_ON(!pmd_set_huge(pmdp, __pfn_to_phys(pfn), prot)); 260 WARN_ON(!pmd_clear_huge(pmdp)); 261 pmd = READ_ONCE(*pmdp); 262 WARN_ON(!pmd_none(pmd)); 263 } 264 #else /* CONFIG_HAVE_ARCH_HUGE_VMAP */ 265 static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) { } 266 #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ 267 268 static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot) 269 { 270 pmd_t pmd = pfn_pmd(pfn, prot); 271 272 if (!IS_ENABLED(CONFIG_NUMA_BALANCING)) 273 return; 274 275 pr_debug("Validating PMD saved write\n"); 276 WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd)))); 277 WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd)))); 278 } 279 280 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 281 static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx) 282 { 283 pgprot_t prot = protection_map[idx]; 284 pud_t pud = pfn_pud(pfn, prot); 285 unsigned long val = idx, *ptr = &val; 286 287 if (!has_transparent_hugepage()) 288 return; 289 290 pr_debug("Validating PUD basic (%pGv)\n", ptr); 291 292 /* 293 * This test needs to be executed after the given page table entry 294 * is created with pfn_pud() to make sure that protection_map[idx] 295 * does not have the dirty bit enabled from the beginning. This is 296 * important for platforms like arm64 where (!PTE_RDONLY) indicate 297 * dirty bit being set. 298 */ 299 WARN_ON(pud_dirty(pud_wrprotect(pud))); 300 301 WARN_ON(!pud_same(pud, pud)); 302 WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud)))); 303 WARN_ON(!pud_dirty(pud_mkdirty(pud_mkclean(pud)))); 304 WARN_ON(pud_dirty(pud_mkclean(pud_mkdirty(pud)))); 305 WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud)))); 306 WARN_ON(pud_write(pud_wrprotect(pud_mkwrite(pud)))); 307 WARN_ON(pud_young(pud_mkold(pud_mkyoung(pud)))); 308 WARN_ON(pud_dirty(pud_wrprotect(pud_mkclean(pud)))); 309 WARN_ON(!pud_dirty(pud_wrprotect(pud_mkdirty(pud)))); 310 311 if (mm_pmd_folded(mm)) 312 return; 313 314 /* 315 * A huge page does not point to next level page table 316 * entry. Hence this must qualify as pud_bad(). 317 */ 318 WARN_ON(!pud_bad(pud_mkhuge(pud))); 319 } 320 321 static void __init pud_advanced_tests(struct mm_struct *mm, 322 struct vm_area_struct *vma, pud_t *pudp, 323 unsigned long pfn, unsigned long vaddr, 324 pgprot_t prot) 325 { 326 pud_t pud = pfn_pud(pfn, prot); 327 328 if (!has_transparent_hugepage()) 329 return; 330 331 pr_debug("Validating PUD advanced\n"); 332 /* Align the address wrt HPAGE_PUD_SIZE */ 333 vaddr = (vaddr & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE; 334 335 set_pud_at(mm, vaddr, pudp, pud); 336 pudp_set_wrprotect(mm, vaddr, pudp); 337 pud = READ_ONCE(*pudp); 338 WARN_ON(pud_write(pud)); 339 340 #ifndef __PAGETABLE_PMD_FOLDED 341 pudp_huge_get_and_clear(mm, vaddr, pudp); 342 pud = READ_ONCE(*pudp); 343 WARN_ON(!pud_none(pud)); 344 #endif /* __PAGETABLE_PMD_FOLDED */ 345 pud = pfn_pud(pfn, prot); 346 pud = pud_wrprotect(pud); 347 pud = pud_mkclean(pud); 348 set_pud_at(mm, vaddr, pudp, pud); 349 pud = pud_mkwrite(pud); 350 pud = pud_mkdirty(pud); 351 pudp_set_access_flags(vma, vaddr, pudp, pud, 1); 352 pud = READ_ONCE(*pudp); 353 WARN_ON(!(pud_write(pud) && pud_dirty(pud))); 354 355 #ifndef __PAGETABLE_PMD_FOLDED 356 pudp_huge_get_and_clear_full(mm, vaddr, pudp, 1); 357 pud = READ_ONCE(*pudp); 358 WARN_ON(!pud_none(pud)); 359 #endif /* __PAGETABLE_PMD_FOLDED */ 360 361 pud = pfn_pud(pfn, prot); 362 pud = pud_mkyoung(pud); 363 set_pud_at(mm, vaddr, pudp, pud); 364 pudp_test_and_clear_young(vma, vaddr, pudp); 365 pud = READ_ONCE(*pudp); 366 WARN_ON(pud_young(pud)); 367 368 pudp_huge_get_and_clear(mm, vaddr, pudp); 369 } 370 371 static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) 372 { 373 pud_t pud = pfn_pud(pfn, prot); 374 375 pr_debug("Validating PUD leaf\n"); 376 /* 377 * PUD based THP is a leaf entry. 378 */ 379 pud = pud_mkhuge(pud); 380 WARN_ON(!pud_leaf(pud)); 381 } 382 383 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP 384 static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) 385 { 386 pud_t pud; 387 388 if (!arch_vmap_pud_supported(prot)) 389 return; 390 391 pr_debug("Validating PUD huge\n"); 392 /* 393 * X86 defined pud_set_huge() verifies that the given 394 * PUD is not a populated non-leaf entry. 395 */ 396 WRITE_ONCE(*pudp, __pud(0)); 397 WARN_ON(!pud_set_huge(pudp, __pfn_to_phys(pfn), prot)); 398 WARN_ON(!pud_clear_huge(pudp)); 399 pud = READ_ONCE(*pudp); 400 WARN_ON(!pud_none(pud)); 401 } 402 #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ 403 static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) { } 404 #endif /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ 405 406 #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ 407 static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx) { } 408 static void __init pud_advanced_tests(struct mm_struct *mm, 409 struct vm_area_struct *vma, pud_t *pudp, 410 unsigned long pfn, unsigned long vaddr, 411 pgprot_t prot) 412 { 413 } 414 static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { } 415 static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) 416 { 417 } 418 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ 419 #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ 420 static void __init pmd_basic_tests(unsigned long pfn, int idx) { } 421 static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx) { } 422 static void __init pmd_advanced_tests(struct mm_struct *mm, 423 struct vm_area_struct *vma, pmd_t *pmdp, 424 unsigned long pfn, unsigned long vaddr, 425 pgprot_t prot, pgtable_t pgtable) 426 { 427 } 428 static void __init pud_advanced_tests(struct mm_struct *mm, 429 struct vm_area_struct *vma, pud_t *pudp, 430 unsigned long pfn, unsigned long vaddr, 431 pgprot_t prot) 432 { 433 } 434 static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot) { } 435 static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot) { } 436 static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) 437 { 438 } 439 static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) 440 { 441 } 442 static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot) { } 443 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 444 445 static void __init p4d_basic_tests(unsigned long pfn, pgprot_t prot) 446 { 447 p4d_t p4d; 448 449 pr_debug("Validating P4D basic\n"); 450 memset(&p4d, RANDOM_NZVALUE, sizeof(p4d_t)); 451 WARN_ON(!p4d_same(p4d, p4d)); 452 } 453 454 static void __init pgd_basic_tests(unsigned long pfn, pgprot_t prot) 455 { 456 pgd_t pgd; 457 458 pr_debug("Validating PGD basic\n"); 459 memset(&pgd, RANDOM_NZVALUE, sizeof(pgd_t)); 460 WARN_ON(!pgd_same(pgd, pgd)); 461 } 462 463 #ifndef __PAGETABLE_PUD_FOLDED 464 static void __init pud_clear_tests(struct mm_struct *mm, pud_t *pudp) 465 { 466 pud_t pud = READ_ONCE(*pudp); 467 468 if (mm_pmd_folded(mm)) 469 return; 470 471 pr_debug("Validating PUD clear\n"); 472 pud = __pud(pud_val(pud) | RANDOM_ORVALUE); 473 WRITE_ONCE(*pudp, pud); 474 pud_clear(pudp); 475 pud = READ_ONCE(*pudp); 476 WARN_ON(!pud_none(pud)); 477 } 478 479 static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp, 480 pmd_t *pmdp) 481 { 482 pud_t pud; 483 484 if (mm_pmd_folded(mm)) 485 return; 486 487 pr_debug("Validating PUD populate\n"); 488 /* 489 * This entry points to next level page table page. 490 * Hence this must not qualify as pud_bad(). 491 */ 492 pud_populate(mm, pudp, pmdp); 493 pud = READ_ONCE(*pudp); 494 WARN_ON(pud_bad(pud)); 495 } 496 #else /* !__PAGETABLE_PUD_FOLDED */ 497 static void __init pud_clear_tests(struct mm_struct *mm, pud_t *pudp) { } 498 static void __init pud_populate_tests(struct mm_struct *mm, pud_t *pudp, 499 pmd_t *pmdp) 500 { 501 } 502 #endif /* PAGETABLE_PUD_FOLDED */ 503 504 #ifndef __PAGETABLE_P4D_FOLDED 505 static void __init p4d_clear_tests(struct mm_struct *mm, p4d_t *p4dp) 506 { 507 p4d_t p4d = READ_ONCE(*p4dp); 508 509 if (mm_pud_folded(mm)) 510 return; 511 512 pr_debug("Validating P4D clear\n"); 513 p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE); 514 WRITE_ONCE(*p4dp, p4d); 515 p4d_clear(p4dp); 516 p4d = READ_ONCE(*p4dp); 517 WARN_ON(!p4d_none(p4d)); 518 } 519 520 static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp, 521 pud_t *pudp) 522 { 523 p4d_t p4d; 524 525 if (mm_pud_folded(mm)) 526 return; 527 528 pr_debug("Validating P4D populate\n"); 529 /* 530 * This entry points to next level page table page. 531 * Hence this must not qualify as p4d_bad(). 532 */ 533 pud_clear(pudp); 534 p4d_clear(p4dp); 535 p4d_populate(mm, p4dp, pudp); 536 p4d = READ_ONCE(*p4dp); 537 WARN_ON(p4d_bad(p4d)); 538 } 539 540 static void __init pgd_clear_tests(struct mm_struct *mm, pgd_t *pgdp) 541 { 542 pgd_t pgd = READ_ONCE(*pgdp); 543 544 if (mm_p4d_folded(mm)) 545 return; 546 547 pr_debug("Validating PGD clear\n"); 548 pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE); 549 WRITE_ONCE(*pgdp, pgd); 550 pgd_clear(pgdp); 551 pgd = READ_ONCE(*pgdp); 552 WARN_ON(!pgd_none(pgd)); 553 } 554 555 static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp, 556 p4d_t *p4dp) 557 { 558 pgd_t pgd; 559 560 if (mm_p4d_folded(mm)) 561 return; 562 563 pr_debug("Validating PGD populate\n"); 564 /* 565 * This entry points to next level page table page. 566 * Hence this must not qualify as pgd_bad(). 567 */ 568 p4d_clear(p4dp); 569 pgd_clear(pgdp); 570 pgd_populate(mm, pgdp, p4dp); 571 pgd = READ_ONCE(*pgdp); 572 WARN_ON(pgd_bad(pgd)); 573 } 574 #else /* !__PAGETABLE_P4D_FOLDED */ 575 static void __init p4d_clear_tests(struct mm_struct *mm, p4d_t *p4dp) { } 576 static void __init pgd_clear_tests(struct mm_struct *mm, pgd_t *pgdp) { } 577 static void __init p4d_populate_tests(struct mm_struct *mm, p4d_t *p4dp, 578 pud_t *pudp) 579 { 580 } 581 static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp, 582 p4d_t *p4dp) 583 { 584 } 585 #endif /* PAGETABLE_P4D_FOLDED */ 586 587 static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep, 588 unsigned long pfn, unsigned long vaddr, 589 pgprot_t prot) 590 { 591 pte_t pte = pfn_pte(pfn, prot); 592 593 pr_debug("Validating PTE clear\n"); 594 #ifndef CONFIG_RISCV 595 pte = __pte(pte_val(pte) | RANDOM_ORVALUE); 596 #endif 597 set_pte_at(mm, vaddr, ptep, pte); 598 barrier(); 599 pte_clear(mm, vaddr, ptep); 600 pte = ptep_get(ptep); 601 WARN_ON(!pte_none(pte)); 602 } 603 604 static void __init pmd_clear_tests(struct mm_struct *mm, pmd_t *pmdp) 605 { 606 pmd_t pmd = READ_ONCE(*pmdp); 607 608 pr_debug("Validating PMD clear\n"); 609 pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE); 610 WRITE_ONCE(*pmdp, pmd); 611 pmd_clear(pmdp); 612 pmd = READ_ONCE(*pmdp); 613 WARN_ON(!pmd_none(pmd)); 614 } 615 616 static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp, 617 pgtable_t pgtable) 618 { 619 pmd_t pmd; 620 621 pr_debug("Validating PMD populate\n"); 622 /* 623 * This entry points to next level page table page. 624 * Hence this must not qualify as pmd_bad(). 625 */ 626 pmd_populate(mm, pmdp, pgtable); 627 pmd = READ_ONCE(*pmdp); 628 WARN_ON(pmd_bad(pmd)); 629 } 630 631 static void __init pte_special_tests(unsigned long pfn, pgprot_t prot) 632 { 633 pte_t pte = pfn_pte(pfn, prot); 634 635 if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) 636 return; 637 638 pr_debug("Validating PTE special\n"); 639 WARN_ON(!pte_special(pte_mkspecial(pte))); 640 } 641 642 static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot) 643 { 644 pte_t pte = pfn_pte(pfn, prot); 645 646 if (!IS_ENABLED(CONFIG_NUMA_BALANCING)) 647 return; 648 649 pr_debug("Validating PTE protnone\n"); 650 WARN_ON(!pte_protnone(pte)); 651 WARN_ON(!pte_present(pte)); 652 } 653 654 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 655 static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) 656 { 657 pmd_t pmd = pmd_mkhuge(pfn_pmd(pfn, prot)); 658 659 if (!IS_ENABLED(CONFIG_NUMA_BALANCING)) 660 return; 661 662 pr_debug("Validating PMD protnone\n"); 663 WARN_ON(!pmd_protnone(pmd)); 664 WARN_ON(!pmd_present(pmd)); 665 } 666 #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ 667 static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) { } 668 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 669 670 #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP 671 static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot) 672 { 673 pte_t pte = pfn_pte(pfn, prot); 674 675 pr_debug("Validating PTE devmap\n"); 676 WARN_ON(!pte_devmap(pte_mkdevmap(pte))); 677 } 678 679 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 680 static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) 681 { 682 pmd_t pmd = pfn_pmd(pfn, prot); 683 684 pr_debug("Validating PMD devmap\n"); 685 WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd))); 686 } 687 688 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 689 static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) 690 { 691 pud_t pud = pfn_pud(pfn, prot); 692 693 pr_debug("Validating PUD devmap\n"); 694 WARN_ON(!pud_devmap(pud_mkdevmap(pud))); 695 } 696 #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ 697 static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { } 698 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ 699 #else /* CONFIG_TRANSPARENT_HUGEPAGE */ 700 static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { } 701 static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { } 702 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 703 #else 704 static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot) { } 705 static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { } 706 static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { } 707 #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */ 708 709 static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot) 710 { 711 pte_t pte = pfn_pte(pfn, prot); 712 713 if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) 714 return; 715 716 pr_debug("Validating PTE soft dirty\n"); 717 WARN_ON(!pte_soft_dirty(pte_mksoft_dirty(pte))); 718 WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte))); 719 } 720 721 static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot) 722 { 723 pte_t pte = pfn_pte(pfn, prot); 724 725 if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) 726 return; 727 728 pr_debug("Validating PTE swap soft dirty\n"); 729 WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte))); 730 WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte))); 731 } 732 733 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 734 static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) 735 { 736 pmd_t pmd = pfn_pmd(pfn, prot); 737 738 if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) 739 return; 740 741 pr_debug("Validating PMD soft dirty\n"); 742 WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd))); 743 WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd))); 744 } 745 746 static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot) 747 { 748 pmd_t pmd = pfn_pmd(pfn, prot); 749 750 if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || 751 !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) 752 return; 753 754 pr_debug("Validating PMD swap soft dirty\n"); 755 WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd))); 756 WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd))); 757 } 758 #else /* !CONFIG_ARCH_HAS_PTE_DEVMAP */ 759 static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) { } 760 static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot) 761 { 762 } 763 #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */ 764 765 static void __init pte_swap_tests(unsigned long pfn, pgprot_t prot) 766 { 767 swp_entry_t swp; 768 pte_t pte; 769 770 pr_debug("Validating PTE swap\n"); 771 pte = pfn_pte(pfn, prot); 772 swp = __pte_to_swp_entry(pte); 773 pte = __swp_entry_to_pte(swp); 774 WARN_ON(pfn != pte_pfn(pte)); 775 } 776 777 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION 778 static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot) 779 { 780 swp_entry_t swp; 781 pmd_t pmd; 782 783 pr_debug("Validating PMD swap\n"); 784 pmd = pfn_pmd(pfn, prot); 785 swp = __pmd_to_swp_entry(pmd); 786 pmd = __swp_entry_to_pmd(swp); 787 WARN_ON(pfn != pmd_pfn(pmd)); 788 } 789 #else /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */ 790 static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot) { } 791 #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ 792 793 static void __init swap_migration_tests(void) 794 { 795 struct page *page; 796 swp_entry_t swp; 797 798 if (!IS_ENABLED(CONFIG_MIGRATION)) 799 return; 800 801 pr_debug("Validating swap migration\n"); 802 /* 803 * swap_migration_tests() requires a dedicated page as it needs to 804 * be locked before creating a migration entry from it. Locking the 805 * page that actually maps kernel text ('start_kernel') can be real 806 * problematic. Lets allocate a dedicated page explicitly for this 807 * purpose that will be freed subsequently. 808 */ 809 page = alloc_page(GFP_KERNEL); 810 if (!page) { 811 pr_err("page allocation failed\n"); 812 return; 813 } 814 815 /* 816 * make_migration_entry() expects given page to be 817 * locked, otherwise it stumbles upon a BUG_ON(). 818 */ 819 __SetPageLocked(page); 820 swp = make_migration_entry(page, 1); 821 WARN_ON(!is_migration_entry(swp)); 822 WARN_ON(!is_write_migration_entry(swp)); 823 824 make_migration_entry_read(&swp); 825 WARN_ON(!is_migration_entry(swp)); 826 WARN_ON(is_write_migration_entry(swp)); 827 828 swp = make_migration_entry(page, 0); 829 WARN_ON(!is_migration_entry(swp)); 830 WARN_ON(is_write_migration_entry(swp)); 831 __ClearPageLocked(page); 832 __free_page(page); 833 } 834 835 #ifdef CONFIG_HUGETLB_PAGE 836 static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) 837 { 838 struct page *page; 839 pte_t pte; 840 841 pr_debug("Validating HugeTLB basic\n"); 842 /* 843 * Accessing the page associated with the pfn is safe here, 844 * as it was previously derived from a real kernel symbol. 845 */ 846 page = pfn_to_page(pfn); 847 pte = mk_huge_pte(page, prot); 848 849 WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte))); 850 WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte)))); 851 WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte)))); 852 853 #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB 854 pte = pfn_pte(pfn, prot); 855 856 WARN_ON(!pte_huge(pte_mkhuge(pte))); 857 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ 858 } 859 #else /* !CONFIG_HUGETLB_PAGE */ 860 static void __init hugetlb_basic_tests(unsigned long pfn, pgprot_t prot) { } 861 #endif /* CONFIG_HUGETLB_PAGE */ 862 863 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 864 static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot) 865 { 866 pmd_t pmd; 867 868 if (!has_transparent_hugepage()) 869 return; 870 871 pr_debug("Validating PMD based THP\n"); 872 /* 873 * pmd_trans_huge() and pmd_present() must return positive after 874 * MMU invalidation with pmd_mkinvalid(). This behavior is an 875 * optimization for transparent huge page. pmd_trans_huge() must 876 * be true if pmd_page() returns a valid THP to avoid taking the 877 * pmd_lock when others walk over non transhuge pmds (i.e. there 878 * are no THP allocated). Especially when splitting a THP and 879 * removing the present bit from the pmd, pmd_trans_huge() still 880 * needs to return true. pmd_present() should be true whenever 881 * pmd_trans_huge() returns true. 882 */ 883 pmd = pfn_pmd(pfn, prot); 884 WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd))); 885 886 #ifndef __HAVE_ARCH_PMDP_INVALIDATE 887 WARN_ON(!pmd_trans_huge(pmd_mkinvalid(pmd_mkhuge(pmd)))); 888 WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd)))); 889 #endif /* __HAVE_ARCH_PMDP_INVALIDATE */ 890 } 891 892 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 893 static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) 894 { 895 pud_t pud; 896 897 if (!has_transparent_hugepage()) 898 return; 899 900 pr_debug("Validating PUD based THP\n"); 901 pud = pfn_pud(pfn, prot); 902 WARN_ON(!pud_trans_huge(pud_mkhuge(pud))); 903 904 /* 905 * pud_mkinvalid() has been dropped for now. Enable back 906 * these tests when it comes back with a modified pud_present(). 907 * 908 * WARN_ON(!pud_trans_huge(pud_mkinvalid(pud_mkhuge(pud)))); 909 * WARN_ON(!pud_present(pud_mkinvalid(pud_mkhuge(pud)))); 910 */ 911 } 912 #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ 913 static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) { } 914 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ 915 #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ 916 static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot) { } 917 static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) { } 918 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 919 920 static unsigned long __init get_random_vaddr(void) 921 { 922 unsigned long random_vaddr, random_pages, total_user_pages; 923 924 total_user_pages = (TASK_SIZE - FIRST_USER_ADDRESS) / PAGE_SIZE; 925 926 random_pages = get_random_long() % total_user_pages; 927 random_vaddr = FIRST_USER_ADDRESS + random_pages * PAGE_SIZE; 928 929 return random_vaddr; 930 } 931 932 static int __init debug_vm_pgtable(void) 933 { 934 struct vm_area_struct *vma; 935 struct mm_struct *mm; 936 pgd_t *pgdp; 937 p4d_t *p4dp, *saved_p4dp; 938 pud_t *pudp, *saved_pudp; 939 pmd_t *pmdp, *saved_pmdp, pmd; 940 pte_t *ptep; 941 pgtable_t saved_ptep; 942 pgprot_t prot, protnone; 943 phys_addr_t paddr; 944 unsigned long vaddr, pte_aligned, pmd_aligned; 945 unsigned long pud_aligned, p4d_aligned, pgd_aligned; 946 spinlock_t *ptl = NULL; 947 int idx; 948 949 pr_info("Validating architecture page table helpers\n"); 950 prot = vm_get_page_prot(VMFLAGS); 951 vaddr = get_random_vaddr(); 952 mm = mm_alloc(); 953 if (!mm) { 954 pr_err("mm_struct allocation failed\n"); 955 return 1; 956 } 957 958 /* 959 * __P000 (or even __S000) will help create page table entries with 960 * PROT_NONE permission as required for pxx_protnone_tests(). 961 */ 962 protnone = __P000; 963 964 vma = vm_area_alloc(mm); 965 if (!vma) { 966 pr_err("vma allocation failed\n"); 967 return 1; 968 } 969 970 /* 971 * PFN for mapping at PTE level is determined from a standard kernel 972 * text symbol. But pfns for higher page table levels are derived by 973 * masking lower bits of this real pfn. These derived pfns might not 974 * exist on the platform but that does not really matter as pfn_pxx() 975 * helpers will still create appropriate entries for the test. This 976 * helps avoid large memory block allocations to be used for mapping 977 * at higher page table levels. 978 */ 979 paddr = __pa_symbol(&start_kernel); 980 981 pte_aligned = (paddr & PAGE_MASK) >> PAGE_SHIFT; 982 pmd_aligned = (paddr & PMD_MASK) >> PAGE_SHIFT; 983 pud_aligned = (paddr & PUD_MASK) >> PAGE_SHIFT; 984 p4d_aligned = (paddr & P4D_MASK) >> PAGE_SHIFT; 985 pgd_aligned = (paddr & PGDIR_MASK) >> PAGE_SHIFT; 986 WARN_ON(!pfn_valid(pte_aligned)); 987 988 pgdp = pgd_offset(mm, vaddr); 989 p4dp = p4d_alloc(mm, pgdp, vaddr); 990 pudp = pud_alloc(mm, p4dp, vaddr); 991 pmdp = pmd_alloc(mm, pudp, vaddr); 992 /* 993 * Allocate pgtable_t 994 */ 995 if (pte_alloc(mm, pmdp)) { 996 pr_err("pgtable allocation failed\n"); 997 return 1; 998 } 999 1000 /* 1001 * Save all the page table page addresses as the page table 1002 * entries will be used for testing with random or garbage 1003 * values. These saved addresses will be used for freeing 1004 * page table pages. 1005 */ 1006 pmd = READ_ONCE(*pmdp); 1007 saved_p4dp = p4d_offset(pgdp, 0UL); 1008 saved_pudp = pud_offset(p4dp, 0UL); 1009 saved_pmdp = pmd_offset(pudp, 0UL); 1010 saved_ptep = pmd_pgtable(pmd); 1011 1012 /* 1013 * Iterate over the protection_map[] to make sure that all 1014 * the basic page table transformation validations just hold 1015 * true irrespective of the starting protection value for a 1016 * given page table entry. 1017 */ 1018 for (idx = 0; idx < ARRAY_SIZE(protection_map); idx++) { 1019 pte_basic_tests(pte_aligned, idx); 1020 pmd_basic_tests(pmd_aligned, idx); 1021 pud_basic_tests(mm, pud_aligned, idx); 1022 } 1023 1024 /* 1025 * Both P4D and PGD level tests are very basic which do not 1026 * involve creating page table entries from the protection 1027 * value and the given pfn. Hence just keep them out from 1028 * the above iteration for now to save some test execution 1029 * time. 1030 */ 1031 p4d_basic_tests(p4d_aligned, prot); 1032 pgd_basic_tests(pgd_aligned, prot); 1033 1034 pmd_leaf_tests(pmd_aligned, prot); 1035 pud_leaf_tests(pud_aligned, prot); 1036 1037 pte_savedwrite_tests(pte_aligned, protnone); 1038 pmd_savedwrite_tests(pmd_aligned, protnone); 1039 1040 pte_special_tests(pte_aligned, prot); 1041 pte_protnone_tests(pte_aligned, protnone); 1042 pmd_protnone_tests(pmd_aligned, protnone); 1043 1044 pte_devmap_tests(pte_aligned, prot); 1045 pmd_devmap_tests(pmd_aligned, prot); 1046 pud_devmap_tests(pud_aligned, prot); 1047 1048 pte_soft_dirty_tests(pte_aligned, prot); 1049 pmd_soft_dirty_tests(pmd_aligned, prot); 1050 pte_swap_soft_dirty_tests(pte_aligned, prot); 1051 pmd_swap_soft_dirty_tests(pmd_aligned, prot); 1052 1053 pte_swap_tests(pte_aligned, prot); 1054 pmd_swap_tests(pmd_aligned, prot); 1055 1056 swap_migration_tests(); 1057 1058 pmd_thp_tests(pmd_aligned, prot); 1059 pud_thp_tests(pud_aligned, prot); 1060 1061 hugetlb_basic_tests(pte_aligned, prot); 1062 1063 /* 1064 * Page table modifying tests. They need to hold 1065 * proper page table lock. 1066 */ 1067 1068 ptep = pte_offset_map_lock(mm, pmdp, vaddr, &ptl); 1069 pte_clear_tests(mm, ptep, pte_aligned, vaddr, prot); 1070 pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot); 1071 pte_unmap_unlock(ptep, ptl); 1072 1073 ptl = pmd_lock(mm, pmdp); 1074 pmd_clear_tests(mm, pmdp); 1075 pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot, saved_ptep); 1076 pmd_huge_tests(pmdp, pmd_aligned, prot); 1077 pmd_populate_tests(mm, pmdp, saved_ptep); 1078 spin_unlock(ptl); 1079 1080 ptl = pud_lock(mm, pudp); 1081 pud_clear_tests(mm, pudp); 1082 pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot); 1083 pud_huge_tests(pudp, pud_aligned, prot); 1084 pud_populate_tests(mm, pudp, saved_pmdp); 1085 spin_unlock(ptl); 1086 1087 spin_lock(&mm->page_table_lock); 1088 p4d_clear_tests(mm, p4dp); 1089 pgd_clear_tests(mm, pgdp); 1090 p4d_populate_tests(mm, p4dp, saved_pudp); 1091 pgd_populate_tests(mm, pgdp, saved_p4dp); 1092 spin_unlock(&mm->page_table_lock); 1093 1094 p4d_free(mm, saved_p4dp); 1095 pud_free(mm, saved_pudp); 1096 pmd_free(mm, saved_pmdp); 1097 pte_free(mm, saved_ptep); 1098 1099 vm_area_free(vma); 1100 mm_dec_nr_puds(mm); 1101 mm_dec_nr_pmds(mm); 1102 mm_dec_nr_ptes(mm); 1103 mmdrop(mm); 1104 return 0; 1105 } 1106 late_initcall(debug_vm_pgtable); 1107