// SPDX-License-Identifier: GPL-2.0-only
/*
 * This kernel test validates architecture page table helpers and
 * accessors and helps in verifying their continued compliance with
 * expected generic MM semantics.
 *
 * Copyright (C) 2019 ARM Ltd.
 *
 * Author: Anshuman Khandual <anshuman.khandual@arm.com>
 */
#define pr_fmt(fmt) "debug_vm_pgtable: [%-25s]: " fmt, __func__

#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kernel.h>
#include <linux/kconfig.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/mm_types.h>
#include <linux/module.h>
#include <linux/pfn_t.h>
#include <linux/printk.h>
#include <linux/pgtable.h>
#include <linux/random.h>
#include <linux/spinlock.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/start_kernel.h>
#include <linux/sched/mm.h>
#include <linux/io.h>
#include <linux/vmalloc.h>

#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

/*
 * Please refer to Documentation/mm/arch_pgtable_helpers.rst for the semantic
 * expectations that are being validated here. All future changes in here
 * or the documentation need to be in sync.
 *
 * On the s390 platform, the lower 4 bits are used to identify a given page
 * table entry's type. These bits might affect the ability to clear entries
 * with pxx_clear() because of how dynamic page table folding works on s390.
 * So while loading up the entries, do not change the lower 4 bits. This does
 * not affect any other platform. Also avoid the 62nd bit on ppc64, which is
 * used to mark a pte entry.
 */
#define S390_SKIP_MASK		GENMASK(3, 0)
#if __BITS_PER_LONG == 64
#define PPC64_SKIP_MASK		GENMASK(62, 62)
#else
#define PPC64_SKIP_MASK		0x0
#endif
#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
#define RANDOM_NZVALUE	GENMASK(7, 0)
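
/*
 * For illustration only (these values are not used directly): assuming
 * BITS_PER_LONG == 64, the masks above work out to
 *
 *	S390_SKIP_MASK  == 0x000000000000000f
 *	PPC64_SKIP_MASK == 0x4000000000000000
 *	RANDOM_ORVALUE  == 0xbffffffffffffff0
 *
 * i.e. RANDOM_ORVALUE sets every bit except the s390 entry type bits
 * and the ppc64 pte marker bit.
 */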

struct pgtable_debug_args {
	struct mm_struct	*mm;
	struct vm_area_struct	*vma;

	pgd_t			*pgdp;
	p4d_t			*p4dp;
	pud_t			*pudp;
	pmd_t			*pmdp;
	pte_t			*ptep;

	p4d_t			*start_p4dp;
	pud_t			*start_pudp;
	pmd_t			*start_pmdp;
	pgtable_t		start_ptep;

	unsigned long		vaddr;
	pgprot_t		page_prot;
	pgprot_t		page_prot_none;

	bool			is_contiguous_page;
	unsigned long		pud_pfn;
	unsigned long		pmd_pfn;
	unsigned long		pte_pfn;

	unsigned long		fixed_alignment;
	unsigned long		fixed_pgd_pfn;
	unsigned long		fixed_p4d_pfn;
	unsigned long		fixed_pud_pfn;
	unsigned long		fixed_pmd_pfn;
	unsigned long		fixed_pte_pfn;
};

static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
{
	pgprot_t prot = vm_get_page_prot(idx);
	pte_t pte = pfn_pte(args->fixed_pte_pfn, prot);
	unsigned long val = idx, *ptr = &val;

	pr_debug("Validating PTE basic (%pGv)\n", ptr);

	/*
	 * This test needs to be executed after the given page table entry
	 * is created with pfn_pte() to make sure that vm_get_page_prot(idx)
	 * does not have the dirty bit enabled from the beginning. This is
	 * important for platforms like arm64 where (!PTE_RDONLY) indicates
	 * the dirty bit being set.
	 */
	WARN_ON(pte_dirty(pte_wrprotect(pte)));

	WARN_ON(!pte_same(pte, pte));
	WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte))));
	WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte))));
	WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte), args->vma)));
	WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte))));
	WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte))));
	WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte, args->vma))));
	WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte))));
	WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte))));
}

static void __init pte_advanced_tests(struct pgtable_debug_args *args)
{
	struct page *page;
	pte_t pte;

	/*
	 * Architectures optimize set_pte_at() by avoiding a TLB flush.
	 * This requires that set_pte_at() not be used to update an
	 * existing pte entry. Clear the pte before calling set_pte_at().
	 *
	 * flush_dcache_page() is called after set_pte_at() to clear
	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
	 * when the page is released, and the page allocation check would
	 * fail when the page is allocated again. For architectures other
	 * than ARM64, the unexpected overhead of cache flushing is
	 * acceptable.
	 */
	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
	if (!page)
		return;

	pr_debug("Validating PTE advanced\n");
	if (WARN_ON(!args->ptep))
		return;

	pte = pfn_pte(args->pte_pfn, args->page_prot);
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	flush_dcache_page(page);
	ptep_set_wrprotect(args->mm, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(pte_write(pte));
	ptep_get_and_clear(args->mm, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(!pte_none(pte));

	pte = pfn_pte(args->pte_pfn, args->page_prot);
	pte = pte_wrprotect(pte);
	pte = pte_mkclean(pte);
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	flush_dcache_page(page);
	pte = pte_mkwrite(pte, args->vma);
	pte = pte_mkdirty(pte);
	ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
	pte = ptep_get(args->ptep);
	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
	pte = ptep_get(args->ptep);
	WARN_ON(!pte_none(pte));

	pte = pfn_pte(args->pte_pfn, args->page_prot);
	pte = pte_mkyoung(pte);
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	flush_dcache_page(page);
	ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(pte_young(pte));

	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
{
	pgprot_t prot = vm_get_page_prot(idx);
	unsigned long val = idx, *ptr = &val;
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD basic (%pGv)\n", ptr);
	pmd = pfn_pmd(args->fixed_pmd_pfn, prot);

	/*
	 * This test needs to be executed after the given page table entry
	 * is created with pfn_pmd() to make sure that vm_get_page_prot(idx)
	 * does not have the dirty bit enabled from the beginning. This is
	 * important for platforms like arm64 where (!PTE_RDONLY) indicates
	 * the dirty bit being set.
	 */
	WARN_ON(pmd_dirty(pmd_wrprotect(pmd)));
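
	/*
	 * As with the PTE checks above, each assertion below composes a
	 * state-setting helper with its inverse (e.g. pmd_mkdirty() on
	 * top of pmd_mkclean()) and verifies that the corresponding query
	 * helper observes the final state.
	 */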
	WARN_ON(!pmd_same(pmd, pmd));
	WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
	WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
	WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd), args->vma)));
	WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd))));
	WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd))));
	WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd, args->vma))));
	WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd))));
	WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd))));
	/*
	 * A huge page does not point to a next level page table
	 * entry. Hence this must qualify as pmd_bad().
	 */
	WARN_ON(!pmd_bad(pmd_mkhuge(pmd)));
}

static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
{
	struct page *page;
	pmd_t pmd;
	unsigned long vaddr = args->vaddr;

	if (!has_transparent_hugepage())
		return;

	page = (args->pmd_pfn != ULONG_MAX) ? pfn_to_page(args->pmd_pfn) : NULL;
	if (!page)
		return;

	/*
	 * flush_dcache_page() is called after set_pmd_at() to clear
	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
	 * when the page is released, and the page allocation check would
	 * fail when the page is allocated again. For architectures other
	 * than ARM64, the unexpected overhead of cache flushing is
	 * acceptable.
	 */
	pr_debug("Validating PMD advanced\n");
	/* Align the address wrt HPAGE_PMD_SIZE */
	vaddr &= HPAGE_PMD_MASK;

	pgtable_trans_huge_deposit(args->mm, args->pmdp, args->start_ptep);

	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
	flush_dcache_page(page);
	pmdp_set_wrprotect(args->mm, vaddr, args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(pmd_write(pmd));
	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!pmd_none(pmd));

	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
	pmd = pmd_wrprotect(pmd);
	pmd = pmd_mkclean(pmd);
	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
	flush_dcache_page(page);
	pmd = pmd_mkwrite(pmd, args->vma);
	pmd = pmd_mkdirty(pmd);
	pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd)));
	pmdp_huge_get_and_clear_full(args->vma, vaddr, args->pmdp, 1);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!pmd_none(pmd));

	pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot));
	pmd = pmd_mkyoung(pmd);
	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
	flush_dcache_page(page);
	pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(pmd_young(pmd));

	/* Clear the pmd entry and withdraw the deposited pte table */
	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
	pgtable_trans_huge_withdraw(args->mm, args->pmdp);
}
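
/*
 * Note on the deposit/withdraw pairing above: each huge PMD keeps a
 * deposited pte table around (pgtable_trans_huge_deposit()) so that a
 * later split can repopulate the pte level without allocating memory.
 * The test balances the deposit with pgtable_trans_huge_withdraw()
 * before the page table pages are torn down.
 */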

static void __init pmd_leaf_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD leaf\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);

	/*
	 * PMD based THP is a leaf entry.
	 */
	pmd = pmd_mkhuge(pmd);
	WARN_ON(!pmd_leaf(pmd));
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx)
{
	pgprot_t prot = vm_get_page_prot(idx);
	unsigned long val = idx, *ptr = &val;
	pud_t pud;

	if (!has_transparent_pud_hugepage())
		return;

	pr_debug("Validating PUD basic (%pGv)\n", ptr);
	pud = pfn_pud(args->fixed_pud_pfn, prot);

	/*
	 * This test needs to be executed after the given page table entry
	 * is created with pfn_pud() to make sure that vm_get_page_prot(idx)
	 * does not have the dirty bit enabled from the beginning. This is
	 * important for platforms like arm64 where (!PTE_RDONLY) indicates
	 * the dirty bit being set.
	 */
	WARN_ON(pud_dirty(pud_wrprotect(pud)));

	WARN_ON(!pud_same(pud, pud));
	WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud))));
	WARN_ON(!pud_dirty(pud_mkdirty(pud_mkclean(pud))));
	WARN_ON(pud_dirty(pud_mkclean(pud_mkdirty(pud))));
	WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud))));
	WARN_ON(pud_write(pud_wrprotect(pud_mkwrite(pud))));
	WARN_ON(pud_young(pud_mkold(pud_mkyoung(pud))));
	WARN_ON(pud_dirty(pud_wrprotect(pud_mkclean(pud))));
	WARN_ON(!pud_dirty(pud_wrprotect(pud_mkdirty(pud))));

	if (mm_pmd_folded(args->mm))
		return;

	/*
	 * A huge page does not point to a next level page table
	 * entry. Hence this must qualify as pud_bad().
	 */
	WARN_ON(!pud_bad(pud_mkhuge(pud)));
}

static void __init pud_advanced_tests(struct pgtable_debug_args *args)
{
	struct page *page;
	unsigned long vaddr = args->vaddr;
	pud_t pud;

	if (!has_transparent_pud_hugepage())
		return;

	page = (args->pud_pfn != ULONG_MAX) ? pfn_to_page(args->pud_pfn) : NULL;
	if (!page)
		return;

	/*
	 * flush_dcache_page() is called after set_pud_at() to clear
	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
	 * when the page is released, and the page allocation check would
	 * fail when the page is allocated again. For architectures other
	 * than ARM64, the unexpected overhead of cache flushing is
	 * acceptable.
	 */
	pr_debug("Validating PUD advanced\n");
	/* Align the address wrt HPAGE_PUD_SIZE */
	vaddr &= HPAGE_PUD_MASK;

	pud = pfn_pud(args->pud_pfn, args->page_prot);
	/*
	 * Some architectures have debug checks to make sure
	 * huge pud mappings are only found with devmap entries.
	 * For now, test with only devmap entries.
	 */
	pud = pud_mkdevmap(pud);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	flush_dcache_page(page);
	pudp_set_wrprotect(args->mm, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(pud_write(pud));

#ifndef __PAGETABLE_PMD_FOLDED
	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */
	pud = pfn_pud(args->pud_pfn, args->page_prot);
	pud = pud_mkdevmap(pud);
	pud = pud_wrprotect(pud);
	pud = pud_mkclean(pud);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	flush_dcache_page(page);
	pud = pud_mkwrite(pud);
	pud = pud_mkdirty(pud);
	pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!(pud_write(pud) && pud_dirty(pud)));

#ifndef __PAGETABLE_PMD_FOLDED
	pudp_huge_get_and_clear_full(args->vma, vaddr, args->pudp, 1);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */

	pud = pfn_pud(args->pud_pfn, args->page_prot);
	pud = pud_mkdevmap(pud);
	pud = pud_mkyoung(pud);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	flush_dcache_page(page);
	pudp_test_and_clear_young(args->vma, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(pud_young(pud));

	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
}

static void __init pud_leaf_tests(struct pgtable_debug_args *args)
{
	pud_t pud;

	if (!has_transparent_pud_hugepage())
		return;

	pr_debug("Validating PUD leaf\n");
	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
	/*
	 * PUD based THP is a leaf entry.
	 */
	pud = pud_mkhuge(pud);
	WARN_ON(!pud_leaf(pud));
}
#else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { }
static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { }
static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { }
static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { }
static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { }
static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
static void __init pmd_huge_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!arch_vmap_pmd_supported(args->page_prot) ||
	    args->fixed_alignment < PMD_SIZE)
		return;

	pr_debug("Validating PMD huge\n");
	/*
	 * The x86 implementation of pmd_set_huge() verifies that the
	 * given PMD is not a populated non-leaf entry.
	 */
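	/* Start from a cleared entry so that check cannot trip. */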
	WRITE_ONCE(*args->pmdp, __pmd(0));
	WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), args->page_prot));
	WARN_ON(!pmd_clear_huge(args->pmdp));
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!pmd_none(pmd));
}

static void __init pud_huge_tests(struct pgtable_debug_args *args)
{
	pud_t pud;

	if (!arch_vmap_pud_supported(args->page_prot) ||
	    args->fixed_alignment < PUD_SIZE)
		return;

	pr_debug("Validating PUD huge\n");
	/*
	 * The x86 implementation of pud_set_huge() verifies that the
	 * given PUD is not a populated non-leaf entry.
	 */
	WRITE_ONCE(*args->pudp, __pud(0));
	WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), args->page_prot));
	WARN_ON(!pud_clear_huge(args->pudp));
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
}
#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static void __init pmd_huge_tests(struct pgtable_debug_args *args) { }
static void __init pud_huge_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */

static void __init p4d_basic_tests(struct pgtable_debug_args *args)
{
	p4d_t p4d;

	pr_debug("Validating P4D basic\n");
	memset(&p4d, RANDOM_NZVALUE, sizeof(p4d_t));
	WARN_ON(!p4d_same(p4d, p4d));
}

static void __init pgd_basic_tests(struct pgtable_debug_args *args)
{
	pgd_t pgd;

	pr_debug("Validating PGD basic\n");
	memset(&pgd, RANDOM_NZVALUE, sizeof(pgd_t));
	WARN_ON(!pgd_same(pgd, pgd));
}

#ifndef __PAGETABLE_PUD_FOLDED
static void __init pud_clear_tests(struct pgtable_debug_args *args)
{
	pud_t pud = READ_ONCE(*args->pudp);

	if (mm_pmd_folded(args->mm))
		return;

	pr_debug("Validating PUD clear\n");
	pud = __pud(pud_val(pud) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->pudp, pud);
	pud_clear(args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
}

static void __init pud_populate_tests(struct pgtable_debug_args *args)
{
	pud_t pud;

	if (mm_pmd_folded(args->mm))
		return;

	pr_debug("Validating PUD populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence this must not qualify as pud_bad().
	 */
	pud_populate(args->mm, args->pudp, args->start_pmdp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(pud_bad(pud));
}
#else /* !__PAGETABLE_PUD_FOLDED */
static void __init pud_clear_tests(struct pgtable_debug_args *args) { }
static void __init pud_populate_tests(struct pgtable_debug_args *args) { }
#endif /* __PAGETABLE_PUD_FOLDED */

#ifndef __PAGETABLE_P4D_FOLDED
static void __init p4d_clear_tests(struct pgtable_debug_args *args)
{
	p4d_t p4d = READ_ONCE(*args->p4dp);

	if (mm_pud_folded(args->mm))
		return;

	pr_debug("Validating P4D clear\n");
	p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->p4dp, p4d);
	p4d_clear(args->p4dp);
	p4d = READ_ONCE(*args->p4dp);
	WARN_ON(!p4d_none(p4d));
}

static void __init p4d_populate_tests(struct pgtable_debug_args *args)
{
	p4d_t p4d;

	if (mm_pud_folded(args->mm))
		return;

	pr_debug("Validating P4D populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence this must not qualify as p4d_bad().
	 */
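	/* Start from empty entries at both levels before populating. */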
	pud_clear(args->pudp);
	p4d_clear(args->p4dp);
	p4d_populate(args->mm, args->p4dp, args->start_pudp);
	p4d = READ_ONCE(*args->p4dp);
	WARN_ON(p4d_bad(p4d));
}

static void __init pgd_clear_tests(struct pgtable_debug_args *args)
{
	pgd_t pgd = READ_ONCE(*(args->pgdp));

	if (mm_p4d_folded(args->mm))
		return;

	pr_debug("Validating PGD clear\n");
	pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->pgdp, pgd);
	pgd_clear(args->pgdp);
	pgd = READ_ONCE(*args->pgdp);
	WARN_ON(!pgd_none(pgd));
}

static void __init pgd_populate_tests(struct pgtable_debug_args *args)
{
	pgd_t pgd;

	if (mm_p4d_folded(args->mm))
		return;

	pr_debug("Validating PGD populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence this must not qualify as pgd_bad().
	 */
	p4d_clear(args->p4dp);
	pgd_clear(args->pgdp);
	pgd_populate(args->mm, args->pgdp, args->start_p4dp);
	pgd = READ_ONCE(*args->pgdp);
	WARN_ON(pgd_bad(pgd));
}
#else /* !__PAGETABLE_P4D_FOLDED */
static void __init p4d_clear_tests(struct pgtable_debug_args *args) { }
static void __init pgd_clear_tests(struct pgtable_debug_args *args) { }
static void __init p4d_populate_tests(struct pgtable_debug_args *args) { }
static void __init pgd_populate_tests(struct pgtable_debug_args *args) { }
#endif /* __PAGETABLE_P4D_FOLDED */

static void __init pte_clear_tests(struct pgtable_debug_args *args)
{
	struct page *page;
	pte_t pte = pfn_pte(args->pte_pfn, args->page_prot);

	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
	if (!page)
		return;

	/*
	 * flush_dcache_page() is called after set_pte_at() to clear
	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
	 * when the page is released, and the page allocation check would
	 * fail when the page is allocated again. For architectures other
	 * than ARM64, the unexpected overhead of cache flushing is
	 * acceptable.
	 */
	pr_debug("Validating PTE clear\n");
	if (WARN_ON(!args->ptep))
		return;

#ifndef CONFIG_RISCV
	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
#endif
	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
	flush_dcache_page(page);
	barrier();
	ptep_clear(args->mm, args->vaddr, args->ptep);
	pte = ptep_get(args->ptep);
	WARN_ON(!pte_none(pte));
}

static void __init pmd_clear_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd = READ_ONCE(*args->pmdp);

	pr_debug("Validating PMD clear\n");
	pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE);
	WRITE_ONCE(*args->pmdp, pmd);
	pmd_clear(args->pmdp);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(!pmd_none(pmd));
}

static void __init pmd_populate_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	pr_debug("Validating PMD populate\n");
	/*
	 * This entry points to the next level page table page.
	 * Hence this must not qualify as pmd_bad().
	 */
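	/* Install the pte page saved in init_args() as the next level table. */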
	pmd_populate(args->mm, args->pmdp, args->start_ptep);
	pmd = READ_ONCE(*args->pmdp);
	WARN_ON(pmd_bad(pmd));
}

static void __init pte_special_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);

	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL))
		return;

	pr_debug("Validating PTE special\n");
	WARN_ON(!pte_special(pte_mkspecial(pte)));
}

static void __init pte_protnone_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);

	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
		return;

	pr_debug("Validating PTE protnone\n");
	WARN_ON(!pte_protnone(pte));
	WARN_ON(!pte_present(pte));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_protnone_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
		return;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD protnone\n");
	pmd = pmd_mkhuge(pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none));
	WARN_ON(!pmd_protnone(pmd));
	WARN_ON(!pmd_present(pmd));
}
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_protnone_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
static void __init pte_devmap_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);

	pr_debug("Validating PTE devmap\n");
	WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_devmap_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD devmap\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
	WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static void __init pud_devmap_tests(struct pgtable_debug_args *args)
{
	pud_t pud;

	if (!has_transparent_pud_hugepage())
		return;

	pr_debug("Validating PUD devmap\n");
	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
	WARN_ON(!pud_devmap(pud_mkdevmap(pud)));
}
#else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { }
static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#else /* !CONFIG_ARCH_HAS_PTE_DEVMAP */
static void __init pte_devmap_tests(struct pgtable_debug_args *args) { }
static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { }
static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */

static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
		return;

	pr_debug("Validating PTE soft dirty\n");
	WARN_ON(!pte_soft_dirty(pte_mksoft_dirty(pte)));
	WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte)));
}
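
/*
 * A brief note for context (summarizing the generic semantics): soft
 * dirty tracks writes since the bit was last cleared from user space.
 * Swap ptes carry the bit separately, via the pte_swp_*() variants
 * exercised below.
 */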

static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
{
	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
		return;

	pr_debug("Validating PTE swap soft dirty\n");
	WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte)));
	WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte)));
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
		return;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD soft dirty\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
	WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd)));
	WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd)));
}

static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
	    !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
		return;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD swap soft dirty\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
	WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd)));
	WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd)));
}
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { }
static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static void __init pte_swap_exclusive_tests(struct pgtable_debug_args *args)
{
	unsigned long max_swap_offset;
	swp_entry_t entry, entry2;
	pte_t pte;

	pr_debug("Validating PTE swap exclusive\n");

	/* See generic_max_swapfile_size(): probe the maximum offset */
	max_swap_offset = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0, ~0UL))));

	/* Create a swp entry with all possible bits set */
	entry = swp_entry((1 << MAX_SWAPFILES_SHIFT) - 1, max_swap_offset);

	pte = swp_entry_to_pte(entry);
	WARN_ON(pte_swp_exclusive(pte));
	WARN_ON(!is_swap_pte(pte));
	entry2 = pte_to_swp_entry(pte);
	WARN_ON(memcmp(&entry, &entry2, sizeof(entry)));

	pte = pte_swp_mkexclusive(pte);
	WARN_ON(!pte_swp_exclusive(pte));
	WARN_ON(!is_swap_pte(pte));
	WARN_ON(pte_swp_soft_dirty(pte));
	entry2 = pte_to_swp_entry(pte);
	WARN_ON(memcmp(&entry, &entry2, sizeof(entry)));

	pte = pte_swp_clear_exclusive(pte);
	WARN_ON(pte_swp_exclusive(pte));
	WARN_ON(!is_swap_pte(pte));
	entry2 = pte_to_swp_entry(pte);
	WARN_ON(memcmp(&entry, &entry2, sizeof(entry)));
}

static void __init pte_swap_tests(struct pgtable_debug_args *args)
{
	swp_entry_t swp;
	pte_t pte;

	pr_debug("Validating PTE swap\n");
	pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
	swp = __pte_to_swp_entry(pte);
	pte = __swp_entry_to_pte(swp);
	WARN_ON(args->fixed_pte_pfn != pte_pfn(pte));
}
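
/*
 * The __pte_to_swp_entry()/__swp_entry_to_pte() helpers are expected
 * to reinterpret the entry's bits without losing information; checking
 * that the pfn survives the round trip above is a cheap proxy for that.
 */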

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
static void __init pmd_swap_tests(struct pgtable_debug_args *args)
{
	swp_entry_t swp;
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD swap\n");
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
	swp = __pmd_to_swp_entry(pmd);
	pmd = __swp_entry_to_pmd(swp);
	WARN_ON(args->fixed_pmd_pfn != pmd_pfn(pmd));
}
#else /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */
static void __init pmd_swap_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */

static void __init swap_migration_tests(struct pgtable_debug_args *args)
{
	struct page *page;
	swp_entry_t swp;

	if (!IS_ENABLED(CONFIG_MIGRATION))
		return;

	/*
	 * swap_migration_tests() requires a dedicated page as it needs to
	 * be locked before creating a migration entry from it. Locking the
	 * page that actually maps kernel text ('start_kernel') can be
	 * really problematic. Let's use the allocated page explicitly for
	 * this purpose.
	 */
	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
	if (!page)
		return;

	pr_debug("Validating swap migration\n");

	/*
	 * make_[readable|writable]_migration_entry() expects the given
	 * page to be locked, otherwise it stumbles upon a BUG_ON().
	 */
	__SetPageLocked(page);
	swp = make_writable_migration_entry(page_to_pfn(page));
	WARN_ON(!is_migration_entry(swp));
	WARN_ON(!is_writable_migration_entry(swp));

	swp = make_readable_migration_entry(swp_offset(swp));
	WARN_ON(!is_migration_entry(swp));
	WARN_ON(is_writable_migration_entry(swp));

	swp = make_readable_migration_entry(page_to_pfn(page));
	WARN_ON(!is_migration_entry(swp));
	WARN_ON(is_writable_migration_entry(swp));
	__ClearPageLocked(page);
}

#ifdef CONFIG_HUGETLB_PAGE
static void __init hugetlb_basic_tests(struct pgtable_debug_args *args)
{
	struct page *page;
	pte_t pte;

	pr_debug("Validating HugeTLB basic\n");
	/*
	 * Accessing the page associated with the pfn is safe here,
	 * as it was previously derived from a real kernel symbol.
	 */
	page = pfn_to_page(args->fixed_pmd_pfn);
	pte = mk_huge_pte(page, args->page_prot);

	WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte)));
	WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte))));
	WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte))));

#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
	pte = pfn_pte(args->fixed_pmd_pfn, args->page_prot);

	WARN_ON(!pte_huge(arch_make_huge_pte(pte, PMD_SHIFT, VM_ACCESS_FLAGS)));
#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
}
#else /* !CONFIG_HUGETLB_PAGE */
static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HUGETLB_PAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void __init pmd_thp_tests(struct pgtable_debug_args *args)
{
	pmd_t pmd;

	if (!has_transparent_hugepage())
		return;

	pr_debug("Validating PMD based THP\n");
	/*
	 * pmd_trans_huge() and pmd_present() must remain true after MMU
	 * invalidation with pmd_mkinvalid(). This behavior is an
	 * optimization for transparent huge pages. pmd_trans_huge() must
	 * be true if pmd_page() returns a valid THP, to avoid taking the
	 * pmd_lock when others walk over non-transhuge pmds (i.e. there
	 * are no THPs allocated). Especially when splitting a THP and
	 * removing the present bit from the pmd, pmd_trans_huge() still
	 * needs to return true. pmd_present() should be true whenever
	 * pmd_trans_huge() returns true.
	 */
	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
	WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));

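	/*
	 * Architectures that provide their own pmdp_invalidate()
	 * (__HAVE_ARCH_PMDP_INVALIDATE) may represent an invalidated huge
	 * PMD differently, so the pmd_mkinvalid() expectations are only
	 * checked against the generic implementation.
	 */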
#ifndef __HAVE_ARCH_PMDP_INVALIDATE
	WARN_ON(!pmd_trans_huge(pmd_mkinvalid(pmd_mkhuge(pmd))));
	WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd))));
	WARN_ON(!pmd_leaf(pmd_mkinvalid(pmd_mkhuge(pmd))));
#endif /* __HAVE_ARCH_PMDP_INVALIDATE */
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static void __init pud_thp_tests(struct pgtable_debug_args *args)
{
	pud_t pud;

	if (!has_transparent_pud_hugepage())
		return;

	pr_debug("Validating PUD based THP\n");
	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
	WARN_ON(!pud_trans_huge(pud_mkhuge(pud)));

	/*
	 * pud_mkinvalid() has been dropped for now. Re-enable these
	 * tests when it comes back with a modified pud_present().
	 *
	 * WARN_ON(!pud_trans_huge(pud_mkinvalid(pud_mkhuge(pud))));
	 * WARN_ON(!pud_present(pud_mkinvalid(pud_mkhuge(pud))));
	 */
}
#else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
static void __init pud_thp_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
static void __init pmd_thp_tests(struct pgtable_debug_args *args) { }
static void __init pud_thp_tests(struct pgtable_debug_args *args) { }
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static unsigned long __init get_random_vaddr(void)
{
	unsigned long random_vaddr, random_pages, total_user_pages;

	total_user_pages = (TASK_SIZE - FIRST_USER_ADDRESS) / PAGE_SIZE;

	random_pages = get_random_long() % total_user_pages;
	random_vaddr = FIRST_USER_ADDRESS + random_pages * PAGE_SIZE;

	return random_vaddr;
}

static void __init destroy_args(struct pgtable_debug_args *args)
{
	struct page *page = NULL;

	/* Free (huge) page */
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    has_transparent_pud_hugepage() &&
	    args->pud_pfn != ULONG_MAX) {
		if (args->is_contiguous_page) {
			free_contig_range(args->pud_pfn,
					  (1 << (HPAGE_PUD_SHIFT - PAGE_SHIFT)));
		} else {
			page = pfn_to_page(args->pud_pfn);
			__free_pages(page, HPAGE_PUD_SHIFT - PAGE_SHIFT);
		}

		args->pud_pfn = ULONG_MAX;
		args->pmd_pfn = ULONG_MAX;
		args->pte_pfn = ULONG_MAX;
	}

	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    has_transparent_hugepage() &&
	    args->pmd_pfn != ULONG_MAX) {
		if (args->is_contiguous_page) {
			free_contig_range(args->pmd_pfn, (1 << HPAGE_PMD_ORDER));
		} else {
			page = pfn_to_page(args->pmd_pfn);
			__free_pages(page, HPAGE_PMD_ORDER);
		}

		args->pmd_pfn = ULONG_MAX;
		args->pte_pfn = ULONG_MAX;
	}

	if (args->pte_pfn != ULONG_MAX) {
		page = pfn_to_page(args->pte_pfn);
		__free_page(page);

		args->pte_pfn = ULONG_MAX;
	}

	/* Free page table entries */
	if (args->start_ptep) {
		pte_free(args->mm, args->start_ptep);
		mm_dec_nr_ptes(args->mm);
	}

	if (args->start_pmdp) {
		pmd_free(args->mm, args->start_pmdp);
		mm_dec_nr_pmds(args->mm);
	}

	if (args->start_pudp) {
		pud_free(args->mm, args->start_pudp);
		mm_dec_nr_puds(args->mm);
	}

	if (args->start_p4dp)
		p4d_free(args->mm, args->start_p4dp);
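
	/*
	 * Note: everything above tears down in roughly the reverse order
	 * of init_args(): data pages first, then the saved page table
	 * pages from the bottom (pte) level up; the vma and mm go last.
	 */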

	/* Free vma and mm struct */
	if (args->vma)
		vm_area_free(args->vma);

	if (args->mm)
		mmdrop(args->mm);
}

static struct page * __init
debug_vm_pgtable_alloc_huge_page(struct pgtable_debug_args *args, int order)
{
	struct page *page = NULL;

#ifdef CONFIG_CONTIG_ALLOC
	if (order > MAX_PAGE_ORDER) {
		page = alloc_contig_pages((1 << order), GFP_KERNEL,
					  first_online_node, NULL);
		if (page) {
			args->is_contiguous_page = true;
			return page;
		}
	}
#endif

	if (order <= MAX_PAGE_ORDER)
		page = alloc_pages(GFP_KERNEL, order);

	return page;
}

/*
 * Check if a physical memory range described by <pstart, pend> contains
 * an area that is of size psize, and aligned to psize.
 *
 * Don't use address 0; an all-zeroes physical address might mask bugs,
 * and it's not used on x86.
 */
static void __init phys_align_check(phys_addr_t pstart,
				    phys_addr_t pend, unsigned long psize,
				    phys_addr_t *physp, unsigned long *alignp)
{
	phys_addr_t aligned_start, aligned_end;

	if (pstart == 0)
		pstart = PAGE_SIZE;

	aligned_start = ALIGN(pstart, psize);
	aligned_end = aligned_start + psize;

	if (aligned_end > aligned_start && aligned_end <= pend) {
		*alignp = psize;
		*physp = aligned_start;
	}
}

static void __init init_fixed_pfns(struct pgtable_debug_args *args)
{
	u64 idx;
	phys_addr_t phys, pstart, pend;

	/*
	 * Initialize the fixed pfns. To do this, try to find a
	 * valid physical range, preferably aligned to PUD_SIZE,
	 * but settling for one aligned to PMD_SIZE as a fallback. If
	 * neither of those is found, use the physical address of
	 * the start_kernel symbol.
	 *
	 * The memory doesn't need to be allocated, it just needs to exist
	 * as usable memory. It won't be touched.
	 *
	 * The alignment is recorded, and can be checked to see if we
	 * can run the tests that require an actual valid physical
	 * address range on some architectures ({pmd,pud}_huge_tests
	 * on x86).
	 */

	phys = __pa_symbol(&start_kernel);
	args->fixed_alignment = PAGE_SIZE;

	for_each_mem_range(idx, &pstart, &pend) {
		/* First check for a PUD-aligned area */
		phys_align_check(pstart, pend, PUD_SIZE, &phys,
				 &args->fixed_alignment);

		/* If a PUD-aligned area is found, we're done */
		if (args->fixed_alignment == PUD_SIZE)
			break;

		/*
		 * If no PMD-aligned area has been found yet, check for one,
		 * but continue the loop to look for a PUD-aligned area.
		 */
		if (args->fixed_alignment < PMD_SIZE)
			phys_align_check(pstart, pend, PMD_SIZE, &phys,
					 &args->fixed_alignment);
	}

	args->fixed_pgd_pfn = __phys_to_pfn(phys & PGDIR_MASK);
	args->fixed_p4d_pfn = __phys_to_pfn(phys & P4D_MASK);
	args->fixed_pud_pfn = __phys_to_pfn(phys & PUD_MASK);
	args->fixed_pmd_pfn = __phys_to_pfn(phys & PMD_MASK);
	args->fixed_pte_pfn = __phys_to_pfn(phys & PAGE_MASK);
	WARN_ON(!pfn_valid(args->fixed_pte_pfn));
}

static int __init init_args(struct pgtable_debug_args *args)
{
	struct page *page = NULL;
	int ret = 0;

	/*
	 * Initialize the debugging data.
	 *
	 * vm_get_page_prot(VM_NONE) or vm_get_page_prot(VM_SHARED|VM_NONE)
	 * will help create page table entries with PROT_NONE permission as
	 * required for pxx_protnone_tests().
	 */
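	/*
	 * ULONG_MAX in a pfn field means "not populated"; tests that need
	 * a real page check for it before calling pfn_to_page().
	 */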
	memset(args, 0, sizeof(*args));
	args->vaddr = get_random_vaddr();
	args->page_prot = vm_get_page_prot(VM_ACCESS_FLAGS);
	args->page_prot_none = vm_get_page_prot(VM_NONE);
	args->is_contiguous_page = false;
	args->pud_pfn = ULONG_MAX;
	args->pmd_pfn = ULONG_MAX;
	args->pte_pfn = ULONG_MAX;
	args->fixed_pgd_pfn = ULONG_MAX;
	args->fixed_p4d_pfn = ULONG_MAX;
	args->fixed_pud_pfn = ULONG_MAX;
	args->fixed_pmd_pfn = ULONG_MAX;
	args->fixed_pte_pfn = ULONG_MAX;

	/* Allocate mm and vma */
	args->mm = mm_alloc();
	if (!args->mm) {
		pr_err("Failed to allocate mm struct\n");
		ret = -ENOMEM;
		goto error;
	}

	args->vma = vm_area_alloc(args->mm);
	if (!args->vma) {
		pr_err("Failed to allocate vma\n");
		ret = -ENOMEM;
		goto error;
	}

	/*
	 * Allocate page table entries. They will be modified in the tests.
	 * Let's save the page table entries so that they can be released
	 * when the tests are completed.
	 */
	args->pgdp = pgd_offset(args->mm, args->vaddr);
	args->p4dp = p4d_alloc(args->mm, args->pgdp, args->vaddr);
	if (!args->p4dp) {
		pr_err("Failed to allocate p4d entries\n");
		ret = -ENOMEM;
		goto error;
	}
	args->start_p4dp = p4d_offset(args->pgdp, 0UL);
	WARN_ON(!args->start_p4dp);

	args->pudp = pud_alloc(args->mm, args->p4dp, args->vaddr);
	if (!args->pudp) {
		pr_err("Failed to allocate pud entries\n");
		ret = -ENOMEM;
		goto error;
	}
	args->start_pudp = pud_offset(args->p4dp, 0UL);
	WARN_ON(!args->start_pudp);

	args->pmdp = pmd_alloc(args->mm, args->pudp, args->vaddr);
	if (!args->pmdp) {
		pr_err("Failed to allocate pmd entries\n");
		ret = -ENOMEM;
		goto error;
	}
	args->start_pmdp = pmd_offset(args->pudp, 0UL);
	WARN_ON(!args->start_pmdp);

	if (pte_alloc(args->mm, args->pmdp)) {
		pr_err("Failed to allocate pte entries\n");
		ret = -ENOMEM;
		goto error;
	}
	args->start_ptep = pmd_pgtable(READ_ONCE(*args->pmdp));
	WARN_ON(!args->start_ptep);

	init_fixed_pfns(args);

	/*
	 * Allocate (huge) pages because some of the tests need to access
	 * the data in the pages. The corresponding tests will be skipped
	 * if we fail to allocate (huge) pages.
	 */
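	/*
	 * The allocation falls back level by level: try a PUD sized huge
	 * page first, then a PMD sized one, and finally a single order-0
	 * page for the pte level tests.
	 */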
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    has_transparent_pud_hugepage()) {
		page = debug_vm_pgtable_alloc_huge_page(args,
				HPAGE_PUD_SHIFT - PAGE_SHIFT);
		if (page) {
			args->pud_pfn = page_to_pfn(page);
			args->pmd_pfn = args->pud_pfn;
			args->pte_pfn = args->pud_pfn;
			return 0;
		}
	}

	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    has_transparent_hugepage()) {
		page = debug_vm_pgtable_alloc_huge_page(args, HPAGE_PMD_ORDER);
		if (page) {
			args->pmd_pfn = page_to_pfn(page);
			args->pte_pfn = args->pmd_pfn;
			return 0;
		}
	}

	page = alloc_page(GFP_KERNEL);
	if (page)
		args->pte_pfn = page_to_pfn(page);

	return 0;

error:
	destroy_args(args);
	return ret;
}

static int __init debug_vm_pgtable(void)
{
	struct pgtable_debug_args args;
	spinlock_t *ptl = NULL;
	int idx, ret;

	pr_info("Validating architecture page table helpers\n");
	ret = init_args(&args);
	if (ret)
		return ret;

	/*
	 * Iterate over each possible vm_flags combination to make sure
	 * that all the basic page table transformation validations hold
	 * true irrespective of the starting protection value for a given
	 * page table entry.
	 *
	 * Protection based vm_flags combinations are always linear
	 * and increasing, i.e. starting from VM_NONE and going up to
	 * (VM_SHARED | READ | WRITE | EXEC).
	 */
#define VM_FLAGS_START	(VM_NONE)
#define VM_FLAGS_END	(VM_SHARED | VM_EXEC | VM_WRITE | VM_READ)

	for (idx = VM_FLAGS_START; idx <= VM_FLAGS_END; idx++) {
		pte_basic_tests(&args, idx);
		pmd_basic_tests(&args, idx);
		pud_basic_tests(&args, idx);
	}

	/*
	 * Both the P4D and PGD level tests are very basic and do not
	 * involve creating page table entries from the protection
	 * value and the given pfn. Hence just keep them out of
	 * the above iteration for now to save some test execution
	 * time.
	 */
	p4d_basic_tests(&args);
	pgd_basic_tests(&args);

	pmd_leaf_tests(&args);
	pud_leaf_tests(&args);

	pte_special_tests(&args);
	pte_protnone_tests(&args);
	pmd_protnone_tests(&args);

	pte_devmap_tests(&args);
	pmd_devmap_tests(&args);
	pud_devmap_tests(&args);

	pte_soft_dirty_tests(&args);
	pmd_soft_dirty_tests(&args);
	pte_swap_soft_dirty_tests(&args);
	pmd_swap_soft_dirty_tests(&args);

	pte_swap_exclusive_tests(&args);

	pte_swap_tests(&args);
	pmd_swap_tests(&args);

	swap_migration_tests(&args);

	pmd_thp_tests(&args);
	pud_thp_tests(&args);

	hugetlb_basic_tests(&args);

	/*
	 * Page table modifying tests. They need to hold the
	 * proper page table lock.
	 */
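	/*
	 * Each level is exercised under the lock that would protect it in
	 * real use: the pte level under pte_offset_map_lock(), PMDs under
	 * pmd_lock(), PUDs under pud_lock(), and the upper levels under
	 * mm->page_table_lock.
	 */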

	args.ptep = pte_offset_map_lock(args.mm, args.pmdp, args.vaddr, &ptl);
	pte_clear_tests(&args);
	pte_advanced_tests(&args);
	if (args.ptep)
		pte_unmap_unlock(args.ptep, ptl);

	ptl = pmd_lock(args.mm, args.pmdp);
	pmd_clear_tests(&args);
	pmd_advanced_tests(&args);
	pmd_huge_tests(&args);
	pmd_populate_tests(&args);
	spin_unlock(ptl);

	ptl = pud_lock(args.mm, args.pudp);
	pud_clear_tests(&args);
	pud_advanced_tests(&args);
	pud_huge_tests(&args);
	pud_populate_tests(&args);
	spin_unlock(ptl);

	spin_lock(&(args.mm->page_table_lock));
	p4d_clear_tests(&args);
	pgd_clear_tests(&args);
	p4d_populate_tests(&args);
	pgd_populate_tests(&args);
	spin_unlock(&(args.mm->page_table_lock));

	destroy_args(&args);
	return 0;
}
late_initcall(debug_vm_pgtable);