/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_PGTABLE_H
#define __ASM_PGTABLE_H

#include <asm/bug.h>
#include <asm/proc-fns.h>

#include <asm/memory.h>
#include <asm/mte.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable-prot.h>
#include <asm/tlbflush.h>

/*
 * VMALLOC range.
 *
 * VMALLOC_START: beginning of the kernel vmalloc space
 * VMALLOC_END: extends to the available space below vmemmap
 */
#define VMALLOC_START		(MODULES_END)
#if VA_BITS == VA_BITS_MIN
#define VMALLOC_END		(VMEMMAP_START - SZ_8M)
#else
#define VMEMMAP_UNUSED_NPAGES	((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT)
#define VMALLOC_END		(VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M)
#endif

#define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))

#ifndef __ASSEMBLER__

#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
#include <asm/por.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/page_table_check.h>

static inline void emit_pte_barriers(void)
{
	/*
	 * These barriers are emitted under certain conditions after a pte entry
	 * was modified (see e.g. __set_pte_complete()). The dsb makes the store
	 * visible to the table walker. The isb ensures that any previous
	 * speculative "invalid translation" marker that is in the CPU's
	 * pipeline gets cleared, so that any access to that address after
	 * setting the pte to valid won't cause a spurious fault. If the thread
	 * gets preempted after storing to the pgtable but before emitting these
	 * barriers, __switch_to() emits a dsb which ensures the walker gets to
	 * see the store. There is no guarantee of an isb being issued though.
	 * This is safe because it will still get issued (albeit on a
	 * potentially different CPU) when the thread starts running again,
	 * before any access to the address.
	 */
	dsb(ishst);
	isb();
}

static inline void queue_pte_barriers(void)
{
	if (is_lazy_mmu_mode_active()) {
		/* Avoid the atomic op if already set. */
		if (!test_thread_flag(TIF_LAZY_MMU_PENDING))
			set_thread_flag(TIF_LAZY_MMU_PENDING);
	} else {
		emit_pte_barriers();
	}
}

static inline void arch_enter_lazy_mmu_mode(void) {}

static inline void arch_flush_lazy_mmu_mode(void)
{
	if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING))
		emit_pte_barriers();
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	arch_flush_lazy_mmu_mode();
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE

/* Set stride and tlb_level in flush_*_tlb_range */
#define flush_pmd_tlb_range(vma, addr, end)				\
	__flush_tlb_range(vma, addr, end, PMD_SIZE, 2, TLBF_NONE)
#define flush_pud_tlb_range(vma, addr, end)				\
	__flush_tlb_range(vma, addr, end, PUD_SIZE, 1, TLBF_NONE)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
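/*
 * Illustrative sketch (hypothetical caller, not part of this header): while
 * lazy MMU mode is active, queue_pte_barriers() above only sets
 * TIF_LAZY_MMU_PENDING, so a batch of kernel pte updates pays for a single
 * barrier pair on exit:
 *
 *	arch_enter_lazy_mmu_mode();
 *	for ( ... each entry ... )
 *		__set_pte(ptep++, pte);		// barriers deferred
 *	arch_leave_lazy_mmu_mode();		// one dsb(ishst) + isb
 */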
/*
 * We use a local TLB invalidation instruction when reusing a page in the
 * write protection fault handler to avoid TLBI broadcast in the hot
 * path. This will cause spurious page faults if stale read-only TLB
 * entries exist.
 */
#define flush_tlb_fix_spurious_fault(vma, address, ptep)		\
	__flush_tlb_page(vma, address, TLBF_NOBROADCAST | TLBF_NONOTIFY)

#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp)		\
	__flush_tlb_range(vma, address, address + PMD_SIZE, PMD_SIZE, 2, \
			  TLBF_NOBROADCAST | TLBF_NONOTIFY | TLBF_NOWALKCACHE)

/*
 * ZERO_PAGE is a global shared page that is always zero: used
 * for zero-mapped memory areas etc.
 */
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define ZERO_PAGE(vaddr)	phys_to_page(__pa_symbol(empty_zero_page))

#define pte_ERROR(e)	\
	pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))

#ifdef CONFIG_ARM64_PA_BITS_52
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
	pte_val(pte) &= ~PTE_MAYBE_SHARED;
	return (pte_val(pte) & PTE_ADDR_LOW) |
		((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
}

static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
{
	return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}
#else
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
	return pte_val(pte) & PTE_ADDR_LOW;
}

static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
{
	return phys;
}
#endif

#define pte_pfn(pte)		(__pte_to_phys(pte) >> PAGE_SHIFT)
#define pfn_pte(pfn,prot)	\
	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))

#define pte_none(pte)		(!pte_val(pte))
#define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))

/*
 * The following only work if pte_present(). Undefined behaviour otherwise.
 */
#define pte_present(pte)	(pte_valid(pte) || pte_present_invalid(pte))
#define pte_young(pte)		(!!(pte_val(pte) & PTE_AF))
#define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
#define pte_rdonly(pte)		(!!(pte_val(pte) & PTE_RDONLY))
#define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
#define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
#define pte_tagged(pte)		((pte_val(pte) & PTE_ATTRINDX_MASK) == \
				 PTE_ATTRINDX(MT_NORMAL_TAGGED))

#define pte_cont_addr_end(addr, end)					\
({	unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK;	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
})

#define pmd_cont_addr_end(addr, end)					\
({	unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK;	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
})

#define pte_hw_dirty(pte)	(pte_write(pte) && !pte_rdonly(pte))
#define pte_sw_dirty(pte)	(!!(pte_val(pte) & PTE_DIRTY))
#define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))

#define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
#define pte_present_invalid(pte) \
	((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID)

/*
 * Execute-only user mappings do not have the PTE_USER bit set. All valid
 * kernel mappings have the PTE_UXN bit set.
 */
#define pte_valid_not_user(pte) \
	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
/*
 * Returns true if the pte is valid and has the contiguous bit set.
 */
#define pte_valid_cont(pte)	(pte_valid(pte) && pte_cont(pte))
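/*
 * Illustrative sketch of how the predicates above compose for an entry that
 * was valid and is then invalidated with pte_mkinvalid() (defined further
 * down):
 *
 *	pte = pte_mkinvalid(pte);
 *	pte_valid(pte);			// false: HW faults on access
 *	pte_present_invalid(pte);	// true:  SW still sees it as present
 *	pte_present(pte);		// true
 */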
/*
 * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
 * so that we don't erroneously return false for pages that have been
 * remapped as PROT_NONE but are yet to be flushed from the TLB.
 * Note that we can't make any assumptions based on the state of the access
 * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the
 * TLB.
 */
#define pte_accessible(mm, pte)	\
	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))

static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute)
{
	u64 por;

	if (!system_supports_poe())
		return true;

	por = read_sysreg_s(SYS_POR_EL0);

	if (write)
		return por_elx_allows_write(por, pkey);

	if (execute)
		return por_elx_allows_exec(por, pkey);

	return por_elx_allows_read(por, pkey);
}

/*
 * p??_access_permitted() is true for valid user mappings (PTE_USER
 * bit set, subject to the write permission check). Execute-only
 * mappings, e.g. PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits
 * not set), must return false. PROT_NONE mappings do not have the
 * PTE_VALID bit set.
 */
#define pte_access_permitted_no_overlay(pte, write) \
	(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
#define pte_access_permitted(pte, write) \
	(pte_access_permitted_no_overlay(pte, write) && \
	 por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false))
#define pmd_access_permitted(pmd, write) \
	(pte_access_permitted(pmd_pte(pmd), (write)))
#define pud_access_permitted(pud, write) \
	(pte_access_permitted(pud_pte(pud), (write)))

static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
{
	pte_val(pte) &= ~pgprot_val(prot);
	return pte;
}

static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
{
	pte_val(pte) |= pgprot_val(prot);
	return pte;
}

static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
{
	pmd_val(pmd) &= ~pgprot_val(prot);
	return pmd;
}

static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
{
	pmd_val(pmd) |= pgprot_val(prot);
	return pmd;
}

static inline pte_t pte_mkwrite_novma(pte_t pte)
{
	pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
	if (pte_sw_dirty(pte))
		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
	return pte;
}

static inline pte_t pte_mkclean(pte_t pte)
{
	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));

	return pte;
}

static inline pte_t pte_mkdirty(pte_t pte)
{
	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));

	if (pte_write(pte))
		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));

	return pte;
}
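/*
 * Illustrative sketch of the DBM encoding produced by the helpers above:
 * marking a writable pte dirty clears PTE_RDONLY, which is exactly the
 * hardware-dirty state that pte_hw_dirty() tests for:
 *
 *	pte = pte_mkwrite_novma(pte);	// PTE_WRITE (== PTE_DBM) set
 *	pte = pte_mkdirty(pte);		// PTE_DIRTY set, PTE_RDONLY cleared
 *	pte_hw_dirty(pte);		// true: pte_write() && !pte_rdonly()
 */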
static inline pte_t pte_wrprotect(pte_t pte)
{
	/*
	 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
	 * clear), set the PTE_DIRTY bit.
	 */
	if (pte_hw_dirty(pte))
		pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));

	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
	return pte;
}

static inline pte_t pte_mkold(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_AF));
}

static inline pte_t pte_mkyoung(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_AF));
}

static inline pte_t pte_mkspecial(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
}

static inline pte_t pte_mkcont(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_CONT));
}

static inline pte_t pte_mknoncont(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_CONT));
}

static inline pte_t pte_mkvalid_k(pte_t pte)
{
	pte = clear_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID));
	pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_VALID_KERNEL));
	return pte;
}

static inline pte_t pte_mkinvalid(pte_t pte)
{
	pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID));
	pte = clear_pte_bit(pte, __pgprot(PTE_VALID));
	return pte;
}

static inline pmd_t pmd_mkcont(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
}

static inline pmd_t pmd_mknoncont(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) & ~PMD_SECT_CONT);
}

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
	return !!(pte_val(pte) & PTE_UFFD_WP);
}

static inline pte_t pte_mkuffd_wp(pte_t pte)
{
	return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP)));
}

static inline pte_t pte_clear_uffd_wp(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP));
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
{
	WRITE_ONCE(*ptep, pte);
}

static inline void __set_pte_complete(pte_t pte)
{
	/*
	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
	 * has the necessary barriers.
	 */
	if (pte_valid_not_user(pte))
		queue_pte_barriers();
}

static inline void __set_pte(pte_t *ptep, pte_t pte)
{
	__set_pte_nosync(ptep, pte);
	__set_pte_complete(pte);
}

static inline pte_t __ptep_get(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}

extern void __sync_icache_dcache(pte_t pteval);
bool pgattr_change_is_safe(pteval_t old, pteval_t new);
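/*
 * Illustrative sketch: __set_pte() above decomposes into a plain store plus
 * a conditional barrier, which batch updaters (e.g. __set_ptes_anysz()
 * below) exploit by storing each entry with __set_pte_nosync() and issuing
 * a single __set_pte_complete() for the whole run (pte_advance_pfn() is
 * defined a little further down):
 *
 *	for (i = 0; i < nr; i++)
 *		__set_pte_nosync(ptep + i, pte_advance_pfn(pte, i));
 *	__set_pte_complete(pte);	// at most one dsb/isb for the batch
 */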
/*
 * PTE bits configuration in the presence of hardware Dirty Bit Management
 * (PTE_WRITE == PTE_DBM):
 *
 *   Dirty  Writable | PTE_RDONLY  PTE_WRITE  PTE_DIRTY (sw)
 *   0      0        | 1           0          0
 *   0      1        | 1           1          0
 *   1      0        | 1           0          1
 *   1      1        | 0           1          x
 *
 * When hardware DBM is not present, the software PTE_DIRTY bit is updated via
 * the page fault mechanism. Checking the dirty status of a pte becomes:
 *
 *   PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
 */

static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
					   pte_t pte)
{
	pte_t old_pte;

	if (!IS_ENABLED(CONFIG_DEBUG_VM))
		return;

	old_pte = __ptep_get(ptep);

	if (!pte_valid(old_pte) || !pte_valid(pte))
		return;
	if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1)
		return;

	/*
	 * Check for potential race with hardware updates of the pte
	 * (__ptep_set_access_flags safely changes valid ptes without going
	 * through an invalid entry).
	 */
	VM_WARN_ONCE(!pte_young(pte),
		     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
		     __func__, pte_val(old_pte), pte_val(pte));
	VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
		     "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
		     __func__, pte_val(old_pte), pte_val(pte));
	VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)),
		     "%s: unsafe attribute change: 0x%016llx -> 0x%016llx",
		     __func__, pte_val(old_pte), pte_val(pte));
}

static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
{
	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
		__sync_icache_dcache(pte);

	/*
	 * If the PTE would provide user space access to the tags associated
	 * with it then ensure that the MTE tags are synchronised. Although
	 * pte_access_permitted_no_overlay() returns false for exec-only
	 * mappings, they don't expose tags (instruction fetches don't check
	 * tags).
	 */
	if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) &&
	    !pte_special(pte) && pte_tagged(pte))
		mte_sync_tags(pte, nr_pages);
}

/*
 * Select all bits except the pfn.
 */
#define pte_pgprot pte_pgprot
static inline pgprot_t pte_pgprot(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);

	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}

#define pte_advance_pfn pte_advance_pfn
static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
{
	return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
}
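/*
 * Illustrative sketch: pte_advance_pfn() preserves everything except the
 * pfn, so stepping a template pte across a physically contiguous range is
 * just:
 *
 *	pte = pte_advance_pfn(pte, 1);		// next base page
 *	pte = pte_advance_pfn(pte, CONT_PTES);	// skip a whole contpte block
 */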
/*
 * Hugetlb definitions.
 */
#define HUGE_MAX_HSTATE		4
#define HPAGE_SHIFT		PMD_SHIFT
#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)

static inline pte_t pgd_pte(pgd_t pgd)
{
	return __pte(pgd_val(pgd));
}

static inline pte_t p4d_pte(p4d_t p4d)
{
	return __pte(p4d_val(p4d));
}

static inline pte_t pud_pte(pud_t pud)
{
	return __pte(pud_val(pud));
}

static inline pud_t pte_pud(pte_t pte)
{
	return __pud(pte_val(pte));
}

static inline pmd_t pud_pmd(pud_t pud)
{
	return __pmd(pud_val(pud));
}

static inline pte_t pmd_pte(pmd_t pmd)
{
	return __pte(pmd_val(pmd));
}

static inline pmd_t pte_pmd(pte_t pte)
{
	return __pmd(pte_val(pte));
}

static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
{
	return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT);
}

static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
{
	return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
}

static inline pte_t pte_swp_mkexclusive(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}

static inline bool pte_swp_exclusive(pte_t pte)
{
	return pte_val(pte) & PTE_SWP_EXCLUSIVE;
}

static inline pte_t pte_swp_clear_exclusive(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
}

static inline int pte_swp_uffd_wp(pte_t pte)
{
	return !!(pte_val(pte) & PTE_SWP_UFFD_WP);
}

static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
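/*
 * Illustrative sketch: swap ptes carry software-only state such as the
 * exclusive and uffd-wp bits manipulated above, e.g.:
 *
 *	pte = pte_swp_mkexclusive(pte);	// remember PG_anon_exclusive
 *	pte_swp_exclusive(pte);		// true until explicitly cleared
 */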
#ifdef CONFIG_NUMA_BALANCING
/*
 * See the comment in include/linux/pgtable.h
 */
static inline int pte_protnone(pte_t pte)
{
	/*
	 * pte_present_invalid() tells us that the pte is invalid from a HW
	 * perspective but present from a SW perspective, so the fields are
	 * to be interpreted as per the HW layout. The last two checks are
	 * the unique encoding that we use for PROT_NONE. It is insufficient
	 * to only use the first check because we share the same encoding
	 * scheme with pmds, which support pmd_mkinvalid() and so can be
	 * present-invalid without being PROT_NONE.
	 */
	return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte);
}

static inline int pmd_protnone(pmd_t pmd)
{
	return pte_protnone(pmd_pte(pmd));
}
#endif

#define pmd_present(pmd)	pte_present(pmd_pte(pmd))
#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
#define pmd_valid(pmd)		pte_valid(pmd_pte(pmd))
#define pmd_user(pmd)		pte_user(pmd_pte(pmd))
#define pmd_user_exec(pmd)	pte_user_exec(pmd_pte(pmd))
#define pmd_cont(pmd)		pte_cont(pmd_pte(pmd))
#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_mkwrite_novma(pmd)	pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
#define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mkvalid_k(pmd)	pte_pmd(pte_mkvalid_k(pmd_pte(pmd)))
#define pmd_mkinvalid(pmd)	pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
#define pmd_uffd_wp(pmd)	pte_uffd_wp(pmd_pte(pmd))
#define pmd_mkuffd_wp(pmd)	pte_pmd(pte_mkuffd_wp(pmd_pte(pmd)))
#define pmd_clear_uffd_wp(pmd)	pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd)))
#define pmd_swp_uffd_wp(pmd)	pte_swp_uffd_wp(pmd_pte(pmd))
#define pmd_swp_mkuffd_wp(pmd)	pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd)))
#define pmd_swp_clear_uffd_wp(pmd) \
				pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd)))
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

#define pmd_write(pmd)		pte_write(pmd_pte(pmd))

static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
	/*
	 * It's possible that the pmd is present-invalid on entry
	 * and in that case it needs to remain present-invalid on
	 * exit. So ensure the VALID bit does not get modified.
	 */
	pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID;
	pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID;

	return __pmd((pmd_val(pmd) & ~mask) | val);
}

#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
#define pmd_special(pte)	(!!((pmd_val(pte) & PTE_SPECIAL)))
static inline pmd_t pmd_mkspecial(pmd_t pmd)
{
	return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL));
}
#endif

#define __pmd_to_phys(pmd)	__pte_to_phys(pmd_pte(pmd))
#define __phys_to_pmd_val(phys)	__phys_to_pte_val(phys)
#define pmd_pfn(pmd)		((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot)	__pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))

#define pud_young(pud)		pte_young(pud_pte(pud))
#define pud_mkyoung(pud)	pte_pud(pte_mkyoung(pud_pte(pud)))
#define pud_mkwrite_novma(pud)	pte_pud(pte_mkwrite_novma(pud_pte(pud)))
#define pud_mkvalid_k(pud)	pte_pud(pte_mkvalid_k(pud_pte(pud)))
#define pud_write(pud)		pte_write(pud_pte(pud))
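/*
 * Illustrative sketch: because pmd_mkhuge() above (and pud_mkhuge() below)
 * never touches the VALID bit, a present-invalid entry stays non-walkable:
 *
 *	pmd = pmd_mkhuge(pmd_mkinvalid(pmd));
 *	pmd_present(pmd);	// true
 *	pmd_valid(pmd);		// still false
 */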
static inline pud_t pud_mkhuge(pud_t pud)
{
	/*
	 * It's possible that the pud is present-invalid on entry
	 * and in that case it needs to remain present-invalid on
	 * exit. So ensure the VALID bit does not get modified.
	 */
	pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID;
	pudval_t val = PUD_TYPE_SECT & ~PTE_VALID;

	return __pud((pud_val(pud) & ~mask) | val);
}

#define __pud_to_phys(pud)	__pte_to_phys(pud_pte(pud))
#define __phys_to_pud_val(phys)	__phys_to_pte_val(phys)
#define pud_pfn(pud)		((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot)	__pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))

#define pmd_pgprot pmd_pgprot
static inline pgprot_t pmd_pgprot(pmd_t pmd)
{
	unsigned long pfn = pmd_pfn(pmd);

	return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd));
}

#define pud_pgprot pud_pgprot
static inline pgprot_t pud_pgprot(pud_t pud)
{
	unsigned long pfn = pud_pfn(pud);

	return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
}

static inline void __set_ptes_anysz(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, pte_t pte, unsigned int nr,
				    unsigned long pgsize)
{
	unsigned long stride = pgsize >> PAGE_SHIFT;

	switch (pgsize) {
	case PAGE_SIZE:
		page_table_check_ptes_set(mm, addr, ptep, pte, nr);
		break;
	case PMD_SIZE:
		page_table_check_pmds_set(mm, addr, (pmd_t *)ptep,
					  pte_pmd(pte), nr);
		break;
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		page_table_check_puds_set(mm, addr, (pud_t *)ptep,
					  pte_pud(pte), nr);
		break;
#endif
	default:
		VM_WARN_ON(1);
	}

	__sync_cache_and_tags(pte, nr * stride);

	for (;;) {
		__check_safe_pte_update(mm, ptep, pte);
		__set_pte_nosync(ptep, pte);
		if (--nr == 0)
			break;
		ptep++;
		pte = pte_advance_pfn(pte, stride);
	}

	__set_pte_complete(pte);
}

static inline void __set_ptes(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte, unsigned int nr)
{
	__set_ptes_anysz(mm, addr, ptep, pte, nr, PAGE_SIZE);
}

static inline void __set_pmds(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp, pmd_t pmd, unsigned int nr)
{
	__set_ptes_anysz(mm, addr, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE);
}
#define set_pmd_at(mm, addr, pmdp, pmd)	__set_pmds(mm, addr, pmdp, pmd, 1)

static inline void __set_puds(struct mm_struct *mm, unsigned long addr,
			      pud_t *pudp, pud_t pud, unsigned int nr)
{
	__set_ptes_anysz(mm, addr, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE);
}
#define set_pud_at(mm, addr, pudp, pud)	__set_puds(mm, addr, pudp, pud, 1)
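/*
 * Illustrative sketch (hypothetical caller): installing a THP boils down to
 * a single entry write plus at most one barrier pair, since set_pmd_at()
 * above funnels into __set_ptes_anysz() with a PMD_SIZE stride:
 *
 *	set_pmd_at(mm, addr, pmdp, pmd_mkhuge(pfn_pmd(pfn, prot)));
 */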
#define __p4d_to_phys(p4d)	__pte_to_phys(p4d_pte(p4d))
#define __phys_to_p4d_val(phys)	__phys_to_pte_val(phys)

#define __pgd_to_phys(pgd)	__pte_to_phys(pgd_pte(pgd))
#define __phys_to_pgd_val(phys)	__phys_to_pte_val(phys)

#define __pgprot_modify(prot,mask,bits) \
	__pgprot((pgprot_val(prot) & ~(mask)) | (bits))

#define pgprot_nx(prot) \
	__pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN)

#define pgprot_decrypted(prot) \
	__pgprot_modify(prot, PROT_NS_SHARED, PROT_NS_SHARED)
#define pgprot_encrypted(prot) \
	__pgprot_modify(prot, PROT_NS_SHARED, 0)

/*
 * Mark the prot value as uncacheable and unbufferable.
 */
#define pgprot_noncached(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
#define pgprot_writecombine(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
#define pgprot_device(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
#define pgprot_tagged(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED))
#define pgprot_mhp	pgprot_tagged

/*
 * DMA allocations for non-coherent devices use what the Arm architecture calls
 * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
 * and merging of writes. This is different from "Device-nGnR[nE]" memory which
 * is intended for MMIO and thus forbids speculation, preserves access size,
 * requires strict alignment and can also force write responses to come from the
 * endpoint.
 */
#define pgprot_dmacoherent(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, \
			PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)

#define __HAVE_PHYS_MEM_ACCESS_PROT
struct file;
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
				     unsigned long size, pgprot_t vma_prot);

#define pmd_none(pmd)		(!pmd_val(pmd))

#define pmd_table(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
				 PMD_TYPE_TABLE)

#define pmd_leaf pmd_leaf
static inline bool pmd_leaf(pmd_t pmd)
{
	return pmd_present(pmd) && !pmd_table(pmd);
}

#define pmd_bad(pmd)		(!pmd_table(pmd))

#define pmd_leaf_size(pmd)	(pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
#define pte_leaf_size(pte)	(pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	/*
	 * If pmd is present-invalid, pmd_table() won't detect it
	 * as a table, so force the valid bit for the comparison.
	 */
	return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
static inline bool pud_table(pud_t pud) { return true; }
#else
#define pud_table(pud)		((pud_val(pud) & PUD_TYPE_MASK) == \
				 PUD_TYPE_TABLE)
#endif

extern pgd_t swapper_pg_dir[];
extern pgd_t idmap_pg_dir[];
extern pgd_t tramp_pg_dir[];
extern pgd_t reserved_pg_dir[];

extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);

static inline bool in_swapper_pgdir(void *addr)
{
	return ((unsigned long)addr & PAGE_MASK) ==
		((unsigned long)swapper_pg_dir & PAGE_MASK);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
#ifdef __PAGETABLE_PMD_FOLDED
	if (in_swapper_pgdir(pmdp)) {
		set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
		return;
	}
#endif /* __PAGETABLE_PMD_FOLDED */

	WRITE_ONCE(*pmdp, pmd);

	if (pmd_valid(pmd))
		queue_pte_barriers();
}

static inline void pmd_clear(pmd_t *pmdp)
{
	set_pmd(pmdp, __pmd(0));
}

static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
{
	return __pmd_to_phys(pmd);
}

static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
	return (unsigned long)__va(pmd_page_paddr(pmd));
}
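/*
 * Illustrative sketch: telling leaf and table entries apart at PMD level
 * with the predicates above:
 *
 *	pmd_t pmd = READ_ONCE(*pmdp);
 *	if (pmd_leaf(pmd))
 *		... block mapping (including present-invalid) ...
 *	else if (pmd_table(pmd))
 *		... points to a next-level table ...
 */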
/* Find an entry in the third-level page table. */
#define pte_offset_phys(dir,addr)	(pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))

#define pte_set_fixmap(addr)		((pte_t *)set_fixmap_offset(FIX_PTE, addr))
#define pte_set_fixmap_offset(pmd, addr)	pte_set_fixmap(pte_offset_phys(pmd, addr))
#define pte_clear_fixmap()		clear_fixmap(FIX_PTE)

#define pmd_page(pmd)			phys_to_page(__pmd_to_phys(pmd))

/* use ONLY for statically allocated translation tables */
#define pte_offset_kimg(dir,addr)	((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))

#if CONFIG_PGTABLE_LEVELS > 2

#define pmd_ERROR(e)	\
	pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))

#define pud_none(pud)		(!pud_val(pud))
#define pud_bad(pud)		((pud_val(pud) & PUD_TYPE_MASK) != \
				 PUD_TYPE_TABLE)
#define pud_present(pud)	pte_present(pud_pte(pud))
#ifndef __PAGETABLE_PMD_FOLDED
#define pud_leaf pud_leaf
static inline bool pud_leaf(pud_t pud)
{
	return pud_present(pud) && !pud_table(pud);
}
#else
#define pud_leaf(pud)		false
#endif
#define pud_valid(pud)		pte_valid(pud_pte(pud))
#define pud_user(pud)		pte_user(pud_pte(pud))
#define pud_user_exec(pud)	pte_user_exec(pud_pte(pud))

static inline bool pgtable_l4_enabled(void);

static inline void set_pud(pud_t *pudp, pud_t pud)
{
	if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) {
		set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
		return;
	}

	WRITE_ONCE(*pudp, pud);

	if (pud_valid(pud))
		queue_pte_barriers();
}

static inline void pud_clear(pud_t *pudp)
{
	set_pud(pudp, __pud(0));
}

static inline phys_addr_t pud_page_paddr(pud_t pud)
{
	return __pud_to_phys(pud);
}

static inline pmd_t *pud_pgtable(pud_t pud)
{
	return (pmd_t *)__va(pud_page_paddr(pud));
}
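/*
 * Illustrative sketch: code that cannot rely on the linear map (e.g. early
 * page-table manipulation) walks tables through the fixmap helpers above:
 *
 *	pte_t *ptep = pte_set_fixmap_offset(pmdp, addr);
 *	... read or modify *ptep ...
 *	pte_clear_fixmap();
 */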
/* Find an entry in the second-level page table. */
#define pmd_offset_phys(dir, addr)	(pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))

#define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
#define pmd_set_fixmap_offset(pud, addr)	pmd_set_fixmap(pmd_offset_phys(pud, addr))
#define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)

#define pud_page(pud)			phys_to_page(__pud_to_phys(pud))

/* use ONLY for statically allocated translation tables */
#define pmd_offset_kimg(dir,addr)	((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))

#else

#define pud_valid(pud)		false
#define pud_page_paddr(pud)	({ BUILD_BUG(); 0; })
#define pud_user_exec(pud)	pud_user(pud) /* Always 0 with folding */

/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
#define pmd_set_fixmap(addr)		NULL
#define pmd_set_fixmap_offset(pudp, addr)	((pmd_t *)pudp)
#define pmd_clear_fixmap()

#define pmd_offset_kimg(dir,addr)	((pmd_t *)dir)

#endif /* CONFIG_PGTABLE_LEVELS > 2 */

#if CONFIG_PGTABLE_LEVELS > 3

static __always_inline bool pgtable_l4_enabled(void)
{
	if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2))
		return true;
	if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
		return vabits_actual == VA_BITS;
	return alternative_has_cap_unlikely(ARM64_HAS_VA52);
}

static inline bool mm_pud_folded(const struct mm_struct *mm)
{
	return !pgtable_l4_enabled();
}
#define mm_pud_folded mm_pud_folded

#define pud_ERROR(e)	\
	pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))

#define p4d_none(p4d)		(pgtable_l4_enabled() && !p4d_val(p4d))
#define p4d_bad(p4d)		(pgtable_l4_enabled() && \
				 ((p4d_val(p4d) & P4D_TYPE_MASK) != \
				  P4D_TYPE_TABLE))
#define p4d_present(p4d)	(!p4d_none(p4d))

static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
	if (in_swapper_pgdir(p4dp)) {
		set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
		return;
	}

	WRITE_ONCE(*p4dp, p4d);
	queue_pte_barriers();
}

static inline void p4d_clear(p4d_t *p4dp)
{
	if (pgtable_l4_enabled())
		set_p4d(p4dp, __p4d(0));
}

static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
{
	return __p4d_to_phys(p4d);
}

#define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))

static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr)
{
	/* Ensure that 'p4dp' indexes a page table according to 'addr' */
	VM_BUG_ON(((addr >> P4D_SHIFT) ^ ((u64)p4dp >> 3)) % PTRS_PER_P4D);

	return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr);
}

static inline pud_t *p4d_pgtable(p4d_t p4d)
{
	return (pud_t *)__va(p4d_page_paddr(p4d));
}

static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr)
{
	BUG_ON(!pgtable_l4_enabled());

	return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t);
}

static inline
pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr)
{
	if (!pgtable_l4_enabled())
		return p4d_to_folded_pud(p4dp, addr);
	return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr);
}
#define pud_offset_lockless pud_offset_lockless

static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr)
{
	return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr);
}
#define pud_offset pud_offset
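/*
 * Illustrative sketch: with CONFIG_ARM64_LPA2 the fourth level may be
 * folded at runtime, so lockless walkers use pud_offset_lockless() above
 * and transparently land back in the page containing the p4d when l4 is
 * disabled:
 *
 *	p4d_t p4d = READ_ONCE(*p4dp);
 *	pud_t *pudp = pud_offset_lockless(p4dp, p4d, addr);
 */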
static inline pud_t *pud_set_fixmap(unsigned long addr)
{
	if (!pgtable_l4_enabled())
		return NULL;
	return (pud_t *)set_fixmap_offset(FIX_PUD, addr);
}

static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr)
{
	if (!pgtable_l4_enabled())
		return p4d_to_folded_pud(p4dp, addr);
	return pud_set_fixmap(pud_offset_phys(p4dp, addr));
}

static inline void pud_clear_fixmap(void)
{
	if (pgtable_l4_enabled())
		clear_fixmap(FIX_PUD);
}

/* use ONLY for statically allocated translation tables */
static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr)
{
	if (!pgtable_l4_enabled())
		return p4d_to_folded_pud(p4dp, addr);
	return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr));
}

#define p4d_page(p4d)		pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))

#else

static inline bool pgtable_l4_enabled(void) { return false; }

#define p4d_page_paddr(p4d)	({ BUILD_BUG(); 0; })

/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
#define pud_set_fixmap(addr)		NULL
#define pud_set_fixmap_offset(pgdp, addr)	((pud_t *)pgdp)
#define pud_clear_fixmap()

#define pud_offset_kimg(dir,addr)	((pud_t *)dir)

#endif /* CONFIG_PGTABLE_LEVELS > 3 */

#if CONFIG_PGTABLE_LEVELS > 4

static __always_inline bool pgtable_l5_enabled(void)
{
	if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
		return vabits_actual == VA_BITS;
	return alternative_has_cap_unlikely(ARM64_HAS_VA52);
}

static inline bool mm_p4d_folded(const struct mm_struct *mm)
{
	return !pgtable_l5_enabled();
}
#define mm_p4d_folded mm_p4d_folded

#define p4d_ERROR(e)	\
	pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))

#define pgd_none(pgd)		(pgtable_l5_enabled() && !pgd_val(pgd))
#define pgd_bad(pgd)		(pgtable_l5_enabled() && \
				 ((pgd_val(pgd) & PGD_TYPE_MASK) != \
				  PGD_TYPE_TABLE))
#define pgd_present(pgd)	(!pgd_none(pgd))

static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	if (in_swapper_pgdir(pgdp)) {
		set_swapper_pgd(pgdp, __pgd(pgd_val(pgd)));
		return;
	}

	WRITE_ONCE(*pgdp, pgd);
	queue_pte_barriers();
}

static inline void pgd_clear(pgd_t *pgdp)
{
	if (pgtable_l5_enabled())
		set_pgd(pgdp, __pgd(0));
}

static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
{
	return __pgd_to_phys(pgd);
}

#define p4d_index(addr)		(((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))

static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
{
	/* Ensure that 'pgdp' indexes a page table according to 'addr' */
	VM_BUG_ON(((addr >> PGDIR_SHIFT) ^ ((u64)pgdp >> 3)) % PTRS_PER_PGD);

	return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr);
}

static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
{
	BUG_ON(!pgtable_l5_enabled());

	return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
}

static inline
p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
{
	if (!pgtable_l5_enabled())
		return pgd_to_folded_p4d(pgdp, addr);
	return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr);
}
#define p4d_offset_lockless p4d_offset_lockless
static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr)
{
	return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr);
}

static inline p4d_t *p4d_set_fixmap(unsigned long addr)
{
	if (!pgtable_l5_enabled())
		return NULL;
	return (p4d_t *)set_fixmap_offset(FIX_P4D, addr);
}

static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr)
{
	if (!pgtable_l5_enabled())
		return pgd_to_folded_p4d(pgdp, addr);
	return p4d_set_fixmap(p4d_offset_phys(pgdp, addr));
}

static inline void p4d_clear_fixmap(void)
{
	if (pgtable_l5_enabled())
		clear_fixmap(FIX_P4D);
}

/* use ONLY for statically allocated translation tables */
static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
{
	if (!pgtable_l5_enabled())
		return pgd_to_folded_p4d(pgdp, addr);
	return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr));
}

#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))

#else

static inline bool pgtable_l5_enabled(void) { return false; }

#define p4d_index(addr)		(((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))

/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
#define p4d_set_fixmap(addr)		NULL
#define p4d_set_fixmap_offset(p4dp, addr)	((p4d_t *)p4dp)
#define p4d_clear_fixmap()

#define p4d_offset_kimg(dir,addr)	((p4d_t *)dir)

static inline
p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
{
	/*
	 * With runtime folding of the pud, pud_offset_lockless() passes
	 * the 'pgd_t *' we return here to p4d_to_folded_pud(), which
	 * will offset the pointer assuming that it points into
	 * a page-table page. However, the fast GUP path passes us a
	 * pgd_t allocated on the stack and so we must use the original
	 * pointer in 'pgdp' to construct the p4d pointer instead of
	 * using the generic p4d_offset_lockless() implementation.
	 *
	 * Note: reusing the original pointer means that we may
	 * dereference the same (live) page-table entry multiple times.
	 * This is safe because it is still only loaded once in the
	 * context of each level and the CPU guarantees same-address
	 * read-after-read ordering.
	 */
	return p4d_offset(pgdp, addr);
}
#define p4d_offset_lockless p4d_offset_lockless_folded

#endif /* CONFIG_PGTABLE_LEVELS > 4 */

#define pgd_ERROR(e)	\
	pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))

#define pgd_set_fixmap(addr)	((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
#define pgd_clear_fixmap()	clear_fixmap(FIX_PGD)
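/*
 * Illustrative sketch (hypothetical mprotect()-style caller): pte_modify()
 * below swaps the permission and memory-type bits listed in its mask while
 * preserving the pfn and the dirty state:
 *
 *	pte = pte_modify(pte, vm_get_page_prot(vma->vm_flags));
 */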
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	/*
	 * Normal and Normal-Tagged are two different memory types and indices
	 * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK.
	 */
	const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
			      PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE |
			      PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK;

	/* preserve the hardware dirty information */
	if (pte_hw_dirty(pte))
		pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));

	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
	/*
	 * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware
	 * dirtiness again.
	 */
	if (pte_sw_dirty(pte))
		pte = pte_mkdirty(pte);
	return pte;
}

static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
	return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}

extern int __ptep_set_access_flags_anysz(struct vm_area_struct *vma,
					 unsigned long address, pte_t *ptep,
					 pte_t entry, int dirty,
					 unsigned long pgsize);

static inline int __ptep_set_access_flags(struct vm_area_struct *vma,
					  unsigned long address, pte_t *ptep,
					  pte_t entry, int dirty)
{
	return __ptep_set_access_flags_anysz(vma, address, ptep, entry, dirty,
					     PAGE_SIZE);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	return __ptep_set_access_flags_anysz(vma, address, (pte_t *)pmdp,
					     pmd_pte(entry), dirty, PMD_SIZE);
}
#endif

#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr)
{
	return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte));
}

static inline bool pmd_user_accessible_page(pmd_t pmd, unsigned long addr)
{
	return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}

static inline bool pud_user_accessible_page(pud_t pud, unsigned long addr)
{
	return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud));
}
#endif

/*
 * Atomic pte/pmd modifications.
 */

static inline void __pte_clear(struct mm_struct *mm,
			       unsigned long addr, pte_t *ptep)
{
	__set_pte(ptep, __pte(0));
}

static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
					      unsigned long address,
					      pte_t *ptep)
{
	pte_t old_pte, pte;

	pte = __ptep_get(ptep);
	do {
		old_pte = pte;
		pte = pte_mkold(pte);
		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte), pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));

	return pte_young(pte);
}
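/*
 * Illustrative sketch: the helper above clears PTE_AF with a relaxed
 * cmpxchg loop so it cannot lose against a concurrent hardware AF/DBM
 * update, and reports whether the entry had been accessed:
 *
 *	int young = __ptep_test_and_clear_young(vma, addr, ptep);
 */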
static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
					   unsigned long address, pte_t *ptep)
{
	int young = __ptep_test_and_clear_young(vma, address, ptep);

	if (young) {
		/*
		 * We can elide the trailing DSB here since the worst that can
		 * happen is that a CPU continues to use the young entry in its
		 * TLB and we mistakenly reclaim the associated page. The
		 * window for such an event is bounded by the next
		 * context-switch, which provides a DSB to complete the TLB
		 * invalidation.
		 */
		__flush_tlb_page(vma, address, TLBF_NOSYNC);
	}

	return young;
}

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	/* Operation applies to PMD table entry only if FEAT_HAFT is enabled */
	VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft());
	return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */

static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm,
					       unsigned long address,
					       pte_t *ptep,
					       unsigned long pgsize)
{
	pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));

	switch (pgsize) {
	case PAGE_SIZE:
		page_table_check_pte_clear(mm, address, pte);
		break;
	case PMD_SIZE:
		page_table_check_pmd_clear(mm, address, pte_pmd(pte));
		break;
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		page_table_check_pud_clear(mm, address, pte_pud(pte));
		break;
#endif
	default:
		VM_WARN_ON(1);
	}

	return pte;
}

static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
					 unsigned long address, pte_t *ptep)
{
	return __ptep_get_and_clear_anysz(mm, address, ptep, PAGE_SIZE);
}

static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
				     pte_t *ptep, unsigned int nr, int full)
{
	for (;;) {
		__ptep_get_and_clear(mm, addr, ptep);
		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}

static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
					      unsigned long addr, pte_t *ptep,
					      unsigned int nr, int full)
{
	pte_t pte, tmp_pte;

	pte = __ptep_get_and_clear(mm, addr, ptep);
	while (--nr) {
		ptep++;
		addr += PAGE_SIZE;
		tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
		if (pte_dirty(tmp_pte))
			pte = pte_mkdirty(pte);
		if (pte_young(tmp_pte))
			pte = pte_mkyoung(pte);
	}
	return pte;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address, pmd_t *pmdp)
{
	return pte_pmd(__ptep_get_and_clear_anysz(mm, address, (pte_t *)pmdp,
						  PMD_SIZE));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline void ___ptep_set_wrprotect(struct mm_struct *mm,
					 unsigned long address, pte_t *ptep,
					 pte_t pte)
{
	pte_t old_pte;

	do {
		old_pte = pte;
		pte = pte_wrprotect(pte);
		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte), pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));
}
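/*
 * Illustrative sketch: the accumulating variant above ORs the dirty/young
 * state of every cleared entry into the returned pte, so a teardown path
 * can do (folio_mark_dirty() used purely as an example):
 *
 *	pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
 *	if (pte_dirty(pte))
 *		folio_mark_dirty(folio);	// some entry was dirty
 */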
/*
 * __ptep_set_wrprotect - mark read-only while transferring potential hardware
 * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
 */
static inline void __ptep_set_wrprotect(struct mm_struct *mm,
					unsigned long address, pte_t *ptep)
{
	___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep));
}

static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address,
				    pte_t *ptep, unsigned int nr)
{
	unsigned int i;

	for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++)
		__ptep_set_wrprotect(mm, address, ptep);
}

static inline void __clear_young_dirty_pte(struct vm_area_struct *vma,
					   unsigned long addr, pte_t *ptep,
					   pte_t pte, cydp_t flags)
{
	pte_t old_pte;

	do {
		old_pte = pte;

		if (flags & CYDP_CLEAR_YOUNG)
			pte = pte_mkold(pte);
		if (flags & CYDP_CLEAR_DIRTY)
			pte = pte_mkclean(pte);

		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte), pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));
}

static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma,
					    unsigned long addr, pte_t *ptep,
					    unsigned int nr, cydp_t flags)
{
	pte_t pte;

	for (;;) {
		pte = __ptep_get(ptep);

		if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY))
			__set_pte(ptep, pte_mkclean(pte_mkold(pte)));
		else
			__clear_young_dirty_pte(vma, addr, ptep, pte, flags);

		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	__ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}

#define pmdp_establish pmdp_establish
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
	return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
}
#endif

/*
 * Encode and decode a swap entry:
 *	bits 0-1:	present (must be zero)
 *	bit  2:		remember PG_anon_exclusive
 *	bit  3:		remember uffd-wp state
 *	bits 6-10:	swap type
 *	bit  11:	PTE_PRESENT_INVALID (must be zero)
 *	bits 12-61:	swap offset
 */
#define __SWP_TYPE_SHIFT	6
#define __SWP_TYPE_BITS		5
#define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT	12
#define __SWP_OFFSET_BITS	50
#define __SWP_OFFSET_MASK	((1UL << __SWP_OFFSET_BITS) - 1)

#define __swp_type(x)		(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
#define __swp_offset(x)		(((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK)
#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })

#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(swp)	((pte_t) { (swp).val })

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
#define __pmd_to_swp_entry(pmd)		((swp_entry_t) { pmd_val(pmd) })
#define __swp_entry_to_pmd(swp)		__pmd((swp).val)
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
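/*
 * Illustrative sketch: round-tripping a swap entry through the encoding
 * above:
 *
 *	swp_entry_t entry = __swp_entry(type, offset);
 *	pte_t pte = __swp_entry_to_pte(entry);
 *	__swp_type(__pte_to_swp_entry(pte));	// == type
 *	__swp_offset(__pte_to_swp_entry(pte));	// == offset
 */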
/*
 * Ensure that there are not more swap files than can be encoded in the kernel
 * PTEs.
 */
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)

#ifdef CONFIG_ARM64_MTE

#define __HAVE_ARCH_PREPARE_TO_SWAP
extern int arch_prepare_to_swap(struct folio *folio);

#define __HAVE_ARCH_SWAP_INVALIDATE
static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
{
	if (system_supports_mte())
		mte_invalidate_tags(type, offset);
}

static inline void arch_swap_invalidate_area(int type)
{
	if (system_supports_mte())
		mte_invalidate_tags_area(type);
}

#define __HAVE_ARCH_SWAP_RESTORE
extern void arch_swap_restore(swp_entry_t entry, struct folio *folio);

#endif /* CONFIG_ARM64_MTE */

/*
 * On AArch64, the cache coherency is handled via the __set_ptes() function.
 */
static inline void update_mmu_cache_range(struct vm_fault *vmf,
		struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
		unsigned int nr)
{
	/*
	 * We don't do anything here, so there's a very small chance of
	 * us retaking a user fault which we just fixed up. The alternative
	 * is doing a dsb(ishst), but that penalises the fastpath.
	 */
}

#define update_mmu_cache(vma, addr, ptep) \
	update_mmu_cache_range(NULL, vma, addr, ptep, 1)
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)

#ifdef CONFIG_ARM64_PA_BITS_52
#define phys_to_ttbr(addr)	(((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
#else
#define phys_to_ttbr(addr)	(addr)
#endif

/*
 * On arm64 without hardware Access Flag, copying from user will fail because
 * the pte is old and cannot be marked young. So we always end up with a
 * zeroed page after fork() + CoW for pfn mappings. We don't always have a
 * hardware-managed access flag on arm64.
 */
#define arch_has_hw_pte_young		cpu_has_hw_af

#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
#define arch_has_hw_nonleaf_pmd_young	system_supports_haft
#endif

/*
 * Experimentally, it's cheap to set the access flag in hardware and we
 * benefit from prefaulting mappings as 'old' to start with.
 */
#define arch_wants_old_prefaulted_pte	cpu_has_hw_af
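/*
 * Illustrative note on phys_to_ttbr() above: with 52-bit PAs, bits [51:48]
 * of the table base live in TTBR_ELx[5:2], which is what the (addr >> 46)
 * term places there (bit 48 >> 46 == bit 2, ..., bit 51 >> 46 == bit 5).
 */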
/*
 * Request exec memory is read into pagecache in at least 64K folios. This size
 * can be contpte-mapped when 4K base pages are in use (16 pages into 1 iTLB
 * entry), and HPA can coalesce it (4 pages into 1 TLB entry) when 16K base
 * pages are in use.
 */
#define exec_folio_order()	ilog2(SZ_64K >> PAGE_SHIFT)

static inline bool pud_sect_supported(void)
{
	return PAGE_SIZE == SZ_4K;
}

#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
#define ptep_modify_prot_start ptep_modify_prot_start
extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep);

#define ptep_modify_prot_commit ptep_modify_prot_commit
extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pte_t old_pte, pte_t new_pte);

#define modify_prot_start_ptes modify_prot_start_ptes
extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    unsigned int nr);

#define modify_prot_commit_ptes modify_prot_commit_ptes
extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
				    pte_t *ptep, pte_t old_pte, pte_t pte,
				    unsigned int nr);

#ifdef CONFIG_ARM64_CONTPTE

/*
 * The contpte APIs are used to transparently manage the contiguous bit in ptes
 * where it is possible and makes sense to do so. The PTE_CONT bit is considered
 * a private implementation detail of the public ptep API (see below).
 */
extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte);
extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep);
extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte, unsigned int nr);
extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, unsigned int nr, int full);
extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, int full);
int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				unsigned int nr);
int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				unsigned int nr);
extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, unsigned int nr);
extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				pte_t entry, int dirty);
extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, cydp_t flags);
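/*
 * Illustrative sketch: core mm never calls the contpte functions above
 * directly; it uses the public helpers (e.g. set_ptes(), defined below) and
 * folding happens transparently when alignment and count allow:
 *
 *	set_ptes(mm, addr, ptep, pte, CONT_PTES);	// may set PTE_CONT
 */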
static __always_inline void contpte_try_fold(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep, pte_t pte)
{
	/*
	 * Only bother trying if both the virtual and physical addresses are
	 * aligned and correspond to the last entry in a contig range. The core
	 * code mostly modifies ranges from low to high, so this is likely the
	 * last modification in the contig range, and therefore a good time to
	 * fold. We can't fold special mappings, because there is no associated
	 * folio.
	 */

	const unsigned long contmask = CONT_PTES - 1;
	bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask;

	if (unlikely(valign)) {
		bool palign = (pte_pfn(pte) & contmask) == contmask;

		if (unlikely(palign &&
		    pte_valid(pte) && !pte_cont(pte) && !pte_special(pte)))
			__contpte_try_fold(mm, addr, ptep, pte);
	}
}

static __always_inline void contpte_try_unfold(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep, pte_t pte)
{
	if (unlikely(pte_valid_cont(pte)))
		__contpte_try_unfold(mm, addr, ptep, pte);
}

#define pte_batch_hint pte_batch_hint
static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
{
	if (!pte_valid_cont(pte))
		return 1;

	return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1));
}

/*
 * The below functions constitute the public API that arm64 presents to the
 * core-mm to manipulate PTE entries within their page tables (or at least this
 * is the subset of the API that arm64 needs to implement). These public
 * versions will automatically and transparently apply the contiguous bit where
 * it makes sense to do so. Therefore any users that are contig-aware (e.g.
 * hugetlb, kernel mapper) should NOT use these APIs, but instead use the
 * private versions, which are prefixed with double underscore. All of these
 * APIs except for ptep_get_lockless() are expected to be called with the PTL
 * held. Although the contiguous bit is considered private to the
 * implementation, it is deliberately allowed to leak through the getters (e.g.
 * ptep_get()), back to core code. This is required so that pte_leaf_size() can
 * provide an accurate size for perf_get_pgtable_size(). But this leakage means
 * it's possible a pte will be passed to a setter with the contiguous bit set,
 * so we explicitly clear the contiguous bit in those cases to prevent
 * accidentally setting it in the pgtable.
 */

#define ptep_get ptep_get
static inline pte_t ptep_get(pte_t *ptep)
{
	pte_t pte = __ptep_get(ptep);

	if (likely(!pte_valid_cont(pte)))
		return pte;

	return contpte_ptep_get(ptep, pte);
}

#define ptep_get_lockless ptep_get_lockless
static inline pte_t ptep_get_lockless(pte_t *ptep)
{
	pte_t pte = __ptep_get(ptep);

	if (likely(!pte_valid_cont(pte)))
		return pte;

	return contpte_ptep_get_lockless(ptep);
}
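/*
 * Illustrative sketch: batch walkers use pte_batch_hint() above to jump to
 * the next contpte boundary in one step instead of visiting all CONT_PTES
 * entries:
 *
 *	unsigned int nr = pte_batch_hint(ptep, pte);
 *	ptep += nr;
 *	addr += nr * PAGE_SIZE;
 */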
static inline void set_pte(pte_t *ptep, pte_t pte)
{
	/*
	 * We don't have the mm or vaddr so cannot unfold contig entries (since
	 * it requires tlb maintenance). set_pte() is not used in core code, so
	 * this should never even be called. Regardless, do our best to service
	 * any call and emit a warning if there is any attempt to set a pte on
	 * top of an existing contig range.
	 */
	pte_t orig_pte = __ptep_get(ptep);

	WARN_ON_ONCE(pte_valid_cont(orig_pte));
	__set_pte(ptep, pte_mknoncont(pte));
}

#define set_ptes set_ptes
static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte, unsigned int nr)
{
	pte = pte_mknoncont(pte);

	if (likely(nr == 1)) {
		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
		__set_ptes(mm, addr, ptep, pte, 1);
		contpte_try_fold(mm, addr, ptep, pte);
	} else {
		contpte_set_ptes(mm, addr, ptep, pte, nr);
	}
}

static inline void pte_clear(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep)
{
	contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
	__pte_clear(mm, addr, ptep);
}

#define clear_full_ptes clear_full_ptes
static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, unsigned int nr, int full)
{
	if (likely(nr == 1)) {
		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
		__clear_full_ptes(mm, addr, ptep, nr, full);
	} else {
		contpte_clear_full_ptes(mm, addr, ptep, nr, full);
	}
}

#define get_and_clear_full_ptes get_and_clear_full_ptes
static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, int full)
{
	pte_t pte;

	if (likely(nr == 1)) {
		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
		pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
	} else {
		pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full);
	}

	return pte;
}

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep)
{
	contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
	return __ptep_get_and_clear(mm, addr, ptep);
}

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep)
{
	pte_t orig_pte = __ptep_get(ptep);

	if (likely(!pte_valid_cont(orig_pte)))
		return __ptep_test_and_clear_young(vma, addr, ptep);

	return contpte_test_and_clear_young_ptes(vma, addr, ptep, 1);
}

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep)
{
	pte_t orig_pte = __ptep_get(ptep);

	if (likely(!pte_valid_cont(orig_pte)))
		return __ptep_clear_flush_young(vma, addr, ptep);

	return contpte_clear_flush_young_ptes(vma, addr, ptep, 1);
}

#define clear_flush_young_ptes clear_flush_young_ptes
static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				unsigned int nr)
{
	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
		return __ptep_clear_flush_young(vma, addr, ptep);

	return contpte_clear_flush_young_ptes(vma, addr, ptep, nr);
}
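/*
 * Illustrative sketch, NOT part of the kernel API: example_zap_folio_ptes()
 * is a hypothetical helper contrasting the batched and single-entry teardown
 * paths above. One get_and_clear_full_ptes() call lets the contpte code
 * handle an entire folded block in one go, and the returned pte accumulates
 * the young/dirty state across the cleared entries, which a caller could
 * feed into folio state updates.
 */
static inline pte_t example_zap_folio_ptes(struct mm_struct *mm,
					   unsigned long addr, pte_t *ptep,
					   unsigned int nr)
{
	/* full == 0: the mm is still live, so no teardown shortcuts apply. */
	return get_and_clear_full_ptes(mm, addr, ptep, nr, 0);
}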
#define wrprotect_ptes wrprotect_ptes
static __always_inline void wrprotect_ptes(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep, unsigned int nr)
{
	if (likely(nr == 1)) {
		/*
		 * Optimization: wrprotect_ptes() can only be called for present
		 * ptes, so we only need to check the contig bit as the
		 * condition for unfold, and we can remove the contig bit from
		 * the pte we read to avoid re-reading. This speeds up fork(),
		 * which is performance-sensitive for order-0 folios. Equivalent
		 * to contpte_try_unfold().
		 */
		pte_t orig_pte = __ptep_get(ptep);

		if (unlikely(pte_cont(orig_pte))) {
			__contpte_try_unfold(mm, addr, ptep, orig_pte);
			orig_pte = pte_mknoncont(orig_pte);
		}
		___ptep_set_wrprotect(mm, addr, ptep, orig_pte);
	} else {
		contpte_wrprotect_ptes(mm, addr, ptep, nr);
	}
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep)
{
	wrprotect_ptes(mm, addr, ptep, 1);
}

#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				pte_t entry, int dirty)
{
	pte_t orig_pte = __ptep_get(ptep);

	entry = pte_mknoncont(entry);

	if (likely(!pte_valid_cont(orig_pte)))
		return __ptep_set_access_flags(vma, addr, ptep, entry, dirty);

	return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
}

#define clear_young_dirty_ptes clear_young_dirty_ptes
static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, cydp_t flags)
{
	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
		__clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
	else
		contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
}

#else /* CONFIG_ARM64_CONTPTE */

#define ptep_get				__ptep_get
#define set_pte					__set_pte
#define set_ptes				__set_ptes
#define pte_clear				__pte_clear
#define clear_full_ptes				__clear_full_ptes
#define get_and_clear_full_ptes			__get_and_clear_full_ptes
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define ptep_get_and_clear			__ptep_get_and_clear
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define ptep_test_and_clear_young		__ptep_test_and_clear_young
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young			__ptep_clear_flush_young
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define ptep_set_wrprotect			__ptep_set_wrprotect
#define wrprotect_ptes				__wrprotect_ptes
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags			__ptep_set_access_flags
#define clear_young_dirty_ptes			__clear_young_dirty_ptes

#endif /* CONFIG_ARM64_CONTPTE */

#endif /* !__ASSEMBLER__ */

#endif /* __ASM_PGTABLE_H */