/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASM_PGTABLE_H
#define __ASM_PGTABLE_H

#include <asm/bug.h>
#include <asm/proc-fns.h>

#include <asm/memory.h>
#include <asm/mte.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable-prot.h>
#include <asm/tlbflush.h>

/*
 * VMALLOC range.
 *
 * VMALLOC_START: beginning of the kernel vmalloc space
 * VMALLOC_END: extends to the available space below vmemmap
 */
#define VMALLOC_START		(MODULES_END)
#if VA_BITS == VA_BITS_MIN
#define VMALLOC_END		(VMEMMAP_START - SZ_8M)
#else
#define VMEMMAP_UNUSED_NPAGES	((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT)
#define VMALLOC_END		(VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M)
#endif

#define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))

#ifndef __ASSEMBLER__

#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
#include <asm/por.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/page_table_check.h>

static inline void emit_pte_barriers(void)
{
	/*
	 * These barriers are emitted under certain conditions after a pte entry
	 * was modified (see e.g. __set_pte_complete()). The dsb makes the store
	 * visible to the table walker. The isb ensures that any previous
	 * speculative "invalid translation" marker that is in the CPU's
	 * pipeline gets cleared, so that any access to that address after
	 * setting the pte to valid won't cause a spurious fault. If the thread
	 * gets preempted after storing to the pgtable but before emitting these
	 * barriers, __switch_to() emits a dsb which ensures the walker gets to
	 * see the store. There is no guarantee of an isb being issued though.
	 * This is safe because it will still get issued (albeit on a
	 * potentially different CPU) when the thread starts running again,
	 * before any access to the address.
	 */
	dsb(ishst);
	isb();
}

static inline void queue_pte_barriers(void)
{
	if (is_lazy_mmu_mode_active()) {
		/* Avoid the atomic op if already set. */
		if (!test_thread_flag(TIF_LAZY_MMU_PENDING))
			set_thread_flag(TIF_LAZY_MMU_PENDING);
	} else {
		emit_pte_barriers();
	}
}

static inline void arch_enter_lazy_mmu_mode(void) {}

static inline void arch_flush_lazy_mmu_mode(void)
{
	if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING))
		emit_pte_barriers();
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	arch_flush_lazy_mmu_mode();
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE

/* Set stride and tlb_level in flush_*_tlb_range */
#define flush_pmd_tlb_range(vma, addr, end)	\
	__flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
#define flush_pud_tlb_range(vma, addr, end)	\
	__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
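/*
 * Illustrative sketch (not part of this header's API): when the generic lazy
 * MMU helpers put the thread in lazy MMU mode, callers updating many valid
 * kernel ptes end up batching the dsb/isb that queue_pte_barriers() would
 * otherwise emit per write, e.g. (hypothetical caller):
 *
 *	arch_enter_lazy_mmu_mode();
 *	for (i = 0; i < nr; i++)
 *		__set_pte(ptep + i, pte);	// barriers deferred via
 *						// TIF_LAZY_MMU_PENDING
 *	arch_leave_lazy_mmu_mode();		// one dsb(ishst) + isb here
 */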
/*
 * We use a local TLB invalidation instruction when reusing a page in the
 * write protection fault handler to avoid TLBI broadcast in the hot
 * path. This will cause spurious page faults if stale read-only TLB
 * entries exist.
 */
#define flush_tlb_fix_spurious_fault(vma, address, ptep)	\
	local_flush_tlb_page_nonotify(vma, address)

#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp)	\
	local_flush_tlb_page_nonotify(vma, address)

#define pte_ERROR(e)	\
	pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))

#ifdef CONFIG_ARM64_PA_BITS_52
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
	pte_val(pte) &= ~PTE_MAYBE_SHARED;
	return (pte_val(pte) & PTE_ADDR_LOW) |
		((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
}
static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
{
	return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}
#else
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
	return pte_val(pte) & PTE_ADDR_LOW;
}

static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
{
	return phys;
}
#endif

#define pte_pfn(pte)		(__pte_to_phys(pte) >> PAGE_SHIFT)
#define pfn_pte(pfn,prot)	\
	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))

#define pte_none(pte)		(!pte_val(pte))
#define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))

/*
 * The following only work if pte_present(). Undefined behaviour otherwise.
 */
#define pte_present(pte)	(pte_valid(pte) || pte_present_invalid(pte))
#define pte_young(pte)		(!!(pte_val(pte) & PTE_AF))
#define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
#define pte_rdonly(pte)		(!!(pte_val(pte) & PTE_RDONLY))
#define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
#define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
#define pte_tagged(pte)		((pte_val(pte) & PTE_ATTRINDX_MASK) == \
				 PTE_ATTRINDX(MT_NORMAL_TAGGED))

#define pte_cont_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK;	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);			\
})

#define pmd_cont_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK;	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);			\
})

#define pte_hw_dirty(pte)	(pte_write(pte) && !pte_rdonly(pte))
#define pte_sw_dirty(pte)	(!!(pte_val(pte) & PTE_DIRTY))
#define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))

#define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
#define pte_present_invalid(pte) \
	((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID)
/*
 * Execute-only user mappings do not have the PTE_USER bit set. All valid
 * kernel mappings have the PTE_UXN bit set.
 */
#define pte_valid_not_user(pte) \
	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
/*
 * Returns true if the pte is valid and has the contiguous bit set.
 */
#define pte_valid_cont(pte)	(pte_valid(pte) && pte_cont(pte))
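/*
 * Illustrative summary (not from the original header) of how the helpers
 * above classify an entry:
 *
 *	state			PTE_VALID  PTE_PRESENT_INVALID  pte_present()
 *	unmapped (pte_none)	    0              0                 false
 *	mapped, valid		    1              0                 true
 *	present-invalid		    0              1                 true
 *
 * pte_present_invalid() covers entries such as PROT_NONE mappings that core
 * mm must still treat as present even though the HW walker ignores them.
 */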
/*
 * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
 * so that we don't erroneously return false for pages that have been
 * remapped as PROT_NONE but are yet to be flushed from the TLB.
 * Note that we can't make any assumptions based on the state of the access
 * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the
 * TLB.
 */
#define pte_accessible(mm, pte)	\
	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))

static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute)
{
	u64 por;

	if (!system_supports_poe())
		return true;

	por = read_sysreg_s(SYS_POR_EL0);

	if (write)
		return por_elx_allows_write(por, pkey);

	if (execute)
		return por_elx_allows_exec(por, pkey);

	return por_elx_allows_read(por, pkey);
}

/*
 * p??_access_permitted() is true for valid user mappings (PTE_USER
 * bit set, subject to the write permission check). Execute-only
 * mappings, e.g. PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits
 * not set), must return false. PROT_NONE mappings do not have the
 * PTE_VALID bit set.
 */
#define pte_access_permitted_no_overlay(pte, write) \
	(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
#define pte_access_permitted(pte, write) \
	(pte_access_permitted_no_overlay(pte, write) && \
	por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false))
#define pmd_access_permitted(pmd, write) \
	(pte_access_permitted(pmd_pte(pmd), (write)))
#define pud_access_permitted(pud, write) \
	(pte_access_permitted(pud_pte(pud), (write)))

static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
{
	pte_val(pte) &= ~pgprot_val(prot);
	return pte;
}

static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
{
	pte_val(pte) |= pgprot_val(prot);
	return pte;
}

static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
{
	pmd_val(pmd) &= ~pgprot_val(prot);
	return pmd;
}

static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
{
	pmd_val(pmd) |= pgprot_val(prot);
	return pmd;
}

static inline pte_t pte_mkwrite_novma(pte_t pte)
{
	pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
	if (pte_sw_dirty(pte))
		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
	return pte;
}

static inline pte_t pte_mkclean(pte_t pte)
{
	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));

	return pte;
}

static inline pte_t pte_mkdirty(pte_t pte)
{
	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));

	if (pte_write(pte))
		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));

	return pte;
}

static inline pte_t pte_wrprotect(pte_t pte)
{
	/*
	 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
	 * clear), set the PTE_DIRTY bit.
	 */
	if (pte_hw_dirty(pte))
		pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));

	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
	return pte;
}

static inline pte_t pte_mkold(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_AF));
}

static inline pte_t pte_mkyoung(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_AF));
}

static inline pte_t pte_mkspecial(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
}

static inline pte_t pte_mkcont(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_CONT));
}

static inline pte_t pte_mknoncont(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_CONT));
}

static inline pte_t pte_mkvalid(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_VALID));
}

static inline pte_t pte_mkinvalid(pte_t pte)
{
	pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID));
	pte = clear_pte_bit(pte, __pgprot(PTE_VALID));
	return pte;
}

static inline pmd_t pmd_mkcont(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
}

static inline pmd_t pmd_mknoncont(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) & ~PMD_SECT_CONT);
}

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
	return !!(pte_val(pte) & PTE_UFFD_WP);
}

static inline pte_t pte_mkuffd_wp(pte_t pte)
{
	return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP)));
}

static inline pte_t pte_clear_uffd_wp(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP));
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
{
	WRITE_ONCE(*ptep, pte);
}

static inline void __set_pte_complete(pte_t pte)
{
	/*
	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
	 * has the necessary barriers.
	 */
	if (pte_valid_not_user(pte))
		queue_pte_barriers();
}

static inline void __set_pte(pte_t *ptep, pte_t pte)
{
	__set_pte_nosync(ptep, pte);
	__set_pte_complete(pte);
}

static inline pte_t __ptep_get(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}

extern void __sync_icache_dcache(pte_t pteval);
bool pgattr_change_is_safe(pteval_t old, pteval_t new);

/*
 * PTE bits configuration in the presence of hardware Dirty Bit Management
 * (PTE_WRITE == PTE_DBM):
 *
 *   Dirty  Writable | PTE_RDONLY  PTE_WRITE  PTE_DIRTY (sw)
 *     0       0     |     1           0          0
 *     0       1     |     1           1          0
 *     1       0     |     1           0          1
 *     1       1     |     0           1          x
 *
 * When hardware DBM is not present, the software PTE_DIRTY bit is updated via
 * the page fault mechanism. Checking the dirty status of a pte becomes:
 *
 *   PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
 */
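/*
 * Worked example (illustrative only): with hardware DBM, a clean writable
 * pte starts out as PTE_WRITE = 1, PTE_RDONLY = 1 (second row of the table
 * above). On the first store the MMU clears PTE_RDONLY instead of faulting,
 * moving the entry to the last row, so afterwards:
 *
 *	pte_hw_dirty(pte) == pte_write(pte) && !pte_rdonly(pte) == true
 *	pte_dirty(pte)    == pte_sw_dirty(pte) || pte_hw_dirty(pte) == true
 *
 * Without DBM the same transition happens via the page fault handler
 * setting the software PTE_DIRTY bit.
 */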
static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
					   pte_t pte)
{
	pte_t old_pte;

	if (!IS_ENABLED(CONFIG_DEBUG_VM))
		return;

	old_pte = __ptep_get(ptep);

	if (!pte_valid(old_pte) || !pte_valid(pte))
		return;
	if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1)
		return;

	/*
	 * Check for potential race with hardware updates of the pte
	 * (__ptep_set_access_flags safely changes valid ptes without going
	 * through an invalid entry).
	 */
	VM_WARN_ONCE(!pte_young(pte),
		     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
		     __func__, pte_val(old_pte), pte_val(pte));
	VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
		     "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
		     __func__, pte_val(old_pte), pte_val(pte));
	VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)),
		     "%s: unsafe attribute change: 0x%016llx -> 0x%016llx",
		     __func__, pte_val(old_pte), pte_val(pte));
}

static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
{
	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
		__sync_icache_dcache(pte);

	/*
	 * If the PTE would provide user space access to the tags associated
	 * with it then ensure that the MTE tags are synchronised. Although
	 * pte_access_permitted_no_overlay() returns false for exec only
	 * mappings, they don't expose tags (instruction fetches don't check
	 * tags).
	 */
	if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) &&
	    !pte_special(pte) && pte_tagged(pte))
		mte_sync_tags(pte, nr_pages);
}

/*
 * Select all bits except the pfn
 */
#define pte_pgprot pte_pgprot
static inline pgprot_t pte_pgprot(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);

	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}

#define pte_advance_pfn pte_advance_pfn
static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
{
	return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
}

/*
 * Hugetlb definitions.
 */
#define HUGE_MAX_HSTATE		4
#define HPAGE_SHIFT		PMD_SHIFT
#define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
#define HPAGE_MASK		(~(HPAGE_SIZE - 1))
#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)

static inline pte_t pgd_pte(pgd_t pgd)
{
	return __pte(pgd_val(pgd));
}

static inline pte_t p4d_pte(p4d_t p4d)
{
	return __pte(p4d_val(p4d));
}

static inline pte_t pud_pte(pud_t pud)
{
	return __pte(pud_val(pud));
}

static inline pud_t pte_pud(pte_t pte)
{
	return __pud(pte_val(pte));
}

static inline pmd_t pud_pmd(pud_t pud)
{
	return __pmd(pud_val(pud));
}

static inline pte_t pmd_pte(pmd_t pmd)
{
	return __pte(pmd_val(pmd));
}

static inline pmd_t pte_pmd(pte_t pte)
{
	return __pmd(pte_val(pte));
}

static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
{
	return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT);
}

static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
{
	return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
}

static inline pte_t pte_swp_mkexclusive(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}

static inline bool pte_swp_exclusive(pte_t pte)
{
	return pte_val(pte) & PTE_SWP_EXCLUSIVE;
}

static inline pte_t pte_swp_clear_exclusive(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
}

static inline int pte_swp_uffd_wp(pte_t pte)
{
	return !!(pte_val(pte) & PTE_SWP_UFFD_WP);
}

static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

#ifdef CONFIG_NUMA_BALANCING
/*
 * See the comment in include/linux/pgtable.h
 */
static inline int pte_protnone(pte_t pte)
{
	/*
	 * pte_present_invalid() tells us that the pte is invalid from HW
	 * perspective but present from SW perspective, so the fields are to be
	 * interpreted as per the HW layout. The second and third checks are
	 * the unique encoding that we use for PROT_NONE. It is insufficient to
	 * only use the first check because we share the same encoding scheme
	 * with pmds which support pmd_mkinvalid(), so can be present-invalid
	 * without being PROT_NONE.
	 */
	return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte);
}

static inline int pmd_protnone(pmd_t pmd)
{
	return pte_protnone(pmd_pte(pmd));
}
#endif

#define pmd_present(pmd)	pte_present(pmd_pte(pmd))
#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
#define pmd_valid(pmd)		pte_valid(pmd_pte(pmd))
#define pmd_user(pmd)		pte_user(pmd_pte(pmd))
#define pmd_user_exec(pmd)	pte_user_exec(pmd_pte(pmd))
#define pmd_cont(pmd)		pte_cont(pmd_pte(pmd))
#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_mkwrite_novma(pmd)	pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
#define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mkinvalid(pmd)	pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
#define pmd_uffd_wp(pmd)	pte_uffd_wp(pmd_pte(pmd))
#define pmd_mkuffd_wp(pmd)	pte_pmd(pte_mkuffd_wp(pmd_pte(pmd)))
#define pmd_clear_uffd_wp(pmd)	pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd)))
#define pmd_swp_uffd_wp(pmd)	pte_swp_uffd_wp(pmd_pte(pmd))
#define pmd_swp_mkuffd_wp(pmd)	pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd)))
#define pmd_swp_clear_uffd_wp(pmd)	\
	pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd)))
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */

#define pmd_write(pmd)		pte_write(pmd_pte(pmd))

static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
	/*
	 * It's possible that the pmd is present-invalid on entry
	 * and in that case it needs to remain present-invalid on
	 * exit. So ensure the VALID bit does not get modified.
	 */
	pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID;
	pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID;

	return __pmd((pmd_val(pmd) & ~mask) | val);
}

#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
#define pmd_special(pte)	(!!((pmd_val(pte) & PTE_SPECIAL)))
static inline pmd_t pmd_mkspecial(pmd_t pmd)
{
	return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL));
}
#endif

#define __pmd_to_phys(pmd)	__pte_to_phys(pmd_pte(pmd))
#define __phys_to_pmd_val(phys)	__phys_to_pte_val(phys)
#define pmd_pfn(pmd)		((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot)	__pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))

#define pud_young(pud)		pte_young(pud_pte(pud))
#define pud_mkyoung(pud)	pte_pud(pte_mkyoung(pud_pte(pud)))
#define pud_write(pud)		pte_write(pud_pte(pud))

static inline pud_t pud_mkhuge(pud_t pud)
{
	/*
	 * It's possible that the pud is present-invalid on entry
	 * and in that case it needs to remain present-invalid on
	 * exit. So ensure the VALID bit does not get modified.
	 */
	pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID;
	pudval_t val = PUD_TYPE_SECT & ~PTE_VALID;

	return __pud((pud_val(pud) & ~mask) | val);
}

#define __pud_to_phys(pud)	__pte_to_phys(pud_pte(pud))
#define __phys_to_pud_val(phys)	__phys_to_pte_val(phys)
#define pud_pfn(pud)		((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot)	__pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))

#define pmd_pgprot pmd_pgprot
static inline pgprot_t pmd_pgprot(pmd_t pmd)
{
	unsigned long pfn = pmd_pfn(pmd);

	return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd));
}

#define pud_pgprot pud_pgprot
static inline pgprot_t pud_pgprot(pud_t pud)
{
	unsigned long pfn = pud_pfn(pud);

	return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
}

static inline void __set_ptes_anysz(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, pte_t pte, unsigned int nr,
				    unsigned long pgsize)
{
	unsigned long stride = pgsize >> PAGE_SHIFT;

	switch (pgsize) {
	case PAGE_SIZE:
		page_table_check_ptes_set(mm, addr, ptep, pte, nr);
		break;
	case PMD_SIZE:
		page_table_check_pmds_set(mm, addr, (pmd_t *)ptep,
					  pte_pmd(pte), nr);
		break;
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		page_table_check_puds_set(mm, addr, (pud_t *)ptep,
					  pte_pud(pte), nr);
		break;
#endif
	default:
		VM_WARN_ON(1);
	}

	__sync_cache_and_tags(pte, nr * stride);

	for (;;) {
		__check_safe_pte_update(mm, ptep, pte);
		__set_pte_nosync(ptep, pte);
		if (--nr == 0)
			break;
		ptep++;
		pte = pte_advance_pfn(pte, stride);
	}

	__set_pte_complete(pte);
}

static inline void __set_ptes(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte, unsigned int nr)
{
	__set_ptes_anysz(mm, addr, ptep, pte, nr, PAGE_SIZE);
}

static inline void __set_pmds(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp, pmd_t pmd, unsigned int nr)
{
	__set_ptes_anysz(mm, addr, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE);
}
#define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1)
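/*
 * Illustrative sketch (not part of the API): __set_ptes_anysz() writes @nr
 * consecutive entries, advancing the output address by one @pgsize stride
 * per entry, and only emits the pte barriers once, after the last write.
 * For example, mapping four 4K pages starting at pfn:
 *
 *	__set_ptes(mm, addr, ptep, pfn_pte(pfn, prot), 4);
 *
 * stores pfn, pfn + 1, pfn + 2 and pfn + 3 into ptep[0..3] (via
 * pte_advance_pfn()) followed by a single __set_pte_complete().
 */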
static inline void __set_puds(struct mm_struct *mm, unsigned long addr,
			      pud_t *pudp, pud_t pud, unsigned int nr)
{
	__set_ptes_anysz(mm, addr, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE);
}
#define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1)

#define __p4d_to_phys(p4d)	__pte_to_phys(p4d_pte(p4d))
#define __phys_to_p4d_val(phys)	__phys_to_pte_val(phys)

#define __pgd_to_phys(pgd)	__pte_to_phys(pgd_pte(pgd))
#define __phys_to_pgd_val(phys)	__phys_to_pte_val(phys)

#define __pgprot_modify(prot,mask,bits) \
	__pgprot((pgprot_val(prot) & ~(mask)) | (bits))

#define pgprot_nx(prot) \
	__pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN)

#define pgprot_decrypted(prot) \
	__pgprot_modify(prot, PROT_NS_SHARED, PROT_NS_SHARED)
#define pgprot_encrypted(prot) \
	__pgprot_modify(prot, PROT_NS_SHARED, 0)

/*
 * Mark the prot value as uncacheable and unbufferable.
 */
#define pgprot_noncached(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
#define pgprot_writecombine(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
#define pgprot_device(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
#define pgprot_tagged(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED))
#define pgprot_mhp	pgprot_tagged
/*
 * DMA allocations for non-coherent devices use what the Arm architecture calls
 * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
 * and merging of writes. This is different from "Device-nGnR[nE]" memory which
 * is intended for MMIO and thus forbids speculation, preserves access size,
 * requires strict alignment and can also force write responses to come from the
 * endpoint.
 */
#define pgprot_dmacoherent(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, \
			PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)

#define __HAVE_PHYS_MEM_ACCESS_PROT
struct file;
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
				     unsigned long size, pgprot_t vma_prot);

#define pmd_none(pmd)		(!pmd_val(pmd))

#define pmd_table(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
				 PMD_TYPE_TABLE)
#define pmd_sect(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
				 PMD_TYPE_SECT)
#define pmd_leaf(pmd)		(pmd_present(pmd) && !pmd_table(pmd))
#define pmd_bad(pmd)		(!pmd_table(pmd))

#define pmd_leaf_size(pmd)	(pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
#define pte_leaf_size(pte)	(pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_trans_huge(pmd_t pmd)
{
	/*
	 * If pmd is present-invalid, pmd_table() won't detect it
	 * as a table, so force the valid bit for the comparison.
	 */
	return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
static inline bool pud_sect(pud_t pud) { return false; }
static inline bool pud_table(pud_t pud) { return true; }
#else
#define pud_sect(pud)		((pud_val(pud) & PUD_TYPE_MASK) == \
				 PUD_TYPE_SECT)
#define pud_table(pud)		((pud_val(pud) & PUD_TYPE_MASK) == \
				 PUD_TYPE_TABLE)
#endif

extern pgd_t swapper_pg_dir[];
extern pgd_t idmap_pg_dir[];
extern pgd_t tramp_pg_dir[];
extern pgd_t reserved_pg_dir[];

extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);

static inline bool in_swapper_pgdir(void *addr)
{
	return ((unsigned long)addr & PAGE_MASK) ==
		((unsigned long)swapper_pg_dir & PAGE_MASK);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
#ifdef __PAGETABLE_PMD_FOLDED
	if (in_swapper_pgdir(pmdp)) {
		set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
		return;
	}
#endif /* __PAGETABLE_PMD_FOLDED */

	WRITE_ONCE(*pmdp, pmd);

	if (pmd_valid(pmd))
		queue_pte_barriers();
}

static inline void pmd_clear(pmd_t *pmdp)
{
	set_pmd(pmdp, __pmd(0));
}

static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
{
	return __pmd_to_phys(pmd);
}

static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
	return (unsigned long)__va(pmd_page_paddr(pmd));
}

/* Find an entry in the third-level page table. */
#define pte_offset_phys(dir,addr)	(pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))

#define pte_set_fixmap(addr)		((pte_t *)set_fixmap_offset(FIX_PTE, addr))
#define pte_set_fixmap_offset(pmd, addr)	pte_set_fixmap(pte_offset_phys(pmd, addr))
#define pte_clear_fixmap()		clear_fixmap(FIX_PTE)

#define pmd_page(pmd)			phys_to_page(__pmd_to_phys(pmd))

/* use ONLY for statically allocated translation tables */
#define pte_offset_kimg(dir,addr)	((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))

#if CONFIG_PGTABLE_LEVELS > 2

#define pmd_ERROR(e)	\
	pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))

#define pud_none(pud)		(!pud_val(pud))
#define pud_bad(pud)		((pud_val(pud) & PUD_TYPE_MASK) != \
				 PUD_TYPE_TABLE)
#define pud_present(pud)	pte_present(pud_pte(pud))
#ifndef __PAGETABLE_PMD_FOLDED
#define pud_leaf(pud)		(pud_present(pud) && !pud_table(pud))
#else
#define pud_leaf(pud)		false
#endif
#define pud_valid(pud)		pte_valid(pud_pte(pud))
#define pud_user(pud)		pte_user(pud_pte(pud))
#define pud_user_exec(pud)	pte_user_exec(pud_pte(pud))

static inline bool pgtable_l4_enabled(void);

static inline void set_pud(pud_t *pudp, pud_t pud)
{
	if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) {
		set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
		return;
	}

	WRITE_ONCE(*pudp, pud);

	if (pud_valid(pud))
		queue_pte_barriers();
}

static inline void pud_clear(pud_t *pudp)
{
	set_pud(pudp, __pud(0));
}

static inline phys_addr_t pud_page_paddr(pud_t pud)
{
	return __pud_to_phys(pud);
}

static inline pmd_t *pud_pgtable(pud_t pud)
{
	return (pmd_t *)__va(pud_page_paddr(pud));
}
/* Find an entry in the second-level page table. */
#define pmd_offset_phys(dir, addr)	(pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))

#define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
#define pmd_set_fixmap_offset(pud, addr)	pmd_set_fixmap(pmd_offset_phys(pud, addr))
#define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)

#define pud_page(pud)			phys_to_page(__pud_to_phys(pud))

/* use ONLY for statically allocated translation tables */
#define pmd_offset_kimg(dir,addr)	((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))

#else

#define pud_valid(pud)		false
#define pud_page_paddr(pud)	({ BUILD_BUG(); 0; })
#define pud_user_exec(pud)	pud_user(pud) /* Always 0 with folding */

/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
#define pmd_set_fixmap(addr)		NULL
#define pmd_set_fixmap_offset(pudp, addr)	((pmd_t *)pudp)
#define pmd_clear_fixmap()

#define pmd_offset_kimg(dir,addr)	((pmd_t *)dir)

#endif /* CONFIG_PGTABLE_LEVELS > 2 */

#if CONFIG_PGTABLE_LEVELS > 3

static __always_inline bool pgtable_l4_enabled(void)
{
	if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2))
		return true;
	if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
		return vabits_actual == VA_BITS;
	return alternative_has_cap_unlikely(ARM64_HAS_VA52);
}

static inline bool mm_pud_folded(const struct mm_struct *mm)
{
	return !pgtable_l4_enabled();
}
#define mm_pud_folded mm_pud_folded

#define pud_ERROR(e)	\
	pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))

#define p4d_none(p4d)		(pgtable_l4_enabled() && !p4d_val(p4d))
#define p4d_bad(p4d)		(pgtable_l4_enabled() && \
				 ((p4d_val(p4d) & P4D_TYPE_MASK) != \
				  P4D_TYPE_TABLE))
#define p4d_present(p4d)	(!p4d_none(p4d))

static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
	if (in_swapper_pgdir(p4dp)) {
		set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
		return;
	}

	WRITE_ONCE(*p4dp, p4d);
	queue_pte_barriers();
}

static inline void p4d_clear(p4d_t *p4dp)
{
	if (pgtable_l4_enabled())
		set_p4d(p4dp, __p4d(0));
}

static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
{
	return __p4d_to_phys(p4d);
}

#define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))

static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr)
{
	/* Ensure that 'p4dp' indexes a page table according to 'addr' */
	VM_BUG_ON(((addr >> P4D_SHIFT) ^ ((u64)p4dp >> 3)) % PTRS_PER_P4D);

	return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr);
}

static inline pud_t *p4d_pgtable(p4d_t p4d)
{
	return (pud_t *)__va(p4d_page_paddr(p4d));
}

static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr)
{
	BUG_ON(!pgtable_l4_enabled());

	return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t);
}

static inline
pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr)
{
	if (!pgtable_l4_enabled())
		return p4d_to_folded_pud(p4dp, addr);
	return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr);
}
#define pud_offset_lockless pud_offset_lockless

static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr)
{
	return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr);
}
#define pud_offset	pud_offset
static inline pud_t *pud_set_fixmap(unsigned long addr)
{
	if (!pgtable_l4_enabled())
		return NULL;
	return (pud_t *)set_fixmap_offset(FIX_PUD, addr);
}

static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr)
{
	if (!pgtable_l4_enabled())
		return p4d_to_folded_pud(p4dp, addr);
	return pud_set_fixmap(pud_offset_phys(p4dp, addr));
}

static inline void pud_clear_fixmap(void)
{
	if (pgtable_l4_enabled())
		clear_fixmap(FIX_PUD);
}

/* use ONLY for statically allocated translation tables */
static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr)
{
	if (!pgtable_l4_enabled())
		return p4d_to_folded_pud(p4dp, addr);
	return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr));
}

#define p4d_page(p4d)		pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))

#else

static inline bool pgtable_l4_enabled(void) { return false; }

#define p4d_page_paddr(p4d)	({ BUILD_BUG(); 0; })

/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
#define pud_set_fixmap(addr)		NULL
#define pud_set_fixmap_offset(pgdp, addr)	((pud_t *)pgdp)
#define pud_clear_fixmap()

#define pud_offset_kimg(dir,addr)	((pud_t *)dir)

#endif /* CONFIG_PGTABLE_LEVELS > 3 */

#if CONFIG_PGTABLE_LEVELS > 4

static __always_inline bool pgtable_l5_enabled(void)
{
	if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
		return vabits_actual == VA_BITS;
	return alternative_has_cap_unlikely(ARM64_HAS_VA52);
}

static inline bool mm_p4d_folded(const struct mm_struct *mm)
{
	return !pgtable_l5_enabled();
}
#define mm_p4d_folded mm_p4d_folded

#define p4d_ERROR(e)	\
	pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))

#define pgd_none(pgd)		(pgtable_l5_enabled() && !pgd_val(pgd))
#define pgd_bad(pgd)		(pgtable_l5_enabled() && \
				 ((pgd_val(pgd) & PGD_TYPE_MASK) != \
				  PGD_TYPE_TABLE))
#define pgd_present(pgd)	(!pgd_none(pgd))

static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	if (in_swapper_pgdir(pgdp)) {
		set_swapper_pgd(pgdp, __pgd(pgd_val(pgd)));
		return;
	}

	WRITE_ONCE(*pgdp, pgd);
	queue_pte_barriers();
}

static inline void pgd_clear(pgd_t *pgdp)
{
	if (pgtable_l5_enabled())
		set_pgd(pgdp, __pgd(0));
}

static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
{
	return __pgd_to_phys(pgd);
}

#define p4d_index(addr)		(((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))

static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
{
	/* Ensure that 'pgdp' indexes a page table according to 'addr' */
	VM_BUG_ON(((addr >> PGDIR_SHIFT) ^ ((u64)pgdp >> 3)) % PTRS_PER_PGD);

	return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr);
}

static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
{
	BUG_ON(!pgtable_l5_enabled());

	return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
}

static inline
p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
{
	if (!pgtable_l5_enabled())
		return pgd_to_folded_p4d(pgdp, addr);
	return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr);
}
#define p4d_offset_lockless p4d_offset_lockless
static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr)
{
	return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr);
}

static inline p4d_t *p4d_set_fixmap(unsigned long addr)
{
	if (!pgtable_l5_enabled())
		return NULL;
	return (p4d_t *)set_fixmap_offset(FIX_P4D, addr);
}

static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr)
{
	if (!pgtable_l5_enabled())
		return pgd_to_folded_p4d(pgdp, addr);
	return p4d_set_fixmap(p4d_offset_phys(pgdp, addr));
}

static inline void p4d_clear_fixmap(void)
{
	if (pgtable_l5_enabled())
		clear_fixmap(FIX_P4D);
}

/* use ONLY for statically allocated translation tables */
static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
{
	if (!pgtable_l5_enabled())
		return pgd_to_folded_p4d(pgdp, addr);
	return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr));
}

#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))

#else

static inline bool pgtable_l5_enabled(void) { return false; }

#define p4d_index(addr)		(((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))

/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
#define p4d_set_fixmap(addr)		NULL
#define p4d_set_fixmap_offset(p4dp, addr)	((p4d_t *)p4dp)
#define p4d_clear_fixmap()

#define p4d_offset_kimg(dir,addr)	((p4d_t *)dir)

static inline
p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
{
	/*
	 * With runtime folding of the pud, pud_offset_lockless() passes
	 * the 'pgd_t *' we return here to p4d_to_folded_pud(), which
	 * will offset the pointer assuming that it points into
	 * a page-table page. However, the fast GUP path passes us a
	 * pgd_t allocated on the stack and so we must use the original
	 * pointer in 'pgdp' to construct the p4d pointer instead of
	 * using the generic p4d_offset_lockless() implementation.
	 *
	 * Note: reusing the original pointer means that we may
	 * dereference the same (live) page-table entry multiple times.
	 * This is safe because it is still only loaded once in the
	 * context of each level and the CPU guarantees same-address
	 * read-after-read ordering.
	 */
	return p4d_offset(pgdp, addr);
}
#define p4d_offset_lockless p4d_offset_lockless_folded

#endif /* CONFIG_PGTABLE_LEVELS > 4 */

#define pgd_ERROR(e)	\
	pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))

#define pgd_set_fixmap(addr)	((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
#define pgd_clear_fixmap()	clear_fixmap(FIX_PGD)

static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	/*
	 * Normal and Normal-Tagged are two different memory types and indices
	 * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK.
	 */
	const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
			      PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE |
			      PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK;

	/* preserve the hardware dirty information */
	if (pte_hw_dirty(pte))
		pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));

	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
	/*
	 * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware
	 * dirtiness again.
	 */
	if (pte_sw_dirty(pte))
		pte = pte_mkdirty(pte);
	return pte;
}

static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
	return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
}

extern int __ptep_set_access_flags(struct vm_area_struct *vma,
				   unsigned long address, pte_t *ptep,
				   pte_t entry, int dirty);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	return __ptep_set_access_flags(vma, address, (pte_t *)pmdp,
				       pmd_pte(entry), dirty);
}
#endif

#ifdef CONFIG_PAGE_TABLE_CHECK
static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr)
{
	return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte));
}

static inline bool pmd_user_accessible_page(pmd_t pmd, unsigned long addr)
{
	return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}

static inline bool pud_user_accessible_page(pud_t pud, unsigned long addr)
{
	return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud));
}
#endif

/*
 * Atomic pte/pmd modifications.
 */

static inline void __pte_clear(struct mm_struct *mm,
			       unsigned long addr, pte_t *ptep)
{
	__set_pte(ptep, __pte(0));
}

static inline bool __ptep_test_and_clear_young(struct vm_area_struct *vma,
					       unsigned long address, pte_t *ptep)
{
	pte_t old_pte, pte;

	pte = __ptep_get(ptep);
	do {
		old_pte = pte;
		pte = pte_mkold(pte);
		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte), pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));

	return pte_young(pte);
}

static inline bool __ptep_clear_flush_young(struct vm_area_struct *vma,
					    unsigned long address, pte_t *ptep)
{
	bool young = __ptep_test_and_clear_young(vma, address, ptep);

	if (young) {
		/*
		 * We can elide the trailing DSB here since the worst that can
		 * happen is that a CPU continues to use the young entry in its
		 * TLB and we mistakenly reclaim the associated page. The
		 * window for such an event is bounded by the next
		 * context-switch, which provides a DSB to complete the TLB
		 * invalidation.
		 */
		flush_tlb_page_nosync(vma, address);
	}

	return young;
}

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
					     unsigned long address, pmd_t *pmdp)
{
	/* Operation applies to PMD table entry only if FEAT_HAFT is enabled */
	VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft());
	return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */

static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm,
					       unsigned long address,
					       pte_t *ptep,
					       unsigned long pgsize)
{
	pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));

	switch (pgsize) {
	case PAGE_SIZE:
		page_table_check_pte_clear(mm, address, pte);
		break;
	case PMD_SIZE:
		page_table_check_pmd_clear(mm, address, pte_pmd(pte));
		break;
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		page_table_check_pud_clear(mm, address, pte_pud(pte));
		break;
#endif
	default:
		VM_WARN_ON(1);
	}

	return pte;
}

static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
					 unsigned long address, pte_t *ptep)
{
	return __ptep_get_and_clear_anysz(mm, address, ptep, PAGE_SIZE);
}

static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
				     pte_t *ptep, unsigned int nr, int full)
{
	for (;;) {
		__ptep_get_and_clear(mm, addr, ptep);
		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}

static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
					      unsigned long addr, pte_t *ptep,
					      unsigned int nr, int full)
{
	pte_t pte, tmp_pte;

	pte = __ptep_get_and_clear(mm, addr, ptep);
	while (--nr) {
		ptep++;
		addr += PAGE_SIZE;
		tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
		if (pte_dirty(tmp_pte))
			pte = pte_mkdirty(pte);
		if (pte_young(tmp_pte))
			pte = pte_mkyoung(pte);
	}
	return pte;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address, pmd_t *pmdp)
{
	return pte_pmd(__ptep_get_and_clear_anysz(mm, address, (pte_t *)pmdp, PMD_SIZE));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline void ___ptep_set_wrprotect(struct mm_struct *mm,
					 unsigned long address, pte_t *ptep,
					 pte_t pte)
{
	pte_t old_pte;

	do {
		old_pte = pte;
		pte = pte_wrprotect(pte);
		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte), pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));
}

/*
 * __ptep_set_wrprotect - mark read-only while transferring potential hardware
 * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
 */
static inline void __ptep_set_wrprotect(struct mm_struct *mm,
					unsigned long address, pte_t *ptep)
{
	___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep));
}

static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address,
				    pte_t *ptep, unsigned int nr)
{
	unsigned int i;

	for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++)
		__ptep_set_wrprotect(mm, address, ptep);
}

static inline void __clear_young_dirty_pte(struct vm_area_struct *vma,
					   unsigned long addr, pte_t *ptep,
					   pte_t pte, cydp_t flags)
{
	pte_t old_pte;

	do {
		old_pte = pte;

		if (flags & CYDP_CLEAR_YOUNG)
			pte = pte_mkold(pte);
		if (flags & CYDP_CLEAR_DIRTY)
			pte = pte_mkclean(pte);

		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte), pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));
}

static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma,
					    unsigned long addr, pte_t *ptep,
					    unsigned int nr, cydp_t flags)
{
	pte_t pte;

	for (;;) {
		pte = __ptep_get(ptep);

		if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY))
			__set_pte(ptep, pte_mkclean(pte_mkold(pte)));
		else
			__clear_young_dirty_pte(vma, addr, ptep, pte, flags);

		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	__ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}

#define pmdp_establish pmdp_establish
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
				   unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
	return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
}
#endif

/*
 * Encode and decode a swap entry:
 *	bits 0-1:	present (must be zero)
 *	bit 2:		remember PG_anon_exclusive
 *	bit 3:		remember uffd-wp state
 *	bits 6-10:	swap type
 *	bit 11:		PTE_PRESENT_INVALID (must be zero)
 *	bits 12-61:	swap offset
 */
#define __SWP_TYPE_SHIFT	6
#define __SWP_TYPE_BITS		5
#define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT	12
#define __SWP_OFFSET_BITS	50
#define __SWP_OFFSET_MASK	((1UL << __SWP_OFFSET_BITS) - 1)

#define __swp_type(x)		(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
#define __swp_offset(x)		(((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK)
#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })

#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(swp)	((pte_t) { (swp).val })

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
#define __pmd_to_swp_entry(pmd)		((swp_entry_t) { pmd_val(pmd) })
#define __swp_entry_to_pmd(swp)		__pmd((swp).val)
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
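/*
 * Worked example (illustrative only): __swp_entry(type, offset) simply
 * places the fields at the shifts defined above, e.g.
 *
 *	__swp_entry(3, 0x1234).val ==
 *		(3UL << __SWP_TYPE_SHIFT) | (0x1234UL << __SWP_OFFSET_SHIFT)
 *
 * which leaves bits 0-1 and bit 11 clear, so the resulting swap pte is
 * neither valid nor present-invalid and pte_present() returns false for it.
 */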
/*
 * Ensure that there are not more swap files than can be encoded in the kernel
 * PTEs.
 */
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)

#ifdef CONFIG_ARM64_MTE

#define __HAVE_ARCH_PREPARE_TO_SWAP
extern int arch_prepare_to_swap(struct folio *folio);

#define __HAVE_ARCH_SWAP_INVALIDATE
static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
{
	if (system_supports_mte())
		mte_invalidate_tags(type, offset);
}

static inline void arch_swap_invalidate_area(int type)
{
	if (system_supports_mte())
		mte_invalidate_tags_area(type);
}

#define __HAVE_ARCH_SWAP_RESTORE
extern void arch_swap_restore(swp_entry_t entry, struct folio *folio);

#endif /* CONFIG_ARM64_MTE */

/*
 * On AArch64, the cache coherency is handled via the __set_ptes() function.
 */
static inline void update_mmu_cache_range(struct vm_fault *vmf,
		struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
		unsigned int nr)
{
	/*
	 * We don't do anything here, so there's a very small chance of
	 * us retaking a user fault which we just fixed up. The alternative
	 * is doing a dsb(ishst), but that penalises the fastpath.
	 */
}

#define update_mmu_cache(vma, addr, ptep) \
	update_mmu_cache_range(NULL, vma, addr, ptep, 1)
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)

#ifdef CONFIG_ARM64_PA_BITS_52
#define phys_to_ttbr(addr)	(((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
#else
#define phys_to_ttbr(addr)	(addr)
#endif

/*
 * On arm64 without hardware Access Flag, copying from user will fail because
 * the pte is old and cannot be marked young. So we always end up with a zeroed
 * page after fork() + CoW for pfn mappings. We don't always have a
 * hardware-managed access flag on arm64.
 */
#define arch_has_hw_pte_young		cpu_has_hw_af

#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
#define arch_has_hw_nonleaf_pmd_young	system_supports_haft
#endif

/*
 * Experimentally, it's cheap to set the access flag in hardware and we
 * benefit from prefaulting mappings as 'old' to start with.
 */
#define arch_wants_old_prefaulted_pte	cpu_has_hw_af

/*
 * Request that exec memory is read into the pagecache in at least 64K folios.
 * This size can be contpte-mapped when 4K base pages are in use (16 pages into
 * 1 iTLB entry), and HPA can coalesce it (4 pages into 1 TLB entry) when 16K
 * base pages are in use.
 */
#define exec_folio_order()	ilog2(SZ_64K >> PAGE_SHIFT)

static inline bool pud_sect_supported(void)
{
	return PAGE_SIZE == SZ_4K;
}

#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
#define ptep_modify_prot_start ptep_modify_prot_start
extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep);

#define ptep_modify_prot_commit ptep_modify_prot_commit
extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pte_t old_pte, pte_t new_pte);

#define modify_prot_start_ptes modify_prot_start_ptes
extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    unsigned int nr);

#define modify_prot_commit_ptes modify_prot_commit_ptes
extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
				    pte_t *ptep, pte_t old_pte, pte_t pte,
				    unsigned int nr);

#ifdef CONFIG_ARM64_CONTPTE

/*
 * The contpte APIs are used to transparently manage the contiguous bit in ptes
 * where it is possible and makes sense to do so. The PTE_CONT bit is considered
 * a private implementation detail of the public ptep API (see below).
 */
extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte);
extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep);
extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte, unsigned int nr);
extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, unsigned int nr, int full);
extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, int full);
bool contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep, unsigned int nr);
bool contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep, unsigned int nr);
extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, unsigned int nr);
extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				pte_t entry, int dirty);
extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, cydp_t flags);
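/*
 * Illustrative note (not from the original header): with 4K base pages,
 * CONT_PTES is 16, so a naturally aligned 64K range mapped by 16 ptes is a
 * candidate for folding into a single contiguous-bit TLB entry. In
 * contpte_try_fold() below the fold is only attempted once the entry being
 * written is the last one of such a block, i.e. when both
 *
 *	((addr >> PAGE_SHIFT) & (CONT_PTES - 1)) == CONT_PTES - 1
 *	(pte_pfn(pte)         & (CONT_PTES - 1)) == CONT_PTES - 1
 *
 * hold, so that the virtual and physical ranges are aligned consistently.
 */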
static __always_inline void contpte_try_fold(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep, pte_t pte)
{
	/*
	 * Only bother trying if both the virtual and physical addresses are
	 * aligned and correspond to the last entry in a contig range. The core
	 * code mostly modifies ranges from low to high, so this is likely the
	 * last modification in the contig range, so a good time to fold.
	 * We can't fold special mappings, because there is no associated folio.
	 */

	const unsigned long contmask = CONT_PTES - 1;
	bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask;

	if (unlikely(valign)) {
		bool palign = (pte_pfn(pte) & contmask) == contmask;

		if (unlikely(palign &&
		    pte_valid(pte) && !pte_cont(pte) && !pte_special(pte)))
			__contpte_try_fold(mm, addr, ptep, pte);
	}
}

static __always_inline void contpte_try_unfold(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep, pte_t pte)
{
	if (unlikely(pte_valid_cont(pte)))
		__contpte_try_unfold(mm, addr, ptep, pte);
}

#define pte_batch_hint pte_batch_hint
static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
{
	if (!pte_valid_cont(pte))
		return 1;

	return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1));
}

/*
 * The below functions constitute the public API that arm64 presents to the
 * core-mm to manipulate PTE entries within their page tables (or at least this
 * is the subset of the API that arm64 needs to implement). These public
 * versions will automatically and transparently apply the contiguous bit where
 * it makes sense to do so. Therefore any users that are contig-aware (e.g.
 * hugetlb, kernel mapper) should NOT use these APIs, but instead use the
 * private versions, which are prefixed with double underscore. All of these
 * APIs except for ptep_get_lockless() are expected to be called with the PTL
 * held. Although the contiguous bit is considered private to the
 * implementation, it is deliberately allowed to leak through the getters (e.g.
 * ptep_get()), back to core code. This is required so that pte_leaf_size() can
 * provide an accurate size for perf_get_pgtable_size(). But this leakage means
 * it's possible a pte will be passed to a setter with the contiguous bit set,
 * so we explicitly clear the contiguous bit in those cases to prevent
 * accidentally setting it in the pgtable.
 */

#define ptep_get ptep_get
static inline pte_t ptep_get(pte_t *ptep)
{
	pte_t pte = __ptep_get(ptep);

	if (likely(!pte_valid_cont(pte)))
		return pte;

	return contpte_ptep_get(ptep, pte);
}

#define ptep_get_lockless ptep_get_lockless
static inline pte_t ptep_get_lockless(pte_t *ptep)
{
	pte_t pte = __ptep_get(ptep);

	if (likely(!pte_valid_cont(pte)))
		return pte;

	return contpte_ptep_get_lockless(ptep);
}

static inline void set_pte(pte_t *ptep, pte_t pte)
{
	/*
	 * We don't have the mm or vaddr so cannot unfold contig entries (since
	 * it requires tlb maintenance). set_pte() is not used in core code, so
	 * this should never even be called. Regardless do our best to service
	 * any call and emit a warning if there is any attempt to set a pte on
	 * top of an existing contig range.
	 */
	pte_t orig_pte = __ptep_get(ptep);

	WARN_ON_ONCE(pte_valid_cont(orig_pte));
	__set_pte(ptep, pte_mknoncont(pte));
}

#define set_ptes set_ptes
static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr,
				     pte_t *ptep, pte_t pte, unsigned int nr)
{
	pte = pte_mknoncont(pte);

	if (likely(nr == 1)) {
		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
		__set_ptes(mm, addr, ptep, pte, 1);
		contpte_try_fold(mm, addr, ptep, pte);
	} else {
		contpte_set_ptes(mm, addr, ptep, pte, nr);
	}
}

static inline void pte_clear(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
	__pte_clear(mm, addr, ptep);
}

#define clear_full_ptes clear_full_ptes
static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, unsigned int nr, int full)
{
	if (likely(nr == 1)) {
		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
		__clear_full_ptes(mm, addr, ptep, nr, full);
	} else {
		contpte_clear_full_ptes(mm, addr, ptep, nr, full);
	}
}

#define get_and_clear_full_ptes get_and_clear_full_ptes
static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
					    unsigned long addr, pte_t *ptep,
					    unsigned int nr, int full)
{
	pte_t pte;

	if (likely(nr == 1)) {
		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
		pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
	} else {
		pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full);
	}

	return pte;
}

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long addr, pte_t *ptep)
{
	contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
	return __ptep_get_and_clear(mm, addr, ptep);
}

#define test_and_clear_young_ptes test_and_clear_young_ptes
static inline bool test_and_clear_young_ptes(struct vm_area_struct *vma,
					     unsigned long addr, pte_t *ptep,
					     unsigned int nr)
{
	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
		return __ptep_test_and_clear_young(vma, addr, ptep);

	return contpte_test_and_clear_young_ptes(vma, addr, ptep, nr);
}

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
					     unsigned long addr, pte_t *ptep)
{
	return test_and_clear_young_ptes(vma, addr, ptep, 1);
}

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline bool ptep_clear_flush_young(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep)
{
	pte_t orig_pte = __ptep_get(ptep);

	if (likely(!pte_valid_cont(orig_pte)))
		return __ptep_clear_flush_young(vma, addr, ptep);

	return contpte_clear_flush_young_ptes(vma, addr, ptep, 1);
}

#define clear_flush_young_ptes clear_flush_young_ptes
static inline bool clear_flush_young_ptes(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep,
					  unsigned int nr)
{
	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
		return __ptep_clear_flush_young(vma, addr, ptep);

	return contpte_clear_flush_young_ptes(vma, addr, ptep, nr);
}

#define wrprotect_ptes wrprotect_ptes
static __always_inline void wrprotect_ptes(struct mm_struct *mm,
					   unsigned long addr, pte_t *ptep,
					   unsigned int nr)
{
	if (likely(nr == 1)) {
		/*
		 * Optimization: wrprotect_ptes() can only be called for present
		 * ptes so we only need to check contig bit as condition for
		 * unfold, and we can remove the contig bit from the pte we read
		 * to avoid re-reading. This speeds up fork() which is sensitive
		 * for order-0 folios. Equivalent to contpte_try_unfold().
		 */
		pte_t orig_pte = __ptep_get(ptep);

		if (unlikely(pte_cont(orig_pte))) {
			__contpte_try_unfold(mm, addr, ptep, orig_pte);
			orig_pte = pte_mknoncont(orig_pte);
		}
		___ptep_set_wrprotect(mm, addr, ptep, orig_pte);
	} else {
		contpte_wrprotect_ptes(mm, addr, ptep, nr);
	}
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	wrprotect_ptes(mm, addr, ptep, 1);
}

#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
					unsigned long addr, pte_t *ptep,
					pte_t entry, int dirty)
{
	pte_t orig_pte = __ptep_get(ptep);

	entry = pte_mknoncont(entry);

	if (likely(!pte_valid_cont(orig_pte)))
		return __ptep_set_access_flags(vma, addr, ptep, entry, dirty);

	return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
}

#define clear_young_dirty_ptes clear_young_dirty_ptes
static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep,
					  unsigned int nr, cydp_t flags)
{
	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
		__clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
	else
		contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
}

#else /* CONFIG_ARM64_CONTPTE */

#define ptep_get				__ptep_get
#define set_pte					__set_pte
#define set_ptes				__set_ptes
#define pte_clear				__pte_clear
#define clear_full_ptes				__clear_full_ptes
#define get_and_clear_full_ptes			__get_and_clear_full_ptes
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define ptep_get_and_clear			__ptep_get_and_clear
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define ptep_test_and_clear_young		__ptep_test_and_clear_young
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young			__ptep_clear_flush_young
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define ptep_set_wrprotect			__ptep_set_wrprotect
#define wrprotect_ptes				__wrprotect_ptes
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags			__ptep_set_access_flags
#define clear_young_dirty_ptes			__clear_young_dirty_ptes

#endif /* CONFIG_ARM64_CONTPTE */

#endif /* !__ASSEMBLER__ */

#endif /* __ASM_PGTABLE_H */