xref: /linux/arch/arm64/include/asm/pgtable.h (revision 69050f8d6d075dc01af7a5f2f550a8067510366f)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright (C) 2012 ARM Ltd.
4  */
5 #ifndef __ASM_PGTABLE_H
6 #define __ASM_PGTABLE_H
7 
8 #include <asm/bug.h>
9 #include <asm/proc-fns.h>
10 
11 #include <asm/memory.h>
12 #include <asm/mte.h>
13 #include <asm/pgtable-hwdef.h>
14 #include <asm/pgtable-prot.h>
15 #include <asm/tlbflush.h>
16 
17 /*
18  * VMALLOC range.
19  *
20  * VMALLOC_START: beginning of the kernel vmalloc space
21  * VMALLOC_END: extends to the available space below vmemmap
22  */
23 #define VMALLOC_START		(MODULES_END)
24 #if VA_BITS == VA_BITS_MIN
25 #define VMALLOC_END		(VMEMMAP_START - SZ_8M)
26 #else
27 #define VMEMMAP_UNUSED_NPAGES	((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT)
28 #define VMALLOC_END		(VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M)
29 #endif
30 
31 #define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
32 
33 #ifndef __ASSEMBLER__
34 
35 #include <asm/cmpxchg.h>
36 #include <asm/fixmap.h>
37 #include <asm/por.h>
38 #include <linux/mmdebug.h>
39 #include <linux/mm_types.h>
40 #include <linux/sched.h>
41 #include <linux/page_table_check.h>
42 
/* Make a completed pgtable store visible to the HW table walker. */
static inline void emit_pte_barriers(void)
{
	/*
	 * These barriers are emitted under certain conditions after a pte entry
	 * was modified (see e.g. __set_pte_complete()). The dsb makes the store
	 * visible to the table walker. The isb ensures that any previous
	 * speculative "invalid translation" marker that is in the CPU's
	 * pipeline gets cleared, so that any access to that address after
	 * setting the pte to valid won't cause a spurious fault. If the thread
	 * gets preempted after storing to the pgtable but before emitting these
	 * barriers, __switch_to() emits a dsb which ensure the walker gets to
	 * see the store. There is no guarantee of an isb being issued though.
	 * This is safe because it will still get issued (albeit on a
	 * potentially different CPU) when the thread starts running again,
	 * before any access to the address.
	 */
	dsb(ishst);
	isb();
}
62 
63 static inline void queue_pte_barriers(void)
64 {
65 	if (is_lazy_mmu_mode_active()) {
66 		/* Avoid the atomic op if already set. */
67 		if (!test_thread_flag(TIF_LAZY_MMU_PENDING))
68 			set_thread_flag(TIF_LAZY_MMU_PENDING);
69 	} else {
70 		emit_pte_barriers();
71 	}
72 }
73 
/* Entering lazy MMU mode needs no arch work; deferral is flag-driven. */
static inline void arch_enter_lazy_mmu_mode(void) {}

/* Emit any pte barriers deferred by queue_pte_barriers(). */
static inline void arch_flush_lazy_mmu_mode(void)
{
	if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING))
		emit_pte_barriers();
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	arch_flush_lazy_mmu_mode();
}
86 
87 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
88 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
89 
90 /* Set stride and tlb_level in flush_*_tlb_range */
91 #define flush_pmd_tlb_range(vma, addr, end)	\
92 	__flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2)
93 #define flush_pud_tlb_range(vma, addr, end)	\
94 	__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
95 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
96 
97 /*
98  * We use local TLB invalidation instruction when reusing page in
99  * write protection fault handler to avoid TLBI broadcast in the hot
100  * path.  This will cause spurious page faults if stale read-only TLB
101  * entries exist.
102  */
103 #define flush_tlb_fix_spurious_fault(vma, address, ptep)	\
104 	local_flush_tlb_page_nonotify(vma, address)
105 
106 #define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp)	\
107 	local_flush_tlb_page_nonotify(vma, address)
108 
109 /*
110  * ZERO_PAGE is a global shared page that is always zero: used
111  * for zero-mapped memory areas etc..
112  */
113 extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
114 #define ZERO_PAGE(vaddr)	phys_to_page(__pa_symbol(empty_zero_page))
115 
116 #define pte_ERROR(e)	\
117 	pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
118 
#ifdef CONFIG_ARM64_PA_BITS_52
/* 52-bit PAs: the high address bits are folded into a separate pte field. */
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
	/* Clear PTE_MAYBE_SHARED before extracting the address bits. */
	pte_val(pte) &= ~PTE_MAYBE_SHARED;
	return (pte_val(pte) & PTE_ADDR_LOW) |
		((pte_val(pte) & PTE_ADDR_HIGH) << PTE_ADDR_HIGH_SHIFT);
}
static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
{
	return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}
#else
/* Without 52-bit PAs the output address is a simple mask of the pte. */
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
	return pte_val(pte) & PTE_ADDR_LOW;
}

static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
{
	return phys;
}
#endif
141 
142 #define pte_pfn(pte)		(__pte_to_phys(pte) >> PAGE_SHIFT)
143 #define pfn_pte(pfn,prot)	\
144 	__pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
145 
146 #define pte_none(pte)		(!pte_val(pte))
147 #define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
148 
149 /*
150  * The following only work if pte_present(). Undefined behaviour otherwise.
151  */
152 #define pte_present(pte)	(pte_valid(pte) || pte_present_invalid(pte))
153 #define pte_young(pte)		(!!(pte_val(pte) & PTE_AF))
154 #define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
155 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
156 #define pte_rdonly(pte)		(!!(pte_val(pte) & PTE_RDONLY))
157 #define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
158 #define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
159 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
160 #define pte_tagged(pte)		((pte_val(pte) & PTE_ATTRINDX_MASK) == \
161 				 PTE_ATTRINDX(MT_NORMAL_TAGGED))
162 
163 #define pte_cont_addr_end(addr, end)						\
164 ({	unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK;	\
165 	(__boundary - 1 < (end) - 1) ? __boundary : (end);			\
166 })
167 
168 #define pmd_cont_addr_end(addr, end)						\
169 ({	unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK;	\
170 	(__boundary - 1 < (end) - 1) ? __boundary : (end);			\
171 })
172 
173 #define pte_hw_dirty(pte)	(pte_write(pte) && !pte_rdonly(pte))
174 #define pte_sw_dirty(pte)	(!!(pte_val(pte) & PTE_DIRTY))
175 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
176 
177 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
178 #define pte_present_invalid(pte) \
179 	((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID)
180 /*
181  * Execute-only user mappings do not have the PTE_USER bit set. All valid
182  * kernel mappings have the PTE_UXN bit set.
183  */
184 #define pte_valid_not_user(pte) \
185 	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
186 /*
187  * Returns true if the pte is valid and has the contiguous bit set.
188  */
189 #define pte_valid_cont(pte)	(pte_valid(pte) && pte_cont(pte))
190 /*
191  * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
192  * so that we don't erroneously return false for pages that have been
193  * remapped as PROT_NONE but are yet to be flushed from the TLB.
194  * Note that we can't make any assumptions based on the state of the access
195  * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the
196  * TLB.
197  */
198 #define pte_accessible(mm, pte)	\
199 	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
200 
/*
 * Check whether POR_EL0 permits the requested access (write, execute or
 * read) for @pkey. Trivially true when POE is not supported.
 */
static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute)
{
	u64 por;

	if (!system_supports_poe())
		return true;

	por = read_sysreg_s(SYS_POR_EL0);

	if (write)
		return por_elx_allows_write(por, pkey);

	if (execute)
		return por_elx_allows_exec(por, pkey);

	return por_elx_allows_read(por, pkey);
}
218 
219 /*
220  * p??_access_permitted() is true for valid user mappings (PTE_USER
221  * bit set, subject to the write permission check). For execute-only
222  * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits
223  * not set) must return false. PROT_NONE mappings do not have the
224  * PTE_VALID bit set.
225  */
226 #define pte_access_permitted_no_overlay(pte, write) \
227 	(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
228 #define pte_access_permitted(pte, write) \
229 	(pte_access_permitted_no_overlay(pte, write) && \
230 	por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false))
231 #define pmd_access_permitted(pmd, write) \
232 	(pte_access_permitted(pmd_pte(pmd), (write)))
233 #define pud_access_permitted(pud, write) \
234 	(pte_access_permitted(pud_pte(pud), (write)))
235 
/* Return @pte with the bits in @prot cleared. */
static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
{
	pte_val(pte) &= ~pgprot_val(prot);
	return pte;
}

/* Return @pte with the bits in @prot set. */
static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
{
	pte_val(pte) |= pgprot_val(prot);
	return pte;
}

/* Return @pmd with the bits in @prot cleared. */
static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
{
	pmd_val(pmd) &= ~pgprot_val(prot);
	return pmd;
}

/* Return @pmd with the bits in @prot set. */
static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
{
	pmd_val(pmd) |= pgprot_val(prot);
	return pmd;
}
259 
260 static inline pte_t pte_mkwrite_novma(pte_t pte)
261 {
262 	pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
263 	if (pte_sw_dirty(pte))
264 		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
265 	return pte;
266 }
267 
/* Clear both dirty encodings: drop PTE_DIRTY and force PTE_RDONLY. */
static inline pte_t pte_mkclean(pte_t pte)
{
	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));

	return pte;
}

/* Mark dirty in software; writable ptes also become hardware-dirty. */
static inline pte_t pte_mkdirty(pte_t pte)
{
	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));

	if (pte_write(pte))
		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));

	return pte;
}

static inline pte_t pte_wrprotect(pte_t pte)
{
	/*
	 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
	 * clear), set the PTE_DIRTY bit. This must happen before the
	 * write/rdonly bits below are changed, or the dirty state is lost.
	 */
	if (pte_hw_dirty(pte))
		pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));

	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
	return pte;
}
299 
/* Clear the access flag. */
static inline pte_t pte_mkold(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_AF));
}

/* Set the access flag. */
static inline pte_t pte_mkyoung(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_AF));
}

/* Set the software PTE_SPECIAL bit. */
static inline pte_t pte_mkspecial(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
}

/* Set the contiguous hint bit. */
static inline pte_t pte_mkcont(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_CONT));
}

/* Clear the contiguous hint bit. */
static inline pte_t pte_mknoncont(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_CONT));
}

/* Set the HW valid bit. */
static inline pte_t pte_mkvalid(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_VALID));
}

/* Make present-invalid: pte_present() stays true, HW valid bit cleared. */
static inline pte_t pte_mkinvalid(pte_t pte)
{
	pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID));
	pte = clear_pte_bit(pte, __pgprot(PTE_VALID));
	return pte;
}

/* Set the PMD_SECT_CONT (contiguous) bit. */
static inline pmd_t pmd_mkcont(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
}

/* Clear the PMD_SECT_CONT (contiguous) bit. */
static inline pmd_t pmd_mknoncont(pmd_t pmd)
{
	return __pmd(pmd_val(pmd) & ~PMD_SECT_CONT);
}
346 
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
/* Test the software userfaultfd write-protect marker. */
static inline int pte_uffd_wp(pte_t pte)
{
	return !!(pte_val(pte) & PTE_UFFD_WP);
}

/* Mark uffd-wp; also write-protects the pte so writes fault. */
static inline pte_t pte_mkuffd_wp(pte_t pte)
{
	return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP)));
}

/* Clear the uffd-wp marker only; write permission is left as-is. */
static inline pte_t pte_clear_uffd_wp(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP));
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
363 
/* Store the pte without any ordering against the table walker. */
static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
{
	WRITE_ONCE(*ptep, pte);
}

/* Ordering tail of a pte update; pairs with __set_pte_nosync(). */
static inline void __set_pte_complete(pte_t pte)
{
	/*
	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
	 * has the necessary barriers.
	 */
	if (pte_valid_not_user(pte))
		queue_pte_barriers();
}

/* Store a pte and emit/queue the required barriers. */
static inline void __set_pte(pte_t *ptep, pte_t pte)
{
	__set_pte_nosync(ptep, pte);
	__set_pte_complete(pte);
}

/* Single-copy-atomic read of a pte. */
static inline pte_t __ptep_get(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}
389 
390 extern void __sync_icache_dcache(pte_t pteval);
391 bool pgattr_change_is_safe(pteval_t old, pteval_t new);
392 
393 /*
394  * PTE bits configuration in the presence of hardware Dirty Bit Management
395  * (PTE_WRITE == PTE_DBM):
396  *
397  * Dirty  Writable | PTE_RDONLY  PTE_WRITE  PTE_DIRTY (sw)
398  *   0      0      |   1           0          0
399  *   0      1      |   1           1          0
400  *   1      0      |   1           0          1
401  *   1      1      |   0           1          x
402  *
403  * When hardware DBM is not present, the software PTE_DIRTY bit is updated via
404  * the page fault mechanism. Checking the dirty status of a pte becomes:
405  *
406  *   PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
407  */
408 
/*
 * CONFIG_DEBUG_VM sanity check for a valid->valid pte update: warn on
 * transitions that could race with hardware AF/DBM updates or that change
 * attributes unsafely. No-op unless DEBUG_VM is enabled.
 */
static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
					   pte_t pte)
{
	pte_t old_pte;

	if (!IS_ENABLED(CONFIG_DEBUG_VM))
		return;

	old_pte = __ptep_get(ptep);

	/* Only valid->valid transitions can race with the hardware walker. */
	if (!pte_valid(old_pte) || !pte_valid(pte))
		return;
	if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1)
		return;

	/*
	 * Check for potential race with hardware updates of the pte
	 * (__ptep_set_access_flags safely changes valid ptes without going
	 * through an invalid entry).
	 */
	VM_WARN_ONCE(!pte_young(pte),
		     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
		     __func__, pte_val(old_pte), pte_val(pte));
	VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
		     "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
		     __func__, pte_val(old_pte), pte_val(pte));
	VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)),
		     "%s: unsafe attribute change: 0x%016llx -> 0x%016llx",
		     __func__, pte_val(old_pte), pte_val(pte));
}
439 
/*
 * Synchronise I/D caches for user-executable mappings and MTE tags for
 * tagged, user-accessible mappings, covering @nr_pages pages.
 */
static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
{
	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
		__sync_icache_dcache(pte);

	/*
	 * If the PTE would provide user space access to the tags associated
	 * with it then ensure that the MTE tags are synchronised.  Although
	 * pte_access_permitted_no_overlay() returns false for exec only
	 * mappings, they don't expose tags (instruction fetches don't check
	 * tags).
	 */
	if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) &&
	    !pte_special(pte) && pte_tagged(pte))
		mte_sync_tags(pte, nr_pages);
}
456 
457 /*
458  * Select all bits except the pfn
459  */
#define pte_pgprot pte_pgprot
/* Extract the non-pfn (attribute) bits of @pte by XOR-ing out the pfn. */
static inline pgprot_t pte_pgprot(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);

	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
}

#define pte_advance_pfn pte_advance_pfn
/* Return @pte rebuilt with its pfn advanced by @nr pages, attrs unchanged. */
static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
{
	return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
}
473 
474 /*
475  * Hugetlb definitions.
476  */
477 #define HUGE_MAX_HSTATE		4
478 #define HPAGE_SHIFT		PMD_SHIFT
479 #define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
480 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
481 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
482 
/*
 * Raw-value conversions between page-table levels. All arm64 levels share
 * the same descriptor layout, so these are plain reinterpretations.
 */
static inline pte_t pgd_pte(pgd_t pgd)
{
	return __pte(pgd_val(pgd));
}

static inline pte_t p4d_pte(p4d_t p4d)
{
	return __pte(p4d_val(p4d));
}

static inline pte_t pud_pte(pud_t pud)
{
	return __pte(pud_val(pud));
}

static inline pud_t pte_pud(pte_t pte)
{
	return __pud(pte_val(pte));
}

static inline pmd_t pud_pmd(pud_t pud)
{
	return __pmd(pud_val(pud));
}

static inline pte_t pmd_pte(pmd_t pmd)
{
	return __pte(pmd_val(pmd));
}

static inline pmd_t pte_pmd(pte_t pte)
{
	return __pmd(pte_val(pte));
}

/* Force the descriptor type field to "section" for a PUD mapping. */
static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
{
	return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT);
}

/* Force the descriptor type field to "section" for a PMD mapping. */
static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
{
	return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
}
527 
/* Mark a swap pte as exclusively owned (anon exclusive page). */
static inline pte_t pte_swp_mkexclusive(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}

static inline bool pte_swp_exclusive(pte_t pte)
{
	return pte_val(pte) & PTE_SWP_EXCLUSIVE;
}

static inline pte_t pte_swp_clear_exclusive(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}

#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
/* uffd-wp marker preserved across swap-out, in the swap pte layout. */
static inline pte_t pte_swp_mkuffd_wp(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
}

static inline int pte_swp_uffd_wp(pte_t pte)
{
	return !!(pte_val(pte) & PTE_SWP_UFFD_WP);
}

static inline pte_t pte_swp_clear_uffd_wp(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP));
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
559 
#ifdef CONFIG_NUMA_BALANCING
/*
 * See the comment in include/linux/pgtable.h
 */
static inline int pte_protnone(pte_t pte)
{
	/*
	 * pte_present_invalid() tells us that the pte is invalid from HW
	 * perspective but present from SW perspective, so the fields are to be
	 * interpreted as per the HW layout. The second 2 checks are the unique
	 * encoding that we use for PROT_NONE. It is insufficient to only use
	 * the first check because we share the same encoding scheme with pmds
	 * which support pmd_mkinvalid(), so can be present-invalid without
	 * being PROT_NONE.
	 */
	return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte);
}

/* PMD-level PROT_NONE test, reusing the pte encoding above. */
static inline int pmd_protnone(pmd_t pmd)
{
	return pte_protnone(pmd_pte(pmd));
}
#endif
583 
584 #define pmd_present(pmd)	pte_present(pmd_pte(pmd))
585 #define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
586 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
587 #define pmd_valid(pmd)		pte_valid(pmd_pte(pmd))
588 #define pmd_user(pmd)		pte_user(pmd_pte(pmd))
589 #define pmd_user_exec(pmd)	pte_user_exec(pmd_pte(pmd))
590 #define pmd_cont(pmd)		pte_cont(pmd_pte(pmd))
591 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
592 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
593 #define pmd_mkwrite_novma(pmd)	pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
594 #define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
595 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
596 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
597 #define pmd_mkinvalid(pmd)	pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
598 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
599 #define pmd_uffd_wp(pmd)	pte_uffd_wp(pmd_pte(pmd))
600 #define pmd_mkuffd_wp(pmd)	pte_pmd(pte_mkuffd_wp(pmd_pte(pmd)))
601 #define pmd_clear_uffd_wp(pmd)	pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd)))
602 #define pmd_swp_uffd_wp(pmd)	pte_swp_uffd_wp(pmd_pte(pmd))
603 #define pmd_swp_mkuffd_wp(pmd)	pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd)))
604 #define pmd_swp_clear_uffd_wp(pmd) \
605 				pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd)))
606 #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
607 
608 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
609 
/* Convert a pmd to a section (huge) mapping, preserving the VALID bit. */
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
	/*
	 * It's possible that the pmd is present-invalid on entry
	 * and in that case it needs to remain present-invalid on
	 * exit. So ensure the VALID bit does not get modified.
	 */
	pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID;
	pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID;

	return __pmd((pmd_val(pmd) & ~mask) | val);
}
622 
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
#define pmd_special(pte)	(!!((pmd_val(pte) & PTE_SPECIAL)))
/* Set the software PTE_SPECIAL bit on a pmd. */
static inline pmd_t pmd_mkspecial(pmd_t pmd)
{
	return set_pmd_bit(pmd, __pgprot(PTE_SPECIAL));
}
#endif
630 
631 #define __pmd_to_phys(pmd)	__pte_to_phys(pmd_pte(pmd))
632 #define __phys_to_pmd_val(phys)	__phys_to_pte_val(phys)
633 #define pmd_pfn(pmd)		((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
634 #define pfn_pmd(pfn,prot)	__pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
635 
636 #define pud_young(pud)		pte_young(pud_pte(pud))
637 #define pud_mkyoung(pud)	pte_pud(pte_mkyoung(pud_pte(pud)))
638 #define pud_write(pud)		pte_write(pud_pte(pud))
639 
/* Convert a pud to a section (huge) mapping, preserving the VALID bit. */
static inline pud_t pud_mkhuge(pud_t pud)
{
	/*
	 * It's possible that the pud is present-invalid on entry
	 * and in that case it needs to remain present-invalid on
	 * exit. So ensure the VALID bit does not get modified.
	 */
	pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID;
	pudval_t val = PUD_TYPE_SECT & ~PTE_VALID;

	return __pud((pud_val(pud) & ~mask) | val);
}
652 
653 #define __pud_to_phys(pud)	__pte_to_phys(pud_pte(pud))
654 #define __phys_to_pud_val(phys)	__phys_to_pte_val(phys)
655 #define pud_pfn(pud)		((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
656 #define pfn_pud(pfn,prot)	__pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
657 
#define pmd_pgprot pmd_pgprot
/* Extract the non-pfn (attribute) bits of @pmd by XOR-ing out the pfn. */
static inline pgprot_t pmd_pgprot(pmd_t pmd)
{
	unsigned long pfn = pmd_pfn(pmd);

	return __pgprot(pmd_val(pfn_pmd(pfn, __pgprot(0))) ^ pmd_val(pmd));
}

#define pud_pgprot pud_pgprot
/* Extract the non-pfn (attribute) bits of @pud by XOR-ing out the pfn. */
static inline pgprot_t pud_pgprot(pud_t pud)
{
	unsigned long pfn = pud_pfn(pud);

	return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
}
673 
/*
 * Write @nr contiguous entries of size @pgsize (PAGE/PMD/PUD) starting at
 * @ptep, advancing the pfn by @pgsize per entry. The barriers are emitted
 * once, after the last store.
 */
static inline void __set_ptes_anysz(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, pte_t pte, unsigned int nr,
				    unsigned long pgsize)
{
	unsigned long stride = pgsize >> PAGE_SHIFT;

	/* Route to the page_table_check hook matching the entry size. */
	switch (pgsize) {
	case PAGE_SIZE:
		page_table_check_ptes_set(mm, addr, ptep, pte, nr);
		break;
	case PMD_SIZE:
		page_table_check_pmds_set(mm, addr, (pmd_t *)ptep,
					  pte_pmd(pte), nr);
		break;
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		page_table_check_puds_set(mm, addr, (pud_t *)ptep,
					  pte_pud(pte), nr);
		break;
#endif
	default:
		VM_WARN_ON(1);
	}

	__sync_cache_and_tags(pte, nr * stride);

	for (;;) {
		__check_safe_pte_update(mm, ptep, pte);
		__set_pte_nosync(ptep, pte);
		if (--nr == 0)
			break;
		ptep++;
		pte = pte_advance_pfn(pte, stride);
	}

	/* Barriers only after the final entry has been stored. */
	__set_pte_complete(pte);
}
711 
/* Set @nr base-page ptes; see __set_ptes_anysz(). */
static inline void __set_ptes(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte, unsigned int nr)
{
	__set_ptes_anysz(mm, addr, ptep, pte, nr, PAGE_SIZE);
}

/* Set @nr PMD-sized block entries; see __set_ptes_anysz(). */
static inline void __set_pmds(struct mm_struct *mm, unsigned long addr,
			      pmd_t *pmdp, pmd_t pmd, unsigned int nr)
{
	__set_ptes_anysz(mm, addr, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE);
}
#define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1)

/* Set @nr PUD-sized block entries; see __set_ptes_anysz(). */
static inline void __set_puds(struct mm_struct *mm, unsigned long addr,
			      pud_t *pudp, pud_t pud, unsigned int nr)
{
	__set_ptes_anysz(mm, addr, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE);
}
#define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1)
731 
732 #define __p4d_to_phys(p4d)	__pte_to_phys(p4d_pte(p4d))
733 #define __phys_to_p4d_val(phys)	__phys_to_pte_val(phys)
734 
735 #define __pgd_to_phys(pgd)	__pte_to_phys(pgd_pte(pgd))
736 #define __phys_to_pgd_val(phys)	__phys_to_pte_val(phys)
737 
738 #define __pgprot_modify(prot,mask,bits) \
739 	__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
740 
741 #define pgprot_nx(prot) \
742 	__pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN)
743 
744 #define pgprot_decrypted(prot) \
745 	__pgprot_modify(prot, PROT_NS_SHARED, PROT_NS_SHARED)
746 #define pgprot_encrypted(prot) \
747 	__pgprot_modify(prot, PROT_NS_SHARED, 0)
748 
749 /*
750  * Mark the prot value as uncacheable and unbufferable.
751  */
752 #define pgprot_noncached(prot) \
753 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
754 #define pgprot_writecombine(prot) \
755 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
756 #define pgprot_device(prot) \
757 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
758 #define pgprot_tagged(prot) \
759 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED))
760 #define pgprot_mhp	pgprot_tagged
761 /*
762  * DMA allocations for non-coherent devices use what the Arm architecture calls
763  * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
764  * and merging of writes.  This is different from "Device-nGnR[nE]" memory which
765  * is intended for MMIO and thus forbids speculation, preserves access size,
766  * requires strict alignment and can also force write responses to come from the
767  * endpoint.
768  */
769 #define pgprot_dmacoherent(prot) \
770 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, \
771 			PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
772 
773 #define __HAVE_PHYS_MEM_ACCESS_PROT
774 struct file;
775 extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
776 				     unsigned long size, pgprot_t vma_prot);
777 
778 #define pmd_none(pmd)		(!pmd_val(pmd))
779 
780 #define pmd_table(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
781 				 PMD_TYPE_TABLE)
782 #define pmd_sect(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
783 				 PMD_TYPE_SECT)
784 #define pmd_leaf(pmd)		(pmd_present(pmd) && !pmd_table(pmd))
785 #define pmd_bad(pmd)		(!pmd_table(pmd))
786 
787 #define pmd_leaf_size(pmd)	(pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
788 #define pte_leaf_size(pte)	(pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
789 
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* A present pmd that is not a table entry is a transparent huge page. */
static inline int pmd_trans_huge(pmd_t pmd)
{
	/*
	 * If pmd is present-invalid, pmd_table() won't detect it
	 * as a table, so force the valid bit for the comparison.
	 */
	return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
800 
801 #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
802 static inline bool pud_sect(pud_t pud) { return false; }
803 static inline bool pud_table(pud_t pud) { return true; }
804 #else
805 #define pud_sect(pud)		((pud_val(pud) & PUD_TYPE_MASK) == \
806 				 PUD_TYPE_SECT)
807 #define pud_table(pud)		((pud_val(pud) & PUD_TYPE_MASK) == \
808 				 PUD_TYPE_TABLE)
809 #endif
810 
811 extern pgd_t swapper_pg_dir[];
812 extern pgd_t idmap_pg_dir[];
813 extern pgd_t tramp_pg_dir[];
814 extern pgd_t reserved_pg_dir[];
815 
816 extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd);
817 
/* True if @addr lies in the same page as the kernel's swapper_pg_dir. */
static inline bool in_swapper_pgdir(void *addr)
{
	return ((unsigned long)addr & PAGE_MASK) ==
	        ((unsigned long)swapper_pg_dir & PAGE_MASK);
}
823 
/* Install a pmd entry, routing swapper_pg_dir updates via the fixmap path. */
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
#ifdef __PAGETABLE_PMD_FOLDED
	/* With folded pmds this slot is really a pgd; keep swapper updates safe. */
	if (in_swapper_pgdir(pmdp)) {
		set_swapper_pgd((pgd_t *)pmdp, __pgd(pmd_val(pmd)));
		return;
	}
#endif /* __PAGETABLE_PMD_FOLDED */

	WRITE_ONCE(*pmdp, pmd);

	if (pmd_valid(pmd))
		queue_pte_barriers();
}

static inline void pmd_clear(pmd_t *pmdp)
{
	set_pmd(pmdp, __pmd(0));
}
843 
/* Physical address of the pte table this pmd points to. */
static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
{
	return __pmd_to_phys(pmd);
}

/* Linear-map virtual address of the pte table this pmd points to. */
static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
	return (unsigned long)__va(pmd_page_paddr(pmd));
}
853 
854 /* Find an entry in the third-level page table. */
855 #define pte_offset_phys(dir,addr)	(pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
856 
857 #define pte_set_fixmap(addr)		((pte_t *)set_fixmap_offset(FIX_PTE, addr))
858 #define pte_set_fixmap_offset(pmd, addr)	pte_set_fixmap(pte_offset_phys(pmd, addr))
859 #define pte_clear_fixmap()		clear_fixmap(FIX_PTE)
860 
861 #define pmd_page(pmd)			phys_to_page(__pmd_to_phys(pmd))
862 
863 /* use ONLY for statically allocated translation tables */
864 #define pte_offset_kimg(dir,addr)	((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
865 
866 #if CONFIG_PGTABLE_LEVELS > 2
867 
868 #define pmd_ERROR(e)	\
869 	pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
870 
871 #define pud_none(pud)		(!pud_val(pud))
872 #define pud_bad(pud)		((pud_val(pud) & PUD_TYPE_MASK) != \
873 				 PUD_TYPE_TABLE)
874 #define pud_present(pud)	pte_present(pud_pte(pud))
875 #ifndef __PAGETABLE_PMD_FOLDED
876 #define pud_leaf(pud)		(pud_present(pud) && !pud_table(pud))
877 #else
878 #define pud_leaf(pud)		false
879 #endif
880 #define pud_valid(pud)		pte_valid(pud_pte(pud))
881 #define pud_user(pud)		pte_user(pud_pte(pud))
882 #define pud_user_exec(pud)	pte_user_exec(pud_pte(pud))
883 
884 static inline bool pgtable_l4_enabled(void);
885 
/* Install a pud entry; with L4 folded, swapper slots are really pgds. */
static inline void set_pud(pud_t *pudp, pud_t pud)
{
	if (!pgtable_l4_enabled() && in_swapper_pgdir(pudp)) {
		set_swapper_pgd((pgd_t *)pudp, __pgd(pud_val(pud)));
		return;
	}

	WRITE_ONCE(*pudp, pud);

	if (pud_valid(pud))
		queue_pte_barriers();
}

static inline void pud_clear(pud_t *pudp)
{
	set_pud(pudp, __pud(0));
}

/* Physical address of the pmd table this pud points to. */
static inline phys_addr_t pud_page_paddr(pud_t pud)
{
	return __pud_to_phys(pud);
}

/* Linear-map pointer to the pmd table this pud points to. */
static inline pmd_t *pud_pgtable(pud_t pud)
{
	return (pmd_t *)__va(pud_page_paddr(pud));
}
913 
914 /* Find an entry in the second-level page table. */
915 #define pmd_offset_phys(dir, addr)	(pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
916 
917 #define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
918 #define pmd_set_fixmap_offset(pud, addr)	pmd_set_fixmap(pmd_offset_phys(pud, addr))
919 #define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)
920 
921 #define pud_page(pud)			phys_to_page(__pud_to_phys(pud))
922 
923 /* use ONLY for statically allocated translation tables */
924 #define pmd_offset_kimg(dir,addr)	((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
925 
926 #else
927 
928 #define pud_valid(pud)		false
929 #define pud_page_paddr(pud)	({ BUILD_BUG(); 0; })
930 #define pud_user_exec(pud)	pud_user(pud) /* Always 0 with folding */
931 
932 /* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
933 #define pmd_set_fixmap(addr)		NULL
934 #define pmd_set_fixmap_offset(pudp, addr)	((pmd_t *)pudp)
935 #define pmd_clear_fixmap()
936 
937 #define pmd_offset_kimg(dir,addr)	((pmd_t *)dir)
938 
939 #endif	/* CONFIG_PGTABLE_LEVELS > 2 */
940 
941 #if CONFIG_PGTABLE_LEVELS > 3
942 
/*
 * Whether the 4th translation level is in use. Only LPA2 kernels with
 * exactly 5 configured levels can run with it folded at runtime.
 */
static __always_inline bool pgtable_l4_enabled(void)
{
	if (CONFIG_PGTABLE_LEVELS > 4 || !IS_ENABLED(CONFIG_ARM64_LPA2))
		return true;
	if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
		return vabits_actual == VA_BITS;
	return alternative_has_cap_unlikely(ARM64_HAS_VA52);
}

/* pud level is folded iff the 4th level is disabled at runtime. */
static inline bool mm_pud_folded(const struct mm_struct *mm)
{
	return !pgtable_l4_enabled();
}
#define mm_pud_folded  mm_pud_folded
957 
958 #define pud_ERROR(e)	\
959 	pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
960 
961 #define p4d_none(p4d)		(pgtable_l4_enabled() && !p4d_val(p4d))
962 #define p4d_bad(p4d)		(pgtable_l4_enabled() && \
963 				((p4d_val(p4d) & P4D_TYPE_MASK) != \
964 				 P4D_TYPE_TABLE))
965 #define p4d_present(p4d)	(!p4d_none(p4d))
966 
967 static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
968 {
969 	if (in_swapper_pgdir(p4dp)) {
970 		set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
971 		return;
972 	}
973 
974 	WRITE_ONCE(*p4dp, p4d);
975 	queue_pte_barriers();
976 }
977 
978 static inline void p4d_clear(p4d_t *p4dp)
979 {
980 	if (pgtable_l4_enabled())
981 		set_p4d(p4dp, __p4d(0));
982 }
983 
984 static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
985 {
986 	return __p4d_to_phys(p4d);
987 }
988 
989 #define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
990 
991 static inline pud_t *p4d_to_folded_pud(p4d_t *p4dp, unsigned long addr)
992 {
993 	/* Ensure that 'p4dp' indexes a page table according to 'addr' */
994 	VM_BUG_ON(((addr >> P4D_SHIFT) ^ ((u64)p4dp >> 3)) % PTRS_PER_P4D);
995 
996 	return (pud_t *)PTR_ALIGN_DOWN(p4dp, PAGE_SIZE) + pud_index(addr);
997 }
998 
999 static inline pud_t *p4d_pgtable(p4d_t p4d)
1000 {
1001 	return (pud_t *)__va(p4d_page_paddr(p4d));
1002 }
1003 
/* Physical address of the pud entry for @addr; requires a real 4th level. */
static inline phys_addr_t pud_offset_phys(p4d_t *p4dp, unsigned long addr)
{
	BUG_ON(!pgtable_l4_enabled());

	return p4d_page_paddr(READ_ONCE(*p4dp)) + pud_index(addr) * sizeof(pud_t);
}

/*
 * Walk from a previously-read p4d entry to the pud entry for @addr
 * without re-reading *p4dp, falling back to the folded-pud path when the
 * fourth level is not in use.
 */
static inline
pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long addr)
{
	if (!pgtable_l4_enabled())
		return p4d_to_folded_pud(p4dp, addr);
	return (pud_t *)__va(p4d_page_paddr(p4d)) + pud_index(addr);
}
#define pud_offset_lockless pud_offset_lockless

static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long addr)
{
	return pud_offset_lockless(p4dp, READ_ONCE(*p4dp), addr);
}
#define pud_offset	pud_offset

/* Fixmap helpers; NULL / folded redirects when the 4th level is disabled. */
static inline pud_t *pud_set_fixmap(unsigned long addr)
{
	if (!pgtable_l4_enabled())
		return NULL;
	return (pud_t *)set_fixmap_offset(FIX_PUD, addr);
}

static inline pud_t *pud_set_fixmap_offset(p4d_t *p4dp, unsigned long addr)
{
	if (!pgtable_l4_enabled())
		return p4d_to_folded_pud(p4dp, addr);
	return pud_set_fixmap(pud_offset_phys(p4dp, addr));
}

static inline void pud_clear_fixmap(void)
{
	if (pgtable_l4_enabled())
		clear_fixmap(FIX_PUD);
}

/* use ONLY for statically allocated translation tables */
static inline pud_t *pud_offset_kimg(p4d_t *p4dp, u64 addr)
{
	if (!pgtable_l4_enabled())
		return p4d_to_folded_pud(p4dp, addr);
	return (pud_t *)__phys_to_kimg(pud_offset_phys(p4dp, addr));
}

#define p4d_page(p4d)		pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
#else

/* CONFIG_PGTABLE_LEVELS <= 3: the pud level is always folded. */
static inline bool pgtable_l4_enabled(void) { return false; }

#define p4d_page_paddr(p4d)	({ BUILD_BUG(); 0;})

/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
#define pud_set_fixmap(addr)		NULL
#define pud_set_fixmap_offset(pgdp, addr)	((pud_t *)pgdp)
#define pud_clear_fixmap()

#define pud_offset_kimg(dir,addr)	((pud_t *)dir)

#endif  /* CONFIG_PGTABLE_LEVELS > 3 */
1070 
#if CONFIG_PGTABLE_LEVELS > 4

/*
 * True when the fifth page-table level (p4d) is actually in use; it is
 * folded at runtime when the full VA_BITS range is unavailable. Before
 * the ARM64_ALWAYS_BOOT alternative is applied we compare vabits_actual
 * against VA_BITS, afterwards we rely on the ARM64_HAS_VA52 capability.
 */
static __always_inline bool pgtable_l5_enabled(void)
{
	if (!alternative_has_cap_likely(ARM64_ALWAYS_BOOT))
		return vabits_actual == VA_BITS;
	return alternative_has_cap_unlikely(ARM64_HAS_VA52);
}

static inline bool mm_p4d_folded(const struct mm_struct *mm)
{
	return !pgtable_l5_enabled();
}
#define mm_p4d_folded  mm_p4d_folded

#define p4d_ERROR(e)	\
	pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))

/* When the p4d is folded, a pgd entry is never none/bad by definition. */
#define pgd_none(pgd)		(pgtable_l5_enabled() && !pgd_val(pgd))
#define pgd_bad(pgd)		(pgtable_l5_enabled() && \
				((pgd_val(pgd) & PGD_TYPE_MASK) != \
				 PGD_TYPE_TABLE))
#define pgd_present(pgd)	(!pgd_none(pgd))

/*
 * Install a pgd entry. Writes into the swapper pgdir are routed through
 * set_swapper_pgd(); any other table is written directly and the pte
 * barriers are queued (see emit_pte_barriers()).
 */
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	if (in_swapper_pgdir(pgdp)) {
		set_swapper_pgd(pgdp, __pgd(pgd_val(pgd)));
		return;
	}

	WRITE_ONCE(*pgdp, pgd);
	queue_pte_barriers();
}

/* Zap a pgd entry; a no-op when the p4d level is folded. */
static inline void pgd_clear(pgd_t *pgdp)
{
	if (pgtable_l5_enabled())
		set_pgd(pgdp, __pgd(0));
}

/* Physical address of the p4d table that @pgd points to. */
static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
{
	return __pgd_to_phys(pgd);
}

#define p4d_index(addr)		(((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))

/*
 * With the p4d folded, the pgd entry is itself the p4d entry: derive a
 * p4d pointer within the same (page-sized) table from @pgdp and @addr.
 */
static inline p4d_t *pgd_to_folded_p4d(pgd_t *pgdp, unsigned long addr)
{
	/* Ensure that 'pgdp' indexes a page table according to 'addr' */
	VM_BUG_ON(((addr >> PGDIR_SHIFT) ^ ((u64)pgdp >> 3)) % PTRS_PER_PGD);

	return (p4d_t *)PTR_ALIGN_DOWN(pgdp, PAGE_SIZE) + p4d_index(addr);
}

/* Physical address of the p4d entry for @addr; requires a real 5th level. */
static inline phys_addr_t p4d_offset_phys(pgd_t *pgdp, unsigned long addr)
{
	BUG_ON(!pgtable_l5_enabled());

	return pgd_page_paddr(READ_ONCE(*pgdp)) + p4d_index(addr) * sizeof(p4d_t);
}
1133 
/*
 * Walk from a previously-read pgd entry to the p4d entry for @addr
 * without re-reading *pgdp, falling back to the folded-p4d path when the
 * fifth level is not in use.
 */
static inline
p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
{
	if (!pgtable_l5_enabled())
		return pgd_to_folded_p4d(pgdp, addr);
	return (p4d_t *)__va(pgd_page_paddr(pgd)) + p4d_index(addr);
}
#define p4d_offset_lockless p4d_offset_lockless

static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long addr)
{
	return p4d_offset_lockless(pgdp, READ_ONCE(*pgdp), addr);
}

/* Fixmap helpers; NULL / folded redirects when the 5th level is disabled. */
static inline p4d_t *p4d_set_fixmap(unsigned long addr)
{
	if (!pgtable_l5_enabled())
		return NULL;
	return (p4d_t *)set_fixmap_offset(FIX_P4D, addr);
}

static inline p4d_t *p4d_set_fixmap_offset(pgd_t *pgdp, unsigned long addr)
{
	if (!pgtable_l5_enabled())
		return pgd_to_folded_p4d(pgdp, addr);
	return p4d_set_fixmap(p4d_offset_phys(pgdp, addr));
}

static inline void p4d_clear_fixmap(void)
{
	if (pgtable_l5_enabled())
		clear_fixmap(FIX_P4D);
}

/* use ONLY for statically allocated translation tables */
static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
{
	if (!pgtable_l5_enabled())
		return pgd_to_folded_p4d(pgdp, addr);
	return (p4d_t *)__phys_to_kimg(p4d_offset_phys(pgdp, addr));
}

#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
1177 
#else

/* CONFIG_PGTABLE_LEVELS <= 4: the p4d level is always folded. */
static inline bool pgtable_l5_enabled(void) { return false; }

#define p4d_index(addr)		(((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))

/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
#define p4d_set_fixmap(addr)		NULL
#define p4d_set_fixmap_offset(p4dp, addr)	((p4d_t *)p4dp)
#define p4d_clear_fixmap()

#define p4d_offset_kimg(dir,addr)	((p4d_t *)dir)

static inline
p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
{
	/*
	 * With runtime folding of the pud, pud_offset_lockless() passes
	 * the 'pgd_t *' we return here to p4d_to_folded_pud(), which
	 * will offset the pointer assuming that it points into
	 * a page-table page. However, the fast GUP path passes us a
	 * pgd_t allocated on the stack and so we must use the original
	 * pointer in 'pgdp' to construct the p4d pointer instead of
	 * using the generic p4d_offset_lockless() implementation.
	 *
	 * Note: reusing the original pointer means that we may
	 * dereference the same (live) page-table entry multiple times.
	 * This is safe because it is still only loaded once in the
	 * context of each level and the CPU guarantees same-address
	 * read-after-read ordering.
	 */
	return p4d_offset(pgdp, addr);
}
#define p4d_offset_lockless p4d_offset_lockless_folded

#endif  /* CONFIG_PGTABLE_LEVELS > 4 */
1214 
#define pgd_ERROR(e)	\
	pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))

#define pgd_set_fixmap(addr)	((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
#define pgd_clear_fixmap()	clear_fixmap(FIX_PGD)

/*
 * Rebuild @pte with the protection bits taken from @newprot. Only the
 * bits in 'mask' come from @newprot; everything else (including the
 * output address) is preserved, and software/hardware dirty state is
 * carried across the change.
 */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	/*
	 * Normal and Normal-Tagged are two different memory types and indices
	 * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK.
	 */
	const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
			      PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE |
			      PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK;

	/* preserve the hardware dirty information */
	if (pte_hw_dirty(pte))
		pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));

	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
	/*
	 * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware
	 * dirtiness again.
	 */
	if (pte_sw_dirty(pte))
		pte = pte_mkdirty(pte);
	return pte;
}
1244 
1245 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
1246 {
1247 	return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
1248 }
1249 
extern int __ptep_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pte_t *ptep,
				 pte_t entry, int dirty);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
/* PMD flavour of __ptep_set_access_flags(): reuses the pte implementation. */
static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty)
{
	return __ptep_set_access_flags(vma, address, (pte_t *)pmdp,
							pmd_pte(entry), dirty);
}
#endif

#ifdef CONFIG_PAGE_TABLE_CHECK
/* PAGE_TABLE_CHECK: is the entry a valid user-accessible leaf mapping? */
static inline bool pte_user_accessible_page(pte_t pte, unsigned long addr)
{
	return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte));
}

static inline bool pmd_user_accessible_page(pmd_t pmd, unsigned long addr)
{
	return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}

static inline bool pud_user_accessible_page(pud_t pud, unsigned long addr)
{
	return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud));
}
#endif

/*
 * Atomic pte/pmd modifications.
 */

static inline void __pte_clear(struct mm_struct *mm,
			       unsigned long addr, pte_t *ptep)
{
	__set_pte(ptep, __pte(0));
}

/*
 * Clear the access flag with a cmpxchg loop so that a racing update of
 * the entry is not lost. Returns whether the entry was young before the
 * flag was cleared.
 */
static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
					      unsigned long address,
					      pte_t *ptep)
{
	pte_t old_pte, pte;

	pte = __ptep_get(ptep);
	do {
		old_pte = pte;
		pte = pte_mkold(pte);
		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte), pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));

	return pte_young(pte);
}
1308 
/* Clear the access flag and, if it was set, invalidate the TLB entry. */
static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pte_t *ptep)
{
	int young = __ptep_test_and_clear_young(vma, address, ptep);

	if (young) {
		/*
		 * We can elide the trailing DSB here since the worst that can
		 * happen is that a CPU continues to use the young entry in its
		 * TLB and we mistakenly reclaim the associated page. The
		 * window for such an event is bounded by the next
		 * context-switch, which provides a DSB to complete the TLB
		 * invalidation.
		 */
		flush_tlb_page_nosync(vma, address);
	}

	return young;
}

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pmd_t *pmdp)
{
	/* Operation applies to PMD table entry only if FEAT_HAFT is enabled */
	VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft());
	return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */

/*
 * Atomically exchange the entry with zero, then feed the old value to
 * the page-table-check hook matching @pgsize (PAGE/PMD/PUD size).
 */
static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm,
					       unsigned long address,
					       pte_t *ptep,
					       unsigned long pgsize)
{
	pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));

	switch (pgsize) {
	case PAGE_SIZE:
		page_table_check_pte_clear(mm, address, pte);
		break;
	case PMD_SIZE:
		page_table_check_pmd_clear(mm, address, pte_pmd(pte));
		break;
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		page_table_check_pud_clear(mm, address, pte_pud(pte));
		break;
#endif
	default:
		VM_WARN_ON(1);
	}

	return pte;
}

static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
				       unsigned long address, pte_t *ptep)
{
	return __ptep_get_and_clear_anysz(mm, address, ptep, PAGE_SIZE);
}
1372 
1373 static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
1374 				pte_t *ptep, unsigned int nr, int full)
1375 {
1376 	for (;;) {
1377 		__ptep_get_and_clear(mm, addr, ptep);
1378 		if (--nr == 0)
1379 			break;
1380 		ptep++;
1381 		addr += PAGE_SIZE;
1382 	}
1383 }
1384 
/*
 * Clear @nr contiguous ptes and return the first entry, with the dirty
 * and young bits of the whole batch folded into the returned pte.
 */
static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, int full)
{
	pte_t pte, tmp_pte;

	pte = __ptep_get_and_clear(mm, addr, ptep);
	while (--nr) {
		ptep++;
		addr += PAGE_SIZE;
		tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
		if (pte_dirty(tmp_pte))
			pte = pte_mkdirty(pte);
		if (pte_young(tmp_pte))
			pte = pte_mkyoung(pte);
	}
	return pte;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long address, pmd_t *pmdp)
{
	return pte_pmd(__ptep_get_and_clear_anysz(mm, address, (pte_t *)pmdp, PMD_SIZE));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * cmpxchg loop to wrprotect an entry, starting from a caller-supplied
 * snapshot @pte of *ptep; retries until the stored value matches the
 * expected one so racing updates are not lost.
 */
static inline void ___ptep_set_wrprotect(struct mm_struct *mm,
					unsigned long address, pte_t *ptep,
					pte_t pte)
{
	pte_t old_pte;

	do {
		old_pte = pte;
		pte = pte_wrprotect(pte);
		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte), pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));
}

/*
 * __ptep_set_wrprotect - mark read-only while transferring potential hardware
 * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
 */
static inline void __ptep_set_wrprotect(struct mm_struct *mm,
					unsigned long address, pte_t *ptep)
{
	___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep));
}
1436 
1437 static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address,
1438 				pte_t *ptep, unsigned int nr)
1439 {
1440 	unsigned int i;
1441 
1442 	for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++)
1443 		__ptep_set_wrprotect(mm, address, ptep);
1444 }
1445 
/*
 * cmpxchg loop clearing the young and/or dirty bits selected by @flags,
 * starting from a caller-supplied snapshot @pte of *ptep.
 */
static inline void __clear_young_dirty_pte(struct vm_area_struct *vma,
					   unsigned long addr, pte_t *ptep,
					   pte_t pte, cydp_t flags)
{
	pte_t old_pte;

	do {
		old_pte = pte;

		if (flags & CYDP_CLEAR_YOUNG)
			pte = pte_mkold(pte);
		if (flags & CYDP_CLEAR_DIRTY)
			pte = pte_mkclean(pte);

		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
					       pte_val(old_pte), pte_val(pte));
	} while (pte_val(pte) != pte_val(old_pte));
}

/* Batched variant of the above over @nr contiguous ptes. */
static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma,
					    unsigned long addr, pte_t *ptep,
					    unsigned int nr, cydp_t flags)
{
	pte_t pte;

	for (;;) {
		pte = __ptep_get(ptep);

		/* Clearing both bits uses a plain store; otherwise cmpxchg. */
		if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY))
			__set_pte(ptep, pte_mkclean(pte_mkold(pte)));
		else
			__clear_young_dirty_pte(vma, addr, ptep, pte, flags);

		if (--nr == 0)
			break;
		ptep++;
		addr += PAGE_SIZE;
	}
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
				      unsigned long address, pmd_t *pmdp)
{
	__ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
}

#define pmdp_establish pmdp_establish
/* Atomically replace a pmd entry and return the previous value. */
static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
	page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
	return __pmd(xchg_relaxed(&pmd_val(*pmdp), pmd_val(pmd)));
}
#endif
1502 
1503 /*
1504  * Encode and decode a swap entry:
1505  *	bits 0-1:	present (must be zero)
1506  *	bits 2:		remember PG_anon_exclusive
1507  *	bit  3:		remember uffd-wp state
1508  *	bits 6-10:	swap type
1509  *	bit  11:	PTE_PRESENT_INVALID (must be zero)
1510  *	bits 12-61:	swap offset
1511  */
1512 #define __SWP_TYPE_SHIFT	6
1513 #define __SWP_TYPE_BITS		5
1514 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
1515 #define __SWP_OFFSET_SHIFT	12
1516 #define __SWP_OFFSET_BITS	50
1517 #define __SWP_OFFSET_MASK	((1UL << __SWP_OFFSET_BITS) - 1)
1518 
1519 #define __swp_type(x)		(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
1520 #define __swp_offset(x)		(((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK)
1521 #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })
1522 
1523 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
1524 #define __swp_entry_to_pte(swp)	((pte_t) { (swp).val })
1525 
1526 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1527 #define __pmd_to_swp_entry(pmd)		((swp_entry_t) { pmd_val(pmd) })
1528 #define __swp_entry_to_pmd(swp)		__pmd((swp).val)
1529 #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
1530 
1531 /*
1532  * Ensure that there are not more swap files than can be encoded in the kernel
1533  * PTEs.
1534  */
1535 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
1536 
1537 #ifdef CONFIG_ARM64_MTE
1538 
1539 #define __HAVE_ARCH_PREPARE_TO_SWAP
1540 extern int arch_prepare_to_swap(struct folio *folio);
1541 
1542 #define __HAVE_ARCH_SWAP_INVALIDATE
1543 static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
1544 {
1545 	if (system_supports_mte())
1546 		mte_invalidate_tags(type, offset);
1547 }
1548 
1549 static inline void arch_swap_invalidate_area(int type)
1550 {
1551 	if (system_supports_mte())
1552 		mte_invalidate_tags_area(type);
1553 }
1554 
1555 #define __HAVE_ARCH_SWAP_RESTORE
1556 extern void arch_swap_restore(swp_entry_t entry, struct folio *folio);
1557 
1558 #endif /* CONFIG_ARM64_MTE */
1559 
1560 /*
1561  * On AArch64, the cache coherency is handled via the __set_ptes() function.
1562  */
1563 static inline void update_mmu_cache_range(struct vm_fault *vmf,
1564 		struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
1565 		unsigned int nr)
1566 {
1567 	/*
1568 	 * We don't do anything here, so there's a very small chance of
1569 	 * us retaking a user fault which we just fixed up. The alternative
1570 	 * is doing a dsb(ishst), but that penalises the fastpath.
1571 	 */
1572 }
1573 
1574 #define update_mmu_cache(vma, addr, ptep) \
1575 	update_mmu_cache_range(NULL, vma, addr, ptep, 1)
1576 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
1577 
1578 #ifdef CONFIG_ARM64_PA_BITS_52
1579 #define phys_to_ttbr(addr)	(((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
1580 #else
1581 #define phys_to_ttbr(addr)	(addr)
1582 #endif
1583 
1584 /*
1585  * On arm64 without hardware Access Flag, copying from user will fail because
1586  * the pte is old and cannot be marked young. So we always end up with zeroed
1587  * page after fork() + CoW for pfn mappings. We don't always have a
1588  * hardware-managed access flag on arm64.
1589  */
1590 #define arch_has_hw_pte_young		cpu_has_hw_af
1591 
1592 #ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
1593 #define arch_has_hw_nonleaf_pmd_young	system_supports_haft
1594 #endif
1595 
1596 /*
1597  * Experimentally, it's cheap to set the access flag in hardware and we
1598  * benefit from prefaulting mappings as 'old' to start with.
1599  */
1600 #define arch_wants_old_prefaulted_pte	cpu_has_hw_af
1601 
1602 /*
1603  * Request exec memory is read into pagecache in at least 64K folios. This size
1604  * can be contpte-mapped when 4K base pages are in use (16 pages into 1 iTLB
1605  * entry), and HPA can coalesce it (4 pages into 1 TLB entry) when 16K base
1606  * pages are in use.
1607  */
1608 #define exec_folio_order() ilog2(SZ_64K >> PAGE_SHIFT)
1609 
1610 static inline bool pud_sect_supported(void)
1611 {
1612 	return PAGE_SIZE == SZ_4K;
1613 }
1614 
1615 
1616 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
1617 #define ptep_modify_prot_start ptep_modify_prot_start
1618 extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
1619 				    unsigned long addr, pte_t *ptep);
1620 
1621 #define ptep_modify_prot_commit ptep_modify_prot_commit
1622 extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
1623 				    unsigned long addr, pte_t *ptep,
1624 				    pte_t old_pte, pte_t new_pte);
1625 
1626 #define modify_prot_start_ptes modify_prot_start_ptes
1627 extern pte_t modify_prot_start_ptes(struct vm_area_struct *vma,
1628 				    unsigned long addr, pte_t *ptep,
1629 				    unsigned int nr);
1630 
1631 #define modify_prot_commit_ptes modify_prot_commit_ptes
1632 extern void modify_prot_commit_ptes(struct vm_area_struct *vma, unsigned long addr,
1633 				    pte_t *ptep, pte_t old_pte, pte_t pte,
1634 				    unsigned int nr);
1635 
#ifdef CONFIG_ARM64_CONTPTE

/*
 * The contpte APIs are used to transparently manage the contiguous bit in ptes
 * where it is possible and makes sense to do so. The PTE_CONT bit is considered
 * a private implementation detail of the public ptep API (see below).
 */
extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte);
extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep);
extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte, unsigned int nr);
extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, unsigned int nr, int full);
extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, int full);
int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep, unsigned int nr);
int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep, unsigned int nr);
extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, unsigned int nr);
extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				pte_t entry, int dirty);
extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				unsigned int nr, cydp_t flags);

static __always_inline void contpte_try_fold(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep, pte_t pte)
{
	/*
	 * Only bother trying if both the virtual and physical addresses are
	 * aligned and correspond to the last entry in a contig range. The core
	 * code mostly modifies ranges from low to high, so this is likely the
	 * last modification in the contig range, so a good time to fold.
	 * We can't fold special mappings, because there is no associated folio.
	 */

	const unsigned long contmask = CONT_PTES - 1;
	bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask;

	if (unlikely(valign)) {
		bool palign = (pte_pfn(pte) & contmask) == contmask;

		if (unlikely(palign &&
		    pte_valid(pte) && !pte_cont(pte) && !pte_special(pte)))
			__contpte_try_fold(mm, addr, ptep, pte);
	}
}

static __always_inline void contpte_try_unfold(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep, pte_t pte)
{
	if (unlikely(pte_valid_cont(pte)))
		__contpte_try_unfold(mm, addr, ptep, pte);
}

#define pte_batch_hint pte_batch_hint
/*
 * Number of ptes remaining in the contig block containing @ptep, or 1 if
 * the entry is not part of a contiguous mapping.
 */
static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
{
	if (!pte_valid_cont(pte))
		return 1;

	return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1));
}
1707 
1708 /*
1709  * The below functions constitute the public API that arm64 presents to the
1710  * core-mm to manipulate PTE entries within their page tables (or at least this
1711  * is the subset of the API that arm64 needs to implement). These public
1712  * versions will automatically and transparently apply the contiguous bit where
1713  * it makes sense to do so. Therefore any users that are contig-aware (e.g.
1714  * hugetlb, kernel mapper) should NOT use these APIs, but instead use the
1715  * private versions, which are prefixed with double underscore. All of these
1716  * APIs except for ptep_get_lockless() are expected to be called with the PTL
1717  * held. Although the contiguous bit is considered private to the
1718  * implementation, it is deliberately allowed to leak through the getters (e.g.
1719  * ptep_get()), back to core code. This is required so that pte_leaf_size() can
1720  * provide an accurate size for perf_get_pgtable_size(). But this leakage means
1721  * its possible a pte will be passed to a setter with the contiguous bit set, so
1722  * we explicitly clear the contiguous bit in those cases to prevent accidentally
1723  * setting it in the pgtable.
1724  */
1725 
1726 #define ptep_get ptep_get
1727 static inline pte_t ptep_get(pte_t *ptep)
1728 {
1729 	pte_t pte = __ptep_get(ptep);
1730 
1731 	if (likely(!pte_valid_cont(pte)))
1732 		return pte;
1733 
1734 	return contpte_ptep_get(ptep, pte);
1735 }
1736 
1737 #define ptep_get_lockless ptep_get_lockless
1738 static inline pte_t ptep_get_lockless(pte_t *ptep)
1739 {
1740 	pte_t pte = __ptep_get(ptep);
1741 
1742 	if (likely(!pte_valid_cont(pte)))
1743 		return pte;
1744 
1745 	return contpte_ptep_get_lockless(ptep);
1746 }
1747 
1748 static inline void set_pte(pte_t *ptep, pte_t pte)
1749 {
1750 	/*
1751 	 * We don't have the mm or vaddr so cannot unfold contig entries (since
1752 	 * it requires tlb maintenance). set_pte() is not used in core code, so
1753 	 * this should never even be called. Regardless do our best to service
1754 	 * any call and emit a warning if there is any attempt to set a pte on
1755 	 * top of an existing contig range.
1756 	 */
1757 	pte_t orig_pte = __ptep_get(ptep);
1758 
1759 	WARN_ON_ONCE(pte_valid_cont(orig_pte));
1760 	__set_pte(ptep, pte_mknoncont(pte));
1761 }
1762 
1763 #define set_ptes set_ptes
1764 static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr,
1765 				pte_t *ptep, pte_t pte, unsigned int nr)
1766 {
1767 	pte = pte_mknoncont(pte);
1768 
1769 	if (likely(nr == 1)) {
1770 		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
1771 		__set_ptes(mm, addr, ptep, pte, 1);
1772 		contpte_try_fold(mm, addr, ptep, pte);
1773 	} else {
1774 		contpte_set_ptes(mm, addr, ptep, pte, nr);
1775 	}
1776 }
1777 
1778 static inline void pte_clear(struct mm_struct *mm,
1779 				unsigned long addr, pte_t *ptep)
1780 {
1781 	contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
1782 	__pte_clear(mm, addr, ptep);
1783 }
1784 
1785 #define clear_full_ptes clear_full_ptes
1786 static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
1787 				pte_t *ptep, unsigned int nr, int full)
1788 {
1789 	if (likely(nr == 1)) {
1790 		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
1791 		__clear_full_ptes(mm, addr, ptep, nr, full);
1792 	} else {
1793 		contpte_clear_full_ptes(mm, addr, ptep, nr, full);
1794 	}
1795 }
1796 
1797 #define get_and_clear_full_ptes get_and_clear_full_ptes
1798 static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
1799 				unsigned long addr, pte_t *ptep,
1800 				unsigned int nr, int full)
1801 {
1802 	pte_t pte;
1803 
1804 	if (likely(nr == 1)) {
1805 		contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
1806 		pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
1807 	} else {
1808 		pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full);
1809 	}
1810 
1811 	return pte;
1812 }
1813 
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
/* Read-and-clear one pte, unfolding any contig range it belonged to. */
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep)
{
	contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
	return __ptep_get_and_clear(mm, addr, ptep);
}

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep)
{
	pte_t orig_pte = __ptep_get(ptep);

	if (likely(!pte_valid_cont(orig_pte)))
		return __ptep_test_and_clear_young(vma, addr, ptep);

	return contpte_test_and_clear_young_ptes(vma, addr, ptep, 1);
}

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep)
{
	pte_t orig_pte = __ptep_get(ptep);

	if (likely(!pte_valid_cont(orig_pte)))
		return __ptep_clear_flush_young(vma, addr, ptep);

	return contpte_clear_flush_young_ptes(vma, addr, ptep, 1);
}

#define clear_flush_young_ptes clear_flush_young_ptes
/* Batched variant: clear+flush the access flag over @nr ptes. */
static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
					 unsigned long addr, pte_t *ptep,
					 unsigned int nr)
{
	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
		return __ptep_clear_flush_young(vma, addr, ptep);

	return contpte_clear_flush_young_ptes(vma, addr, ptep, nr);
}

#define wrprotect_ptes wrprotect_ptes
static __always_inline void wrprotect_ptes(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep, unsigned int nr)
{
	if (likely(nr == 1)) {
		/*
		 * Optimization: wrprotect_ptes() can only be called for present
		 * ptes so we only need to check contig bit as condition for
		 * unfold, and we can remove the contig bit from the pte we read
		 * to avoid re-reading. This speeds up fork() which is sensitive
		 * for order-0 folios. Equivalent to contpte_try_unfold().
		 */
		pte_t orig_pte = __ptep_get(ptep);

		if (unlikely(pte_cont(orig_pte))) {
			__contpte_try_unfold(mm, addr, ptep, orig_pte);
			orig_pte = pte_mknoncont(orig_pte);
		}
		___ptep_set_wrprotect(mm, addr, ptep, orig_pte);
	} else {
		contpte_wrprotect_ptes(mm, addr, ptep, nr);
	}
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
				unsigned long addr, pte_t *ptep)
{
	wrprotect_ptes(mm, addr, ptep, 1);
}

#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
/* Update access flags; contig entries go through the contpte helper. */
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep,
				pte_t entry, int dirty)
{
	pte_t orig_pte = __ptep_get(ptep);

	entry = pte_mknoncont(entry);

	if (likely(!pte_valid_cont(orig_pte)))
		return __ptep_set_access_flags(vma, addr, ptep, entry, dirty);

	return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
}

#define clear_young_dirty_ptes clear_young_dirty_ptes
static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep,
					  unsigned int nr, cydp_t flags)
{
	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
		__clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
	else
		contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
}
1913 
#else /* CONFIG_ARM64_CONTPTE */

/*
 * Without CONFIG_ARM64_CONTPTE there is no contiguous-pte folding to
 * manage, so each generic pgtable hook maps directly onto the underlying
 * arm64 helper (the double-underscore variants used above).
 */
#define ptep_get				__ptep_get
#define set_pte					__set_pte
#define set_ptes				__set_ptes
#define pte_clear				__pte_clear
#define clear_full_ptes				__clear_full_ptes
#define get_and_clear_full_ptes			__get_and_clear_full_ptes
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define ptep_get_and_clear			__ptep_get_and_clear
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define ptep_test_and_clear_young		__ptep_test_and_clear_young
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young			__ptep_clear_flush_young
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define ptep_set_wrprotect			__ptep_set_wrprotect
#define wrprotect_ptes				__wrprotect_ptes
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags			__ptep_set_access_flags
#define clear_young_dirty_ptes			__clear_young_dirty_ptes

#endif /* CONFIG_ARM64_CONTPTE */
1936 
1937 #endif /* !__ASSEMBLER__ */
1938 
1939 #endif /* __ASM_PGTABLE_H */
1940