xref: /linux/arch/powerpc/include/asm/book3s/64/pgtable.h (revision 3df692169e8486fc3dd91fcd5ea81c27a0bac033)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
3 #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
4 
5 #include <asm-generic/pgtable-nop4d.h>
6 
7 #ifndef __ASSEMBLY__
8 #include <linux/mmdebug.h>
9 #include <linux/bug.h>
10 #include <linux/sizes.h>
11 #endif
12 
13 /*
14  * Common bits between hash and Radix page table
15  */
16 
17 #define _PAGE_EXEC		0x00001 /* execute permission */
18 #define _PAGE_WRITE		0x00002 /* write access allowed */
19 #define _PAGE_READ		0x00004	/* read access allowed */
20 #define _PAGE_NA		_PAGE_PRIVILEGED
21 #define _PAGE_NAX		_PAGE_EXEC
22 #define _PAGE_RO		_PAGE_READ
23 #define _PAGE_ROX		(_PAGE_READ | _PAGE_EXEC)
24 #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
25 #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
26 #define _PAGE_PRIVILEGED	0x00008 /* kernel access only */
27 #define _PAGE_SAO		0x00010 /* Strong access order */
28 #define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
29 #define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
30 #define _PAGE_DIRTY		0x00080 /* C: page changed */
31 #define _PAGE_ACCESSED		0x00100 /* R: page referenced */
32 /*
33  * Software bits
34  */
35 #define _RPAGE_SW0		0x2000000000000000UL
36 #define _RPAGE_SW1		0x00800
37 #define _RPAGE_SW2		0x00400
38 #define _RPAGE_SW3		0x00200
39 #define _RPAGE_RSV1		0x00040UL
40 
41 #define _RPAGE_PKEY_BIT4	0x1000000000000000UL
42 #define _RPAGE_PKEY_BIT3	0x0800000000000000UL
43 #define _RPAGE_PKEY_BIT2	0x0400000000000000UL
44 #define _RPAGE_PKEY_BIT1	0x0200000000000000UL
45 #define _RPAGE_PKEY_BIT0	0x0100000000000000UL
46 
47 #define _PAGE_PTE		0x4000000000000000UL	/* distinguishes PTEs from pointers */
48 #define _PAGE_PRESENT		0x8000000000000000UL	/* pte contains a translation */
49 /*
50  * We need to mark a pmd pte invalid while splitting. We could do that by clearing
51  * the _PAGE_PRESENT bit, but then that would be taken as a swap pte. In order to
52  * differentiate between the two, use a SW field when invalidating.
53  *
54  * We do the same temporary invalidation for regular pte entries in ptep_set_access_flags().
55  *
56  * This bit is used only when _PAGE_PRESENT is cleared.
57  */
58 #define _PAGE_INVALID		_RPAGE_SW0
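/*
 * Illustrative sketch (not kernel code): how the _PAGE_INVALID trick keeps an
 * entry "present" across the temporary invalidation described above, given
 * the pte_present()/pmd_present() checks later in this file:
 *
 *	valid entry:	_PAGE_PRESENT | _PAGE_PTE | ...	-> pte_present() == 1
 *	invalidated:	_PAGE_INVALID | _PAGE_PTE | ...	-> pte_present() == 1
 *			(_PAGE_PRESENT cleared)
 *	swap entry:	_PAGE_PTE only			-> pte_present() == 0
 */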
59 
60 /*
61  * Top and bottom bits of RPN which can be used by hash
62  * translation mode, because we expect them to be zero
63  * otherwise.
64  */
65 #define _RPAGE_RPN0		0x01000
66 #define _RPAGE_RPN1		0x02000
67 #define _RPAGE_RPN43		0x0080000000000000UL
68 #define _RPAGE_RPN42		0x0040000000000000UL
69 #define _RPAGE_RPN41		0x0020000000000000UL
70 
71 /* Max physical address bit as per radix table */
72 #define _RPAGE_PA_MAX		56
73 
74 /*
75  * Max physical address bit we will use for now.
76  *
77  * This is mostly a hardware limitation and for now Power9 has
78  * a 51 bit limit.
79  *
80  * This is different from the number of physical bits required to address
81  * the last byte of memory. That is defined by MAX_PHYSMEM_BITS.
82  * MAX_PHYSMEM_BITS is a linux limitation imposed by the maximum
83  * number of sections we can support (SECTIONS_SHIFT).
84  *
85  * This is different from the Radix page table limitation above and
86  * should always be less than that. The limit is chosen such that
87  * we can overload the bits between _RPAGE_PA_MAX and _PAGE_PA_MAX
88  * for hash linux page table specific bits.
89  *
90  * In order to be compatible with future hardware generations we keep
91  * some offsets and limit this for now to 53
92  */
93 #define _PAGE_PA_MAX		53
94 
95 #define _PAGE_SOFT_DIRTY	_RPAGE_SW3 /* software: software dirty tracking */
96 #define _PAGE_SPECIAL		_RPAGE_SW2 /* software: special page */
97 #define _PAGE_DEVMAP		_RPAGE_SW1 /* software: ZONE_DEVICE page */
98 
99 /*
100  * Drivers request cache-inhibited pte mappings using _PAGE_NO_CACHE.
101  * Instead of fixing all of them, add an alternate define which
102  * maps to a CI pte mapping.
103  */
104 #define _PAGE_NO_CACHE		_PAGE_TOLERANT
105 /*
106  * We support a _RPAGE_PA_MAX bit real address in the pte. On the linux side
107  * we are limited by _PAGE_PA_MAX. Clear everything above _PAGE_PA_MAX
108  * and everything below PAGE_SHIFT.
109  */
110 #define PTE_RPN_MASK	(((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
111 #define PTE_RPN_SHIFT	PAGE_SHIFT
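/*
 * Illustrative sketch (assuming 64K pages, i.e. PAGE_SHIFT = 16): PTE_RPN_MASK
 * keeps bits PAGE_SHIFT.._PAGE_PA_MAX-1 of the pte, so the pfn stored in a pte
 * can be recovered with something like
 *
 *	pfn = (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
 *
 * which is what the pte_pfn()/pfn_pte() style helpers do with these masks
 * (see pfn_pte() further down).
 */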
112 /*
113  * set of bits not changed in pmd_modify. Even though we have hash specific bits
114  * in here, on radix we expect them to be zero.
115  */
116 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
117 			 _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
118 			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
119 /*
120  * user access blocked by key
121  */
122 #define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
123 #define _PAGE_KERNEL_RO		(_PAGE_PRIVILEGED | _PAGE_READ)
124 #define _PAGE_KERNEL_ROX	(_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC)
125 #define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
126 /*
127  * _PAGE_CHG_MASK masks the bits that are to be preserved across
128  * pgprot changes.
129  */
130 #define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
131 			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE |	\
132 			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
133 
134 /*
135  * We define 2 sets of base prot bits, one for basic pages (i.e.
136  * cacheable kernel and user pages) and one for non-cacheable
137  * pages. We always set _PAGE_COHERENT when SMP is enabled or
138  * the processor might need it for DMA coherency.
139  */
140 #define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED)
141 #define _PAGE_BASE	(_PAGE_BASE_NC)
142 
143 #include <asm/pgtable-masks.h>
144 
145 /* Permission masks used for kernel mappings */
146 #define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
147 #define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_TOLERANT)
148 #define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NON_IDEMPOTENT)
149 #define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
150 #define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
151 #define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
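/*
 * Worked example (illustration only, values taken from the defines above):
 *
 *	PAGE_KERNEL = _PAGE_BASE | _PAGE_KERNEL_RW
 *		    = _PAGE_PRESENT | _PAGE_ACCESSED |
 *		      _PAGE_PRIVILEGED | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY
 *		    = 0x8000000000000000 | 0x100 | 0x8 | 0x4 | 0x2 | 0x80
 *		    = 0x800000000000018e
 *
 * i.e. a present, referenced, dirty, kernel-only read/write mapping
 * (_PAGE_PTE is added separately, e.g. by pfn_pte()/__set_pte_at()).
 */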
152 
153 #ifndef __ASSEMBLY__
154 /*
155  * page table defines
156  */
157 extern unsigned long __pte_index_size;
158 extern unsigned long __pmd_index_size;
159 extern unsigned long __pud_index_size;
160 extern unsigned long __pgd_index_size;
161 extern unsigned long __pud_cache_index;
162 #define PTE_INDEX_SIZE  __pte_index_size
163 #define PMD_INDEX_SIZE  __pmd_index_size
164 #define PUD_INDEX_SIZE  __pud_index_size
165 #define PGD_INDEX_SIZE  __pgd_index_size
166 /* pmd tables use page table fragments */
167 #define PMD_CACHE_INDEX  0
168 #define PUD_CACHE_INDEX __pud_cache_index
169 /*
170  * Because of the use of pte fragments and THP, the sizes of the page tables
171  * are not always derived from the index sizes above.
172  */
173 extern unsigned long __pte_table_size;
174 extern unsigned long __pmd_table_size;
175 extern unsigned long __pud_table_size;
176 extern unsigned long __pgd_table_size;
177 #define PTE_TABLE_SIZE	__pte_table_size
178 #define PMD_TABLE_SIZE	__pmd_table_size
179 #define PUD_TABLE_SIZE	__pud_table_size
180 #define PGD_TABLE_SIZE	__pgd_table_size
181 
182 extern unsigned long __pmd_val_bits;
183 extern unsigned long __pud_val_bits;
184 extern unsigned long __pgd_val_bits;
185 #define PMD_VAL_BITS	__pmd_val_bits
186 #define PUD_VAL_BITS	__pud_val_bits
187 #define PGD_VAL_BITS	__pgd_val_bits
188 
189 extern unsigned long __pte_frag_nr;
190 #define PTE_FRAG_NR __pte_frag_nr
191 extern unsigned long __pte_frag_size_shift;
192 #define PTE_FRAG_SIZE_SHIFT __pte_frag_size_shift
193 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
194 
195 extern unsigned long __pmd_frag_nr;
196 #define PMD_FRAG_NR __pmd_frag_nr
197 extern unsigned long __pmd_frag_size_shift;
198 #define PMD_FRAG_SIZE_SHIFT __pmd_frag_size_shift
199 #define PMD_FRAG_SIZE (1UL << PMD_FRAG_SIZE_SHIFT)
200 
201 #define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
202 #define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
203 #define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
204 #define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
205 
206 #define MAX_PTRS_PER_PTE ((H_PTRS_PER_PTE > R_PTRS_PER_PTE) ? H_PTRS_PER_PTE : R_PTRS_PER_PTE)
207 #define MAX_PTRS_PER_PMD ((H_PTRS_PER_PMD > R_PTRS_PER_PMD) ? H_PTRS_PER_PMD : R_PTRS_PER_PMD)
208 #define MAX_PTRS_PER_PUD ((H_PTRS_PER_PUD > R_PTRS_PER_PUD) ? H_PTRS_PER_PUD : R_PTRS_PER_PUD)
209 #define MAX_PTRS_PER_PGD	(1 << (H_PGD_INDEX_SIZE > RADIX_PGD_INDEX_SIZE ? \
210 				       H_PGD_INDEX_SIZE : RADIX_PGD_INDEX_SIZE))
211 
212 /* PMD_SHIFT determines what a second-level page table entry can map */
213 #define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
214 #define PMD_SIZE	(1UL << PMD_SHIFT)
215 #define PMD_MASK	(~(PMD_SIZE-1))
216 
217 /* PUD_SHIFT determines what a third-level page table entry can map */
218 #define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
219 #define PUD_SIZE	(1UL << PUD_SHIFT)
220 #define PUD_MASK	(~(PUD_SIZE-1))
221 
222 /* PGDIR_SHIFT determines what a fourth-level page table entry can map */
223 #define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
224 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
225 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
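/*
 * Illustrative sketch, assuming the radix MMU with 64K pages (PAGE_SHIFT = 16
 * and radix PTE/PMD/PUD/PGD index sizes 5/9/9/13 as set up via
 * asm/book3s/64/radix.h):
 *
 *	PMD_SHIFT   = 16 + 5 = 21	-> a pmd entry maps 2M
 *	PUD_SHIFT   = 21 + 9 = 30	-> a pud entry maps 1G
 *	PGDIR_SHIFT = 30 + 9 = 39	-> a pgd entry maps 512G
 *
 * Hash translation uses different index sizes, which is why these are
 * runtime variables rather than compile-time constants.
 */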
226 
227 /* Bits to mask out from a PMD to get to the PTE page */
228 #define PMD_MASKED_BITS		0xc0000000000000ffUL
229 /* Bits to mask out from a PUD to get to the PMD page */
230 #define PUD_MASKED_BITS		0xc0000000000000ffUL
231 /* Bits to mask out from a PGD to get to the PUD page */
232 #define P4D_MASKED_BITS		0xc0000000000000ffUL
233 
234 /*
235  * Used as an indicator for rcu callback functions
236  */
237 enum pgtable_index {
238 	PTE_INDEX = 0,
239 	PMD_INDEX,
240 	PUD_INDEX,
241 	PGD_INDEX,
242 	/*
243 	 * Below are used with 4k page size and hugetlb
244 	 */
245 	HTLB_16M_INDEX,
246 	HTLB_16G_INDEX,
247 };
248 
249 extern unsigned long __vmalloc_start;
250 extern unsigned long __vmalloc_end;
251 #define VMALLOC_START	__vmalloc_start
252 #define VMALLOC_END	__vmalloc_end
253 
254 static inline unsigned int ioremap_max_order(void)
255 {
256 	if (radix_enabled())
257 		return PUD_SHIFT;
258 	return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
259 }
260 #define IOREMAP_MAX_ORDER ioremap_max_order()
261 
262 extern unsigned long __kernel_virt_start;
263 extern unsigned long __kernel_io_start;
264 extern unsigned long __kernel_io_end;
265 #define KERN_VIRT_START __kernel_virt_start
266 #define KERN_IO_START  __kernel_io_start
267 #define KERN_IO_END __kernel_io_end
268 
269 extern struct page *vmemmap;
270 extern unsigned long pci_io_base;
271 #endif /* __ASSEMBLY__ */
272 
273 #include <asm/book3s/64/hash.h>
274 #include <asm/book3s/64/radix.h>
275 
276 #if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS
277 #define  MAX_PHYSMEM_BITS	H_MAX_PHYSMEM_BITS
278 #else
279 #define  MAX_PHYSMEM_BITS	R_MAX_PHYSMEM_BITS
280 #endif
281 
282 
283 #ifdef CONFIG_PPC_64K_PAGES
284 #include <asm/book3s/64/pgtable-64k.h>
285 #else
286 #include <asm/book3s/64/pgtable-4k.h>
287 #endif
288 
289 #include <asm/barrier.h>
290 /*
291  * The IO space itself is carved into the PIO region (ISA and PHB IO space) and
292  * the ioremap space:
293  *
294  *  ISA_IO_BASE = KERN_IO_START, 64K reserved area
295  *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
296  * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
297  */
298 #define FULL_IO_SIZE	0x80000000ul
299 #define  ISA_IO_BASE	(KERN_IO_START)
300 #define  ISA_IO_END	(KERN_IO_START + 0x10000ul)
301 #define  PHB_IO_BASE	(ISA_IO_END)
302 #define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
303 #define IOREMAP_BASE	(PHB_IO_END)
304 #define IOREMAP_START	(ioremap_bot)
305 #define IOREMAP_END	(KERN_IO_END - FIXADDR_SIZE)
306 #define FIXADDR_SIZE	SZ_32M
307 #define FIXADDR_TOP	(IOREMAP_END + FIXADDR_SIZE)
308 
309 #ifndef __ASSEMBLY__
310 
311 /*
312  * This is the default implementation of various PTE accessors; it is
313  * used in all cases except Book3S with 64K pages, where we have a
314  * concept of sub-pages.
315  */
316 #ifndef __real_pte
317 
318 #define __real_pte(e, p, o)		((real_pte_t){(e)})
319 #define __rpte_to_pte(r)	((r).pte)
320 #define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
321 
322 #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
323 	do {							         \
324 		index = 0;					         \
325 		shift = mmu_psize_defs[psize].shift;		         \
326 
327 #define pte_iterate_hashed_end() } while(0)
328 
329 /*
330  * We expect this to be called only for user addresses or kernel virtual
331  * addresses other than the linear mapping.
332  */
333 #define pte_pagesize_index(mm, addr, pte)	MMU_PAGE_4K
334 
335 #endif /* __real_pte */
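/*
 * Usage sketch for the iterator pair above (illustrative only, not a real
 * caller; 'vpn' is a hypothetical virtual page number variable):
 *
 *	pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) {
 *		... act on each hashed sub-page slot of the pte ...
 *	} pte_iterate_hashed_end();
 *
 * In this default (non-64K sub-page) variant the body runs once, with
 * index = 0 and shift = mmu_psize_defs[psize].shift.
 */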
336 
337 static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
338 				       pte_t *ptep, unsigned long clr,
339 				       unsigned long set, int huge)
340 {
341 	if (radix_enabled())
342 		return radix__pte_update(mm, addr, ptep, clr, set, huge);
343 	return hash__pte_update(mm, addr, ptep, clr, set, huge);
344 }
345 /*
346  * For hash, even if we have _PAGE_ACCESSED = 0, we do a pte_update.
347  * We currently remove entries from the hashtable regardless of whether
348  * the entry was young or dirty.
349  *
350  * We should be more intelligent about this, but for the moment we override
351  * these functions and force a tlb flush unconditionally.
352  * For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
353  * function for both hash and radix.
354  */
355 static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
356 					      unsigned long addr, pte_t *ptep)
357 {
358 	unsigned long old;
359 
360 	if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
361 		return 0;
362 	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
363 	return (old & _PAGE_ACCESSED) != 0;
364 }
365 
366 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
367 #define ptep_test_and_clear_young(__vma, __addr, __ptep)	\
368 ({								\
369 	__ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
370 })
371 
372 /*
373  * On Book3S CPUs, clearing the accessed bit without a TLB flush
374  * doesn't cause data corruption. [ It could cause incorrect
375  * page aging and the (mistaken) reclaim of hot pages, but the
376  * chance of that should be relatively low. ]
377  *
378  * So as a performance optimization don't flush the TLB when
379  * clearing the accessed bit, it will eventually be flushed by
380  * a context switch or a VM operation anyway. [ In the rare
381  * event of it not getting flushed for a long time the delay
382  * shouldn't really matter because there's no real memory
383  * pressure for swapout to react to. ]
384  *
385  * Note: this optimisation also exists in pte_needs_flush() and
386  * huge_pmd_needs_flush().
387  */
388 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
389 #define ptep_clear_flush_young ptep_test_and_clear_young
390 
391 #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
392 #define pmdp_clear_flush_young pmdp_test_and_clear_young
393 
394 static inline int pte_write(pte_t pte)
395 {
396 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
397 }
398 
399 static inline int pte_read(pte_t pte)
400 {
401 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_READ));
402 }
403 
404 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
405 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
406 				      pte_t *ptep)
407 {
408 	if (pte_write(*ptep))
409 		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
410 }
411 
412 #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
413 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
414 					   unsigned long addr, pte_t *ptep)
415 {
416 	if (pte_write(*ptep))
417 		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
418 }
419 
420 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
421 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
422 				       unsigned long addr, pte_t *ptep)
423 {
424 	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
425 	return __pte(old);
426 }
427 
428 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
429 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
430 					    unsigned long addr,
431 					    pte_t *ptep, int full)
432 {
433 	if (full && radix_enabled()) {
434 		/*
435 		 * We know that this is a full mm pte clear and
436 		 * hence can be sure there is no parallel set_pte.
437 		 */
438 		return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
439 	}
440 	return ptep_get_and_clear(mm, addr, ptep);
441 }
442 
443 
444 static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
445 			     pte_t * ptep)
446 {
447 	pte_update(mm, addr, ptep, ~0UL, 0, 0);
448 }
449 
450 static inline int pte_dirty(pte_t pte)
451 {
452 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY));
453 }
454 
455 static inline int pte_young(pte_t pte)
456 {
457 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_ACCESSED));
458 }
459 
460 static inline int pte_special(pte_t pte)
461 {
462 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL));
463 }
464 
465 static inline bool pte_exec(pte_t pte)
466 {
467 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_EXEC));
468 }
469 
470 
471 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
472 static inline bool pte_soft_dirty(pte_t pte)
473 {
474 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SOFT_DIRTY));
475 }
476 
477 static inline pte_t pte_mksoft_dirty(pte_t pte)
478 {
479 	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SOFT_DIRTY));
480 }
481 
482 static inline pte_t pte_clear_soft_dirty(pte_t pte)
483 {
484 	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SOFT_DIRTY));
485 }
486 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
487 
488 #ifdef CONFIG_NUMA_BALANCING
489 static inline int pte_protnone(pte_t pte)
490 {
491 	return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE | _PAGE_RWX)) ==
492 		cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
493 }
494 #endif /* CONFIG_NUMA_BALANCING */
495 
496 static inline bool pte_hw_valid(pte_t pte)
497 {
498 	return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE)) ==
499 		cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
500 }
501 
502 static inline int pte_present(pte_t pte)
503 {
504 	/*
505 	 * A pte is considered present if _PAGE_PRESENT is set.
506 	 * We also need to consider a pte present if it has been marked
507 	 * invalid during ptep_set_access_flags. Hence we look for _PAGE_INVALID
508 	 * if we find _PAGE_PRESENT cleared.
509 	 */
510 
511 	if (pte_hw_valid(pte))
512 		return true;
513 	return (pte_raw(pte) & cpu_to_be64(_PAGE_INVALID | _PAGE_PTE)) ==
514 		cpu_to_be64(_PAGE_INVALID | _PAGE_PTE);
515 }
516 
517 #ifdef CONFIG_PPC_MEM_KEYS
518 extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
519 #else
520 static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
521 {
522 	return true;
523 }
524 #endif /* CONFIG_PPC_MEM_KEYS */
525 
526 static inline bool pte_user(pte_t pte)
527 {
528 	return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
529 }
530 
531 #define pte_access_permitted pte_access_permitted
532 static inline bool pte_access_permitted(pte_t pte, bool write)
533 {
534 	/*
535 	 * _PAGE_READ is needed for any access and will be
536 	 * cleared for PROT_NONE
537 	 */
538 	if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
539 		return false;
540 
541 	if (write && !pte_write(pte))
542 		return false;
543 
544 	return arch_pte_access_permitted(pte_val(pte), write, 0);
545 }
546 
547 /*
548  * Conversion functions: convert a page and protection to a page entry,
549  * and a page entry and page directory to the page they refer to.
550  *
551  * Even if PTEs can be unsigned long long, a PFN is always an unsigned
552  * long for now.
553  */
554 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
555 {
556 	VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
557 	VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
558 
559 	return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
560 }
561 
562 /* Generic modifiers for PTE bits */
563 static inline pte_t pte_wrprotect(pte_t pte)
564 {
565 	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
566 }
567 
568 static inline pte_t pte_exprotect(pte_t pte)
569 {
570 	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_EXEC));
571 }
572 
573 static inline pte_t pte_mkclean(pte_t pte)
574 {
575 	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_DIRTY));
576 }
577 
578 static inline pte_t pte_mkold(pte_t pte)
579 {
580 	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_ACCESSED));
581 }
582 
583 static inline pte_t pte_mkexec(pte_t pte)
584 {
585 	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
586 }
587 
588 static inline pte_t pte_mkwrite_novma(pte_t pte)
589 {
590 	/*
591 	 * write implies read, hence set both
592 	 */
593 	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_RW));
594 }
595 
596 static inline pte_t pte_mkdirty(pte_t pte)
597 {
598 	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_DIRTY | _PAGE_SOFT_DIRTY));
599 }
600 
601 static inline pte_t pte_mkyoung(pte_t pte)
602 {
603 	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_ACCESSED));
604 }
605 
606 static inline pte_t pte_mkspecial(pte_t pte)
607 {
608 	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL));
609 }
610 
611 static inline pte_t pte_mkhuge(pte_t pte)
612 {
613 	return pte;
614 }
615 
616 static inline pte_t pte_mkdevmap(pte_t pte)
617 {
618 	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP));
619 }
620 
621 /*
622  * This is potentially called with a pmd as the argument, in which case it's not
623  * safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set.
624  * That's because the bit we use for _PAGE_DEVMAP is not reserved for software
625  * use in page directory entries (ie. non-ptes).
626  */
627 static inline int pte_devmap(pte_t pte)
628 {
629 	__be64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE);
630 
631 	return (pte_raw(pte) & mask) == mask;
632 }
633 
634 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
635 {
636 	/* FIXME!! check whether this needs to be a conditional */
637 	return __pte_raw((pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK)) |
638 			 cpu_to_be64(pgprot_val(newprot)));
639 }
640 
641 /* Encode and de-code a swap entry */
642 #define MAX_SWAPFILES_CHECK() do { \
643 	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
644 	/*							\
645 	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
646 	 * We filter HPTEFLAGS on set_pte.			\
647 	 */							\
648 	BUILD_BUG_ON(_PAGE_HPTEFLAGS & SWP_TYPE_MASK); \
649 	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY);	\
650 	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_EXCLUSIVE);	\
651 	} while (0)
652 
653 #define SWP_TYPE_BITS 5
654 #define SWP_TYPE_MASK		((1UL << SWP_TYPE_BITS) - 1)
655 #define __swp_type(x)		((x).val & SWP_TYPE_MASK)
656 #define __swp_offset(x)		(((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
657 #define __swp_entry(type, offset)	((swp_entry_t) { \
658 				(type) | (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
659 /*
660  * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
661  * the swap type and offset we get from swap and convert that to a pte to find a
662  * matching pte in the linux page table.
663  * Clear bits not found in swap entries here.
664  */
665 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
666 #define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
667 #define __pmd_to_swp_entry(pmd)	(__pte_to_swp_entry(pmd_pte(pmd)))
668 #define __swp_entry_to_pmd(x)	(pte_pmd(__swp_entry_to_pte(x)))
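/*
 * Worked example (illustration only, assuming 64K pages so PAGE_SHIFT = 16):
 * a swap entry with type = 3 and offset = 0x1234 is built and converted as
 *
 *	__swp_entry(3, 0x1234).val	= 3 | (0x1234 << 16) = 0x12340003
 *	__swp_entry_to_pte(...)		= __pte(0x12340003 | _PAGE_PTE)
 *
 * _PAGE_PRESENT and _PAGE_INVALID remain clear, so such a pte is not
 * pte_present(); __pte_to_swp_entry() strips _PAGE_PTE again on the way back.
 */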
669 
670 #ifdef CONFIG_MEM_SOFT_DIRTY
671 #define _PAGE_SWP_SOFT_DIRTY	_PAGE_SOFT_DIRTY
672 #else
673 #define _PAGE_SWP_SOFT_DIRTY	0UL
674 #endif /* CONFIG_MEM_SOFT_DIRTY */
675 
676 #define _PAGE_SWP_EXCLUSIVE	_PAGE_NON_IDEMPOTENT
677 
678 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
679 static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
680 {
681 	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
682 }
683 
684 static inline bool pte_swp_soft_dirty(pte_t pte)
685 {
686 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
687 }
688 
689 static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
690 {
691 	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY));
692 }
693 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
694 
695 static inline pte_t pte_swp_mkexclusive(pte_t pte)
696 {
697 	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_EXCLUSIVE));
698 }
699 
700 static inline int pte_swp_exclusive(pte_t pte)
701 {
702 	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_EXCLUSIVE));
703 }
704 
705 static inline pte_t pte_swp_clear_exclusive(pte_t pte)
706 {
707 	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_EXCLUSIVE));
708 }
709 
710 static inline bool check_pte_access(unsigned long access, unsigned long ptev)
711 {
712 	/*
713 	 * This checks for the _PAGE_RWX and _PAGE_PRESENT bits
714 	 */
715 	if (access & ~ptev)
716 		return false;
717 	/*
718 	 * This checks for access to privileged space
719 	 */
720 	if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
721 		return false;
722 
723 	return true;
724 }
725 /*
726  * Generic functions with hash/radix callbacks
727  */
728 
729 static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
730 					   pte_t *ptep, pte_t entry,
731 					   unsigned long address,
732 					   int psize)
733 {
734 	if (radix_enabled())
735 		return radix__ptep_set_access_flags(vma, ptep, entry,
736 						    address, psize);
737 	return hash__ptep_set_access_flags(ptep, entry);
738 }
739 
740 #define __HAVE_ARCH_PTE_SAME
741 static inline int pte_same(pte_t pte_a, pte_t pte_b)
742 {
743 	if (radix_enabled())
744 		return radix__pte_same(pte_a, pte_b);
745 	return hash__pte_same(pte_a, pte_b);
746 }
747 
748 static inline int pte_none(pte_t pte)
749 {
750 	if (radix_enabled())
751 		return radix__pte_none(pte);
752 	return hash__pte_none(pte);
753 }
754 
755 static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
756 				pte_t *ptep, pte_t pte, int percpu)
757 {
758 
759 	VM_WARN_ON(!(pte_raw(pte) & cpu_to_be64(_PAGE_PTE)));
760 	/*
761 	 * Keep the _PAGE_PTE added till we are sure we handle _PAGE_PTE
762 	 * in all the callers.
763 	 */
764 	pte = __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
765 
766 	if (radix_enabled())
767 		return radix__set_pte_at(mm, addr, ptep, pte, percpu);
768 	return hash__set_pte_at(mm, addr, ptep, pte, percpu);
769 }
770 
771 #define _PAGE_CACHE_CTL	(_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
772 
773 #define pgprot_noncached pgprot_noncached
774 static inline pgprot_t pgprot_noncached(pgprot_t prot)
775 {
776 	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
777 			_PAGE_NON_IDEMPOTENT);
778 }
779 
780 #define pgprot_noncached_wc pgprot_noncached_wc
781 static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
782 {
783 	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
784 			_PAGE_TOLERANT);
785 }
786 
787 #define pgprot_cached pgprot_cached
788 static inline pgprot_t pgprot_cached(pgprot_t prot)
789 {
790 	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL));
791 }
792 
793 #define pgprot_writecombine pgprot_writecombine
794 static inline pgprot_t pgprot_writecombine(pgprot_t prot)
795 {
796 	return pgprot_noncached_wc(prot);
797 }
798 /*
799  * Check whether a pte mapping has the cache-inhibited property
800  */
801 static inline bool pte_ci(pte_t pte)
802 {
803 	__be64 pte_v = pte_raw(pte);
804 
805 	if (((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_TOLERANT)) ||
806 	    ((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_NON_IDEMPOTENT)))
807 		return true;
808 	return false;
809 }
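/*
 * Illustrative sketch (assumed driver-style usage, not a definition from this
 * file): device register mappings normally come through one of the helpers
 * above, e.g.
 *
 *	prot = pgprot_noncached(PAGE_KERNEL);	   strongly ordered, non-idempotent I/O
 *	prot = pgprot_noncached_wc(PAGE_KERNEL);   tolerant, write-combining style I/O
 *
 * Both clear _PAGE_CACHE_CTL first so that exactly one cache-control mode is
 * set, and pte_ci() above reports either result as cache-inhibited.
 */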
810 
811 static inline void pmd_clear(pmd_t *pmdp)
812 {
813 	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
814 		/*
815 		 * Don't use this if we can possibly have a hash page table
816 		 * entry mapping this.
817 		 */
818 		WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
819 	}
820 	*pmdp = __pmd(0);
821 }
822 
823 static inline int pmd_none(pmd_t pmd)
824 {
825 	return !pmd_raw(pmd);
826 }
827 
828 static inline int pmd_present(pmd_t pmd)
829 {
830 	/*
831 	 * A pmd is considered present if _PAGE_PRESENT is set.
832 	 * We also need to consider a pmd present if it has been marked
833 	 * invalid during a split. Hence we look for _PAGE_INVALID
834 	 * if we find _PAGE_PRESENT cleared.
835 	 */
836 	if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
837 		return true;
838 
839 	return false;
840 }
841 
842 static inline int pmd_is_serializing(pmd_t pmd)
843 {
844 	/*
845 	 * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear
846 	 * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate).
847 	 *
848 	 * This condition may also occur transiently when a pmd is being
849 	 * modified and flushed (see ptep_modify_prot_start), so callers must
850 	 * ensure this case is fine as well.
851 	 */
852 	if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) ==
853 						cpu_to_be64(_PAGE_INVALID))
854 		return true;
855 
856 	return false;
857 }
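/*
 * Summary of the pmd states distinguished by pmd_present() and
 * pmd_is_serializing() above (illustration only):
 *
 *	_PAGE_PRESENT	_PAGE_INVALID	meaning
 *	     1		     x		present pmd
 *	     0		     1		pmd being split/invalidated: still
 *					treated as present, but serializing
 *					lock-free lookups
 *	     0		     0		none, or a swap/migration entry
 */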
858 
859 static inline int pmd_bad(pmd_t pmd)
860 {
861 	if (radix_enabled())
862 		return radix__pmd_bad(pmd);
863 	return hash__pmd_bad(pmd);
864 }
865 
866 static inline void pud_clear(pud_t *pudp)
867 {
868 	if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
869 		/*
870 		 * Don't use this if we can possibly have a hash page table
871 		 * entry mapping this.
872 		 */
873 		WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
874 	}
875 	*pudp = __pud(0);
876 }
877 
878 static inline int pud_none(pud_t pud)
879 {
880 	return !pud_raw(pud);
881 }
882 
883 static inline int pud_present(pud_t pud)
884 {
885 	return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
886 }
887 
888 extern struct page *pud_page(pud_t pud);
889 extern struct page *pmd_page(pmd_t pmd);
890 static inline pte_t pud_pte(pud_t pud)
891 {
892 	return __pte_raw(pud_raw(pud));
893 }
894 
895 static inline pud_t pte_pud(pte_t pte)
896 {
897 	return __pud_raw(pte_raw(pte));
898 }
899 
900 static inline pte_t *pudp_ptep(pud_t *pud)
901 {
902 	return (pte_t *)pud;
903 }
904 
905 #define pud_pfn(pud)		pte_pfn(pud_pte(pud))
906 #define pud_dirty(pud)		pte_dirty(pud_pte(pud))
907 #define pud_young(pud)		pte_young(pud_pte(pud))
908 #define pud_mkold(pud)		pte_pud(pte_mkold(pud_pte(pud)))
909 #define pud_wrprotect(pud)	pte_pud(pte_wrprotect(pud_pte(pud)))
910 #define pud_mkdirty(pud)	pte_pud(pte_mkdirty(pud_pte(pud)))
911 #define pud_mkclean(pud)	pte_pud(pte_mkclean(pud_pte(pud)))
912 #define pud_mkyoung(pud)	pte_pud(pte_mkyoung(pud_pte(pud)))
913 #define pud_mkwrite(pud)	pte_pud(pte_mkwrite_novma(pud_pte(pud)))
914 #define pud_write(pud)		pte_write(pud_pte(pud))
915 
916 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
917 #define pud_soft_dirty(pud)    pte_soft_dirty(pud_pte(pud))
918 #define pud_mksoft_dirty(pud)  pte_pud(pte_mksoft_dirty(pud_pte(pud)))
919 #define pud_clear_soft_dirty(pud) pte_pud(pte_clear_soft_dirty(pud_pte(pud)))
920 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
921 
922 static inline int pud_bad(pud_t pud)
923 {
924 	if (radix_enabled())
925 		return radix__pud_bad(pud);
926 	return hash__pud_bad(pud);
927 }
928 
929 #define pud_access_permitted pud_access_permitted
930 static inline bool pud_access_permitted(pud_t pud, bool write)
931 {
932 	return pte_access_permitted(pud_pte(pud), write);
933 }
934 
935 #define __p4d_raw(x)	((p4d_t) { __pgd_raw(x) })
936 static inline __be64 p4d_raw(p4d_t x)
937 {
938 	return pgd_raw(x.pgd);
939 }
940 
941 #define p4d_write(p4d)		pte_write(p4d_pte(p4d))
942 
943 static inline void p4d_clear(p4d_t *p4dp)
944 {
945 	*p4dp = __p4d(0);
946 }
947 
948 static inline int p4d_none(p4d_t p4d)
949 {
950 	return !p4d_raw(p4d);
951 }
952 
953 static inline int p4d_present(p4d_t p4d)
954 {
955 	return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT));
956 }
957 
958 static inline pte_t p4d_pte(p4d_t p4d)
959 {
960 	return __pte_raw(p4d_raw(p4d));
961 }
962 
963 static inline p4d_t pte_p4d(pte_t pte)
964 {
965 	return __p4d_raw(pte_raw(pte));
966 }
967 
968 static inline int p4d_bad(p4d_t p4d)
969 {
970 	if (radix_enabled())
971 		return radix__p4d_bad(p4d);
972 	return hash__p4d_bad(p4d);
973 }
974 
975 #define p4d_access_permitted p4d_access_permitted
976 static inline bool p4d_access_permitted(p4d_t p4d, bool write)
977 {
978 	return pte_access_permitted(p4d_pte(p4d), write);
979 }
980 
981 extern struct page *p4d_page(p4d_t p4d);
982 
983 /* Pointers in the page table tree are physical addresses */
984 #define __pgtable_ptr_val(ptr)	__pa(ptr)
985 
986 static inline pud_t *p4d_pgtable(p4d_t p4d)
987 {
988 	return (pud_t *)__va(p4d_val(p4d) & ~P4D_MASKED_BITS);
989 }
990 
991 static inline pmd_t *pud_pgtable(pud_t pud)
992 {
993 	return (pmd_t *)__va(pud_val(pud) & ~PUD_MASKED_BITS);
994 }
995 
996 #define pmd_ERROR(e) \
997 	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
998 #define pud_ERROR(e) \
999 	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
1000 #define pgd_ERROR(e) \
1001 	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
1002 
1003 static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
1004 {
1005 	if (radix_enabled()) {
1006 #if defined(CONFIG_PPC_RADIX_MMU) && defined(CONFIG_DEBUG_VM)
1007 		unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
1008 		WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
1009 #endif
1010 		return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE);
1011 	}
1012 	return hash__map_kernel_page(ea, pa, prot);
1013 }
1014 
1015 void unmap_kernel_page(unsigned long va);
1016 
1017 static inline int __meminit vmemmap_create_mapping(unsigned long start,
1018 						   unsigned long page_size,
1019 						   unsigned long phys)
1020 {
1021 	if (radix_enabled())
1022 		return radix__vmemmap_create_mapping(start, page_size, phys);
1023 	return hash__vmemmap_create_mapping(start, page_size, phys);
1024 }
1025 
1026 #ifdef CONFIG_MEMORY_HOTPLUG
1027 static inline void vmemmap_remove_mapping(unsigned long start,
1028 					  unsigned long page_size)
1029 {
1030 	if (radix_enabled())
1031 		return radix__vmemmap_remove_mapping(start, page_size);
1032 	return hash__vmemmap_remove_mapping(start, page_size);
1033 }
1034 #endif
1035 
1036 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
1037 static inline void __kernel_map_pages(struct page *page, int numpages, int enable)
1038 {
1039 	if (radix_enabled())
1040 		radix__kernel_map_pages(page, numpages, enable);
1041 	else
1042 		hash__kernel_map_pages(page, numpages, enable);
1043 }
1044 #endif
1045 
1046 static inline pte_t pmd_pte(pmd_t pmd)
1047 {
1048 	return __pte_raw(pmd_raw(pmd));
1049 }
1050 
1051 static inline pmd_t pte_pmd(pte_t pte)
1052 {
1053 	return __pmd_raw(pte_raw(pte));
1054 }
1055 
1056 static inline pte_t *pmdp_ptep(pmd_t *pmd)
1057 {
1058 	return (pte_t *)pmd;
1059 }
1060 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
1061 #define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
1062 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
1063 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
1064 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
1065 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
1066 #define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
1067 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
1068 #define pmd_mkwrite_novma(pmd)	pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
1069 
1070 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
1071 #define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
1072 #define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
1073 #define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
1074 
1075 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1076 #define pmd_swp_mksoft_dirty(pmd)	pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
1077 #define pmd_swp_soft_dirty(pmd)		pte_swp_soft_dirty(pmd_pte(pmd))
1078 #define pmd_swp_clear_soft_dirty(pmd)	pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
1079 #endif
1080 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
1081 
1082 #ifdef CONFIG_NUMA_BALANCING
1083 static inline int pmd_protnone(pmd_t pmd)
1084 {
1085 	return pte_protnone(pmd_pte(pmd));
1086 }
1087 #endif /* CONFIG_NUMA_BALANCING */
1088 
1089 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
1090 
1091 #define pmd_access_permitted pmd_access_permitted
1092 static inline bool pmd_access_permitted(pmd_t pmd, bool write)
1093 {
1094 	/*
1095 	 * pmdp_invalidate sets this combination (which is not caught by
1096 	 * !pte_present() check in pte_access_permitted), to prevent
1097 	 * lock-free lookups, as part of the serialize_against_pte_lookup()
1098 	 * synchronisation.
1099 	 *
1100 	 * This also catches the case where the PTE's hardware PRESENT bit is
1101 	 * cleared while TLB is flushed, which is suboptimal but should not
1102 	 * be frequent.
1103 	 */
1104 	if (pmd_is_serializing(pmd))
1105 		return false;
1106 
1107 	return pte_access_permitted(pmd_pte(pmd), write);
1108 }
1109 
1110 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1111 extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
1112 extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot);
1113 extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
1114 extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
1115 extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
1116 		       pmd_t *pmdp, pmd_t pmd);
1117 extern void set_pud_at(struct mm_struct *mm, unsigned long addr,
1118 		       pud_t *pudp, pud_t pud);
1119 
1120 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
1121 					unsigned long addr, pmd_t *pmd)
1122 {
1123 }
1124 
1125 static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
1126 					unsigned long addr, pud_t *pud)
1127 {
1128 }
1129 
1130 extern int hash__has_transparent_hugepage(void);
1131 static inline int has_transparent_hugepage(void)
1132 {
1133 	if (radix_enabled())
1134 		return radix__has_transparent_hugepage();
1135 	return hash__has_transparent_hugepage();
1136 }
1137 #define has_transparent_hugepage has_transparent_hugepage
1138 
1139 static inline int has_transparent_pud_hugepage(void)
1140 {
1141 	if (radix_enabled())
1142 		return radix__has_transparent_pud_hugepage();
1143 	return 0;
1144 }
1145 #define has_transparent_pud_hugepage has_transparent_pud_hugepage
1146 
1147 static inline unsigned long
1148 pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
1149 		    unsigned long clr, unsigned long set)
1150 {
1151 	if (radix_enabled())
1152 		return radix__pmd_hugepage_update(mm, addr, pmdp, clr, set);
1153 	return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
1154 }
1155 
1156 static inline unsigned long
1157 pud_hugepage_update(struct mm_struct *mm, unsigned long addr, pud_t *pudp,
1158 		    unsigned long clr, unsigned long set)
1159 {
1160 	if (radix_enabled())
1161 		return radix__pud_hugepage_update(mm, addr, pudp, clr, set);
1162 	BUG();
1163 	return pud_val(*pudp);
1164 }
1165 
1166 /*
1167  * Returns true for pmd migration entries, THP, devmap and hugetlb,
1168  * but is compile-time dependent on the THP config.
1169  */
1170 static inline int pmd_large(pmd_t pmd)
1171 {
1172 	return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
1173 }
1174 
1175 static inline int pud_large(pud_t pud)
1176 {
1177 	return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
1178 }
1179 
1180 /*
1181  * For radix we should always find H_PAGE_HASHPTE zero. Hence
1182  * the below will work for radix too
1183  */
1184 static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
1185 					      unsigned long addr, pmd_t *pmdp)
1186 {
1187 	unsigned long old;
1188 
1189 	if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
1190 		return 0;
1191 	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
1192 	return ((old & _PAGE_ACCESSED) != 0);
1193 }
1194 
1195 static inline int __pudp_test_and_clear_young(struct mm_struct *mm,
1196 					      unsigned long addr, pud_t *pudp)
1197 {
1198 	unsigned long old;
1199 
1200 	if ((pud_raw(*pudp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
1201 		return 0;
1202 	old = pud_hugepage_update(mm, addr, pudp, _PAGE_ACCESSED, 0);
1203 	return ((old & _PAGE_ACCESSED) != 0);
1204 }
1205 
1206 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
1207 static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
1208 				      pmd_t *pmdp)
1209 {
1210 	if (pmd_write(*pmdp))
1211 		pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
1212 }
1213 
1214 #define __HAVE_ARCH_PUDP_SET_WRPROTECT
1215 static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
1216 				      pud_t *pudp)
1217 {
1218 	if (pud_write(*pudp))
1219 		pud_hugepage_update(mm, addr, pudp, _PAGE_WRITE, 0);
1220 }
1221 
1222 /*
1223  * Only returns true for a THP. False for pmd migration entry.
1224  * We also need to return true when we come across a pmd that is
1225  * in the middle of a thp split. While splitting a THP, we mark the pmd
1226  * invalid (pmdp_invalidate()) before we set it with the pte page
1227  * address. A pmd_trans_huge() check against a pmd entry during that time
1228  * should return true.
1229  * We should not call this on a hugetlb entry. We should check for a HugeTLB
1230  * entry using vma->vm_flags.
1231  * The page table walk rule is explained in Documentation/mm/transhuge.rst
1232  */
1233 static inline int pmd_trans_huge(pmd_t pmd)
1234 {
1235 	if (!pmd_present(pmd))
1236 		return false;
1237 
1238 	if (radix_enabled())
1239 		return radix__pmd_trans_huge(pmd);
1240 	return hash__pmd_trans_huge(pmd);
1241 }
1242 
1243 static inline int pud_trans_huge(pud_t pud)
1244 {
1245 	if (!pud_present(pud))
1246 		return false;
1247 
1248 	if (radix_enabled())
1249 		return radix__pud_trans_huge(pud);
1250 	return 0;
1251 }
1252 
1253 
1254 #define __HAVE_ARCH_PMD_SAME
1255 static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
1256 {
1257 	if (radix_enabled())
1258 		return radix__pmd_same(pmd_a, pmd_b);
1259 	return hash__pmd_same(pmd_a, pmd_b);
1260 }
1261 
1262 #define pud_same pud_same
1263 static inline int pud_same(pud_t pud_a, pud_t pud_b)
1264 {
1265 	if (radix_enabled())
1266 		return radix__pud_same(pud_a, pud_b);
1267 	return hash__pud_same(pud_a, pud_b);
1268 }
1269 
1270 
1271 static inline pmd_t __pmd_mkhuge(pmd_t pmd)
1272 {
1273 	if (radix_enabled())
1274 		return radix__pmd_mkhuge(pmd);
1275 	return hash__pmd_mkhuge(pmd);
1276 }
1277 
1278 static inline pud_t __pud_mkhuge(pud_t pud)
1279 {
1280 	if (radix_enabled())
1281 		return radix__pud_mkhuge(pud);
1282 	BUG();
1283 	return pud;
1284 }
1285 
1286 /*
1287  * pfn_pmd() returns a pmd_t that can be used as a pmd pte entry.
1288  */
1289 static inline pmd_t pmd_mkhuge(pmd_t pmd)
1290 {
1291 #ifdef CONFIG_DEBUG_VM
1292 	if (radix_enabled())
1293 		WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)) == 0);
1294 	else
1295 		WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)) !=
1296 			cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE));
1297 #endif
1298 	return pmd;
1299 }
1300 
1301 static inline pud_t pud_mkhuge(pud_t pud)
1302 {
1303 #ifdef CONFIG_DEBUG_VM
1304 	if (radix_enabled())
1305 		WARN_ON((pud_raw(pud) & cpu_to_be64(_PAGE_PTE)) == 0);
1306 	else
1307 		WARN_ON(1);
1308 #endif
1309 	return pud;
1310 }
1311 
1312 
1313 #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
1314 extern int pmdp_set_access_flags(struct vm_area_struct *vma,
1315 				 unsigned long address, pmd_t *pmdp,
1316 				 pmd_t entry, int dirty);
1317 #define __HAVE_ARCH_PUDP_SET_ACCESS_FLAGS
1318 extern int pudp_set_access_flags(struct vm_area_struct *vma,
1319 				 unsigned long address, pud_t *pudp,
1320 				 pud_t entry, int dirty);
1321 
1322 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
1323 extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
1324 				     unsigned long address, pmd_t *pmdp);
1325 #define __HAVE_ARCH_PUDP_TEST_AND_CLEAR_YOUNG
1326 extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
1327 				     unsigned long address, pud_t *pudp);
1328 
1329 
1330 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
1331 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
1332 					    unsigned long addr, pmd_t *pmdp)
1333 {
1334 	if (radix_enabled())
1335 		return radix__pmdp_huge_get_and_clear(mm, addr, pmdp);
1336 	return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
1337 }
1338 
1339 #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
1340 static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
1341 					    unsigned long addr, pud_t *pudp)
1342 {
1343 	if (radix_enabled())
1344 		return radix__pudp_huge_get_and_clear(mm, addr, pudp);
1345 	BUG();
1346 	return *pudp;
1347 }
1348 
1349 static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
1350 					unsigned long address, pmd_t *pmdp)
1351 {
1352 	if (radix_enabled())
1353 		return radix__pmdp_collapse_flush(vma, address, pmdp);
1354 	return hash__pmdp_collapse_flush(vma, address, pmdp);
1355 }
1356 #define pmdp_collapse_flush pmdp_collapse_flush
1357 
1358 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
1359 pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
1360 				   unsigned long addr,
1361 				   pmd_t *pmdp, int full);
1362 
1363 #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
1364 pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
1365 				   unsigned long addr,
1366 				   pud_t *pudp, int full);
1367 
1368 #define __HAVE_ARCH_PGTABLE_DEPOSIT
1369 static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
1370 					      pmd_t *pmdp, pgtable_t pgtable)
1371 {
1372 	if (radix_enabled())
1373 		return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
1374 	return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
1375 }
1376 
1377 #define __HAVE_ARCH_PGTABLE_WITHDRAW
1378 static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
1379 						    pmd_t *pmdp)
1380 {
1381 	if (radix_enabled())
1382 		return radix__pgtable_trans_huge_withdraw(mm, pmdp);
1383 	return hash__pgtable_trans_huge_withdraw(mm, pmdp);
1384 }
1385 
1386 #define __HAVE_ARCH_PMDP_INVALIDATE
1387 extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
1388 			     pmd_t *pmdp);
1389 
1390 #define pmd_move_must_withdraw pmd_move_must_withdraw
1391 struct spinlock;
1392 extern int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
1393 				  struct spinlock *old_pmd_ptl,
1394 				  struct vm_area_struct *vma);
1395 /*
1396  * Hash translation mode uses the deposited table to store hash pte
1397  * slot information.
1398  */
1399 #define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
1400 static inline bool arch_needs_pgtable_deposit(void)
1401 {
1402 	if (radix_enabled())
1403 		return false;
1404 	return true;
1405 }
1406 extern void serialize_against_pte_lookup(struct mm_struct *mm);
1407 
1408 
1409 static inline pmd_t pmd_mkdevmap(pmd_t pmd)
1410 {
1411 	if (radix_enabled())
1412 		return radix__pmd_mkdevmap(pmd);
1413 	return hash__pmd_mkdevmap(pmd);
1414 }
1415 
1416 static inline pud_t pud_mkdevmap(pud_t pud)
1417 {
1418 	if (radix_enabled())
1419 		return radix__pud_mkdevmap(pud);
1420 	BUG();
1421 	return pud;
1422 }
1423 
1424 static inline int pmd_devmap(pmd_t pmd)
1425 {
1426 	return pte_devmap(pmd_pte(pmd));
1427 }
1428 
1429 static inline int pud_devmap(pud_t pud)
1430 {
1431 	return pte_devmap(pud_pte(pud));
1432 }
1433 
1434 static inline int pgd_devmap(pgd_t pgd)
1435 {
1436 	return 0;
1437 }
1438 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1439 
1440 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
1441 pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
1442 void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
1443 			     pte_t *, pte_t, pte_t);
1444 
1445 /*
1446  * Returns true for an R -> RW upgrade of a pte
1447  */
1448 static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
1449 {
1450 	if (!(old_val & _PAGE_READ))
1451 		return false;
1452 
1453 	if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
1454 		return true;
1455 
1456 	return false;
1457 }
1458 
1459 /*
1460  * Like pmd_huge() and pmd_large(), but works regardless of config options
1461  */
1462 #define pmd_is_leaf pmd_is_leaf
1463 #define pmd_leaf pmd_is_leaf
1464 static inline bool pmd_is_leaf(pmd_t pmd)
1465 {
1466 	return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
1467 }
1468 
1469 #define pud_is_leaf pud_is_leaf
1470 #define pud_leaf pud_is_leaf
1471 static inline bool pud_is_leaf(pud_t pud)
1472 {
1473 	return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
1474 }
1475 
1476 #endif /* __ASSEMBLY__ */
1477 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
1478