xref: /linux/mm/debug_vm_pgtable.c (revision 5c8166419acf468b5bc3e48f928a040485d3e0c2)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * This kernel test validates architecture page table helpers and
4  * accessors and helps in verifying their continued compliance with
5  * expected generic MM semantics.
6  *
7  * Copyright (C) 2019 ARM Ltd.
8  *
9  * Author: Anshuman Khandual <anshuman.khandual@arm.com>
10  */
11 #define pr_fmt(fmt) "debug_vm_pgtable: [%-25s]: " fmt, __func__
12 
13 #include <linux/gfp.h>
14 #include <linux/highmem.h>
15 #include <linux/hugetlb.h>
16 #include <linux/kernel.h>
17 #include <linux/kconfig.h>
18 #include <linux/mm.h>
19 #include <linux/mman.h>
20 #include <linux/mm_types.h>
21 #include <linux/module.h>
22 #include <linux/pfn_t.h>
23 #include <linux/printk.h>
24 #include <linux/pgtable.h>
25 #include <linux/random.h>
26 #include <linux/spinlock.h>
27 #include <linux/swap.h>
28 #include <linux/swapops.h>
29 #include <linux/start_kernel.h>
30 #include <linux/sched/mm.h>
31 #include <linux/io.h>
32 
33 #include <asm/cacheflush.h>
34 #include <asm/pgalloc.h>
35 #include <asm/tlbflush.h>
36 
/*
 * Please refer to Documentation/vm/arch_pgtable_helpers.rst for the
 * semantics expectations that are being validated here. All future changes
 * in here or in the documentation need to be kept in sync.
 */
42 
/* Combination of the read, write and execute VM flags. */
#define VMFLAGS		(VM_READ | VM_WRITE | VM_EXEC)
44 
/*
 * Bits that are never set when a random pattern is OR-ed into a page
 * table entry:
 *
 * - On s390 the lower 4 bits identify the page table entry type. Because
 *   of how dynamic page table folding works there, changing them might
 *   affect the ability to clear entries with pxx_clear(). Leaving them
 *   alone does not affect any other platform.
 * - On ppc64 the 62nd bit is used to mark a pte entry.
 */
#define S390_SKIP_MASK		GENMASK(3, 0)

#if __BITS_PER_LONG != 64
#define PPC64_SKIP_MASK		0x0
#else
#define PPC64_SKIP_MASK		GENMASK(62, 62)
#endif

#define ARCH_SKIP_MASK		(S390_SKIP_MASK | PPC64_SKIP_MASK)

/* Every bit of a long set, except the architecture bits skipped above. */
#define RANDOM_ORVALUE		(GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)

/* Non-zero fill byte handed to memset() by the P4D/PGD basic tests. */
#define RANDOM_NZVALUE		GENMASK(7, 0)
62 
63 struct pgtable_debug_args {
64 	struct mm_struct	*mm;
65 	struct vm_area_struct	*vma;
66 
67 	pgd_t			*pgdp;
68 	p4d_t			*p4dp;
69 	pud_t			*pudp;
70 	pmd_t			*pmdp;
71 	pte_t			*ptep;
72 
73 	p4d_t			*start_p4dp;
74 	pud_t			*start_pudp;
75 	pmd_t			*start_pmdp;
76 	pgtable_t		start_ptep;
77 
78 	unsigned long		vaddr;
79 	pgprot_t		page_prot;
80 	pgprot_t		page_prot_none;
81 
82 	bool			is_contiguous_page;
83 	unsigned long		pud_pfn;
84 	unsigned long		pmd_pfn;
85 	unsigned long		pte_pfn;
86 
87 	unsigned long		fixed_pgd_pfn;
88 	unsigned long		fixed_p4d_pfn;
89 	unsigned long		fixed_pud_pfn;
90 	unsigned long		fixed_pmd_pfn;
91 	unsigned long		fixed_pte_pfn;
92 };
93 
94 static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
95 {
96 	pgprot_t prot = protection_map[idx];
97 	pte_t pte = pfn_pte(args->fixed_pte_pfn, prot);
98 	unsigned long val = idx, *ptr = &val;
99 
100 	pr_debug("Validating PTE basic (%pGv)\n", ptr);
101 
102 	/*
103 	 * This test needs to be executed after the given page table entry
104 	 * is created with pfn_pte() to make sure that protection_map[idx]
105 	 * does not have the dirty bit enabled from the beginning. This is
106 	 * important for platforms like arm64 where (!PTE_RDONLY) indicate
107 	 * dirty bit being set.
108 	 */
109 	WARN_ON(pte_dirty(pte_wrprotect(pte)));
110 
111 	WARN_ON(!pte_same(pte, pte));
112 	WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte))));
113 	WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte))));
114 	WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte))));
115 	WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte))));
116 	WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte))));
117 	WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte))));
118 	WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte))));
119 	WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte))));
120 }
121 
122 static void __init pte_advanced_tests(struct pgtable_debug_args *args)
123 {
124 	struct page *page;
125 	pte_t pte;
126 
127 	/*
128 	 * Architectures optimize set_pte_at by avoiding TLB flush.
129 	 * This requires set_pte_at to be not used to update an
130 	 * existing pte entry. Clear pte before we do set_pte_at
131 	 *
132 	 * flush_dcache_page() is called after set_pte_at() to clear
133 	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
134 	 * when it's released and page allocation check will fail when
135 	 * the page is allocated again. For architectures other than ARM64,
136 	 * the unexpected overhead of cache flushing is acceptable.
137 	 */
138 	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
139 	if (!page)
140 		return;
141 
142 	pr_debug("Validating PTE advanced\n");
143 	pte = pfn_pte(args->pte_pfn, args->page_prot);
144 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
145 	flush_dcache_page(page);
146 	ptep_set_wrprotect(args->mm, args->vaddr, args->ptep);
147 	pte = ptep_get(args->ptep);
148 	WARN_ON(pte_write(pte));
149 	ptep_get_and_clear(args->mm, args->vaddr, args->ptep);
150 	pte = ptep_get(args->ptep);
151 	WARN_ON(!pte_none(pte));
152 
153 	pte = pfn_pte(args->pte_pfn, args->page_prot);
154 	pte = pte_wrprotect(pte);
155 	pte = pte_mkclean(pte);
156 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
157 	flush_dcache_page(page);
158 	pte = pte_mkwrite(pte);
159 	pte = pte_mkdirty(pte);
160 	ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
161 	pte = ptep_get(args->ptep);
162 	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
163 	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
164 	pte = ptep_get(args->ptep);
165 	WARN_ON(!pte_none(pte));
166 
167 	pte = pfn_pte(args->pte_pfn, args->page_prot);
168 	pte = pte_mkyoung(pte);
169 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
170 	flush_dcache_page(page);
171 	ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
172 	pte = ptep_get(args->ptep);
173 	WARN_ON(pte_young(pte));
174 
175 	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
176 }
177 
178 static void __init pte_savedwrite_tests(struct pgtable_debug_args *args)
179 {
180 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);
181 
182 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
183 		return;
184 
185 	pr_debug("Validating PTE saved write\n");
186 	WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
187 	WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte))));
188 }
189 
190 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
191 static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
192 {
193 	pgprot_t prot = protection_map[idx];
194 	unsigned long val = idx, *ptr = &val;
195 	pmd_t pmd;
196 
197 	if (!has_transparent_hugepage())
198 		return;
199 
200 	pr_debug("Validating PMD basic (%pGv)\n", ptr);
201 	pmd = pfn_pmd(args->fixed_pmd_pfn, prot);
202 
203 	/*
204 	 * This test needs to be executed after the given page table entry
205 	 * is created with pfn_pmd() to make sure that protection_map[idx]
206 	 * does not have the dirty bit enabled from the beginning. This is
207 	 * important for platforms like arm64 where (!PTE_RDONLY) indicate
208 	 * dirty bit being set.
209 	 */
210 	WARN_ON(pmd_dirty(pmd_wrprotect(pmd)));
211 
212 
213 	WARN_ON(!pmd_same(pmd, pmd));
214 	WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
215 	WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
216 	WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd))));
217 	WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd))));
218 	WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd))));
219 	WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd))));
220 	WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd))));
221 	WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd))));
222 	/*
223 	 * A huge page does not point to next level page table
224 	 * entry. Hence this must qualify as pmd_bad().
225 	 */
226 	WARN_ON(!pmd_bad(pmd_mkhuge(pmd)));
227 }
228 
229 static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
230 {
231 	struct page *page;
232 	pmd_t pmd;
233 	unsigned long vaddr = args->vaddr;
234 
235 	if (!has_transparent_hugepage())
236 		return;
237 
238 	page = (args->pmd_pfn != ULONG_MAX) ? pfn_to_page(args->pmd_pfn) : NULL;
239 	if (!page)
240 		return;
241 
242 	/*
243 	 * flush_dcache_page() is called after set_pmd_at() to clear
244 	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
245 	 * when it's released and page allocation check will fail when
246 	 * the page is allocated again. For architectures other than ARM64,
247 	 * the unexpected overhead of cache flushing is acceptable.
248 	 */
249 	pr_debug("Validating PMD advanced\n");
250 	/* Align the address wrt HPAGE_PMD_SIZE */
251 	vaddr &= HPAGE_PMD_MASK;
252 
253 	pgtable_trans_huge_deposit(args->mm, args->pmdp, args->start_ptep);
254 
255 	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
256 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
257 	flush_dcache_page(page);
258 	pmdp_set_wrprotect(args->mm, vaddr, args->pmdp);
259 	pmd = READ_ONCE(*args->pmdp);
260 	WARN_ON(pmd_write(pmd));
261 	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
262 	pmd = READ_ONCE(*args->pmdp);
263 	WARN_ON(!pmd_none(pmd));
264 
265 	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
266 	pmd = pmd_wrprotect(pmd);
267 	pmd = pmd_mkclean(pmd);
268 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
269 	flush_dcache_page(page);
270 	pmd = pmd_mkwrite(pmd);
271 	pmd = pmd_mkdirty(pmd);
272 	pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
273 	pmd = READ_ONCE(*args->pmdp);
274 	WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd)));
275 	pmdp_huge_get_and_clear_full(args->vma, vaddr, args->pmdp, 1);
276 	pmd = READ_ONCE(*args->pmdp);
277 	WARN_ON(!pmd_none(pmd));
278 
279 	pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot));
280 	pmd = pmd_mkyoung(pmd);
281 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
282 	flush_dcache_page(page);
283 	pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp);
284 	pmd = READ_ONCE(*args->pmdp);
285 	WARN_ON(pmd_young(pmd));
286 
287 	/*  Clear the pte entries  */
288 	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
289 	pgtable_trans_huge_withdraw(args->mm, args->pmdp);
290 }
291 
292 static void __init pmd_leaf_tests(struct pgtable_debug_args *args)
293 {
294 	pmd_t pmd;
295 
296 	if (!has_transparent_hugepage())
297 		return;
298 
299 	pr_debug("Validating PMD leaf\n");
300 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
301 
302 	/*
303 	 * PMD based THP is a leaf entry.
304 	 */
305 	pmd = pmd_mkhuge(pmd);
306 	WARN_ON(!pmd_leaf(pmd));
307 }
308 
309 static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args)
310 {
311 	pmd_t pmd;
312 
313 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
314 		return;
315 
316 	if (!has_transparent_hugepage())
317 		return;
318 
319 	pr_debug("Validating PMD saved write\n");
320 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none);
321 	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
322 	WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
323 }
324 
325 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
326 static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx)
327 {
328 	pgprot_t prot = protection_map[idx];
329 	unsigned long val = idx, *ptr = &val;
330 	pud_t pud;
331 
332 	if (!has_transparent_hugepage())
333 		return;
334 
335 	pr_debug("Validating PUD basic (%pGv)\n", ptr);
336 	pud = pfn_pud(args->fixed_pud_pfn, prot);
337 
338 	/*
339 	 * This test needs to be executed after the given page table entry
340 	 * is created with pfn_pud() to make sure that protection_map[idx]
341 	 * does not have the dirty bit enabled from the beginning. This is
342 	 * important for platforms like arm64 where (!PTE_RDONLY) indicate
343 	 * dirty bit being set.
344 	 */
345 	WARN_ON(pud_dirty(pud_wrprotect(pud)));
346 
347 	WARN_ON(!pud_same(pud, pud));
348 	WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud))));
349 	WARN_ON(!pud_dirty(pud_mkdirty(pud_mkclean(pud))));
350 	WARN_ON(pud_dirty(pud_mkclean(pud_mkdirty(pud))));
351 	WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud))));
352 	WARN_ON(pud_write(pud_wrprotect(pud_mkwrite(pud))));
353 	WARN_ON(pud_young(pud_mkold(pud_mkyoung(pud))));
354 	WARN_ON(pud_dirty(pud_wrprotect(pud_mkclean(pud))));
355 	WARN_ON(!pud_dirty(pud_wrprotect(pud_mkdirty(pud))));
356 
357 	if (mm_pmd_folded(args->mm))
358 		return;
359 
360 	/*
361 	 * A huge page does not point to next level page table
362 	 * entry. Hence this must qualify as pud_bad().
363 	 */
364 	WARN_ON(!pud_bad(pud_mkhuge(pud)));
365 }
366 
367 static void __init pud_advanced_tests(struct pgtable_debug_args *args)
368 {
369 	struct page *page;
370 	unsigned long vaddr = args->vaddr;
371 	pud_t pud;
372 
373 	if (!has_transparent_hugepage())
374 		return;
375 
376 	page = (args->pud_pfn != ULONG_MAX) ? pfn_to_page(args->pud_pfn) : NULL;
377 	if (!page)
378 		return;
379 
380 	/*
381 	 * flush_dcache_page() is called after set_pud_at() to clear
382 	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
383 	 * when it's released and page allocation check will fail when
384 	 * the page is allocated again. For architectures other than ARM64,
385 	 * the unexpected overhead of cache flushing is acceptable.
386 	 */
387 	pr_debug("Validating PUD advanced\n");
388 	/* Align the address wrt HPAGE_PUD_SIZE */
389 	vaddr &= HPAGE_PUD_MASK;
390 
391 	pud = pfn_pud(args->pud_pfn, args->page_prot);
392 	set_pud_at(args->mm, vaddr, args->pudp, pud);
393 	flush_dcache_page(page);
394 	pudp_set_wrprotect(args->mm, vaddr, args->pudp);
395 	pud = READ_ONCE(*args->pudp);
396 	WARN_ON(pud_write(pud));
397 
398 #ifndef __PAGETABLE_PMD_FOLDED
399 	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
400 	pud = READ_ONCE(*args->pudp);
401 	WARN_ON(!pud_none(pud));
402 #endif /* __PAGETABLE_PMD_FOLDED */
403 	pud = pfn_pud(args->pud_pfn, args->page_prot);
404 	pud = pud_wrprotect(pud);
405 	pud = pud_mkclean(pud);
406 	set_pud_at(args->mm, vaddr, args->pudp, pud);
407 	flush_dcache_page(page);
408 	pud = pud_mkwrite(pud);
409 	pud = pud_mkdirty(pud);
410 	pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1);
411 	pud = READ_ONCE(*args->pudp);
412 	WARN_ON(!(pud_write(pud) && pud_dirty(pud)));
413 
414 #ifndef __PAGETABLE_PMD_FOLDED
415 	pudp_huge_get_and_clear_full(args->mm, vaddr, args->pudp, 1);
416 	pud = READ_ONCE(*args->pudp);
417 	WARN_ON(!pud_none(pud));
418 #endif /* __PAGETABLE_PMD_FOLDED */
419 
420 	pud = pfn_pud(args->pud_pfn, args->page_prot);
421 	pud = pud_mkyoung(pud);
422 	set_pud_at(args->mm, vaddr, args->pudp, pud);
423 	flush_dcache_page(page);
424 	pudp_test_and_clear_young(args->vma, vaddr, args->pudp);
425 	pud = READ_ONCE(*args->pudp);
426 	WARN_ON(pud_young(pud));
427 
428 	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
429 }
430 
431 static void __init pud_leaf_tests(struct pgtable_debug_args *args)
432 {
433 	pud_t pud;
434 
435 	if (!has_transparent_hugepage())
436 		return;
437 
438 	pr_debug("Validating PUD leaf\n");
439 	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
440 	/*
441 	 * PUD based THP is a leaf entry.
442 	 */
443 	pud = pud_mkhuge(pud);
444 	WARN_ON(!pud_leaf(pud));
445 }
446 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
447 static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { }
448 static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
449 static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
450 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
451 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
452 static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { }
453 static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { }
454 static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { }
455 static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
456 static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { }
457 static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
458 static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args) { }
459 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
460 
461 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
462 static void __init pmd_huge_tests(struct pgtable_debug_args *args)
463 {
464 	pmd_t pmd;
465 
466 	if (!arch_vmap_pmd_supported(args->page_prot))
467 		return;
468 
469 	pr_debug("Validating PMD huge\n");
470 	/*
471 	 * X86 defined pmd_set_huge() verifies that the given
472 	 * PMD is not a populated non-leaf entry.
473 	 */
474 	WRITE_ONCE(*args->pmdp, __pmd(0));
475 	WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), args->page_prot));
476 	WARN_ON(!pmd_clear_huge(args->pmdp));
477 	pmd = READ_ONCE(*args->pmdp);
478 	WARN_ON(!pmd_none(pmd));
479 }
480 
481 static void __init pud_huge_tests(struct pgtable_debug_args *args)
482 {
483 	pud_t pud;
484 
485 	if (!arch_vmap_pud_supported(args->page_prot))
486 		return;
487 
488 	pr_debug("Validating PUD huge\n");
489 	/*
490 	 * X86 defined pud_set_huge() verifies that the given
491 	 * PUD is not a populated non-leaf entry.
492 	 */
493 	WRITE_ONCE(*args->pudp, __pud(0));
494 	WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), args->page_prot));
495 	WARN_ON(!pud_clear_huge(args->pudp));
496 	pud = READ_ONCE(*args->pudp);
497 	WARN_ON(!pud_none(pud));
498 }
499 #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
500 static void __init pmd_huge_tests(struct pgtable_debug_args *args) { }
501 static void __init pud_huge_tests(struct pgtable_debug_args *args) { }
502 #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
503 
504 static void __init p4d_basic_tests(struct pgtable_debug_args *args)
505 {
506 	p4d_t p4d;
507 
508 	pr_debug("Validating P4D basic\n");
509 	memset(&p4d, RANDOM_NZVALUE, sizeof(p4d_t));
510 	WARN_ON(!p4d_same(p4d, p4d));
511 }
512 
513 static void __init pgd_basic_tests(struct pgtable_debug_args *args)
514 {
515 	pgd_t pgd;
516 
517 	pr_debug("Validating PGD basic\n");
518 	memset(&pgd, RANDOM_NZVALUE, sizeof(pgd_t));
519 	WARN_ON(!pgd_same(pgd, pgd));
520 }
521 
522 #ifndef __PAGETABLE_PUD_FOLDED
523 static void __init pud_clear_tests(struct pgtable_debug_args *args)
524 {
525 	pud_t pud = READ_ONCE(*args->pudp);
526 
527 	if (mm_pmd_folded(args->mm))
528 		return;
529 
530 	pr_debug("Validating PUD clear\n");
531 	pud = __pud(pud_val(pud) | RANDOM_ORVALUE);
532 	WRITE_ONCE(*args->pudp, pud);
533 	pud_clear(args->pudp);
534 	pud = READ_ONCE(*args->pudp);
535 	WARN_ON(!pud_none(pud));
536 }
537 
538 static void __init pud_populate_tests(struct pgtable_debug_args *args)
539 {
540 	pud_t pud;
541 
542 	if (mm_pmd_folded(args->mm))
543 		return;
544 
545 	pr_debug("Validating PUD populate\n");
546 	/*
547 	 * This entry points to next level page table page.
548 	 * Hence this must not qualify as pud_bad().
549 	 */
550 	pud_populate(args->mm, args->pudp, args->start_pmdp);
551 	pud = READ_ONCE(*args->pudp);
552 	WARN_ON(pud_bad(pud));
553 }
554 #else  /* !__PAGETABLE_PUD_FOLDED */
555 static void __init pud_clear_tests(struct pgtable_debug_args *args) { }
556 static void __init pud_populate_tests(struct pgtable_debug_args *args) { }
557 #endif /* PAGETABLE_PUD_FOLDED */
558 
559 #ifndef __PAGETABLE_P4D_FOLDED
560 static void __init p4d_clear_tests(struct pgtable_debug_args *args)
561 {
562 	p4d_t p4d = READ_ONCE(*args->p4dp);
563 
564 	if (mm_pud_folded(args->mm))
565 		return;
566 
567 	pr_debug("Validating P4D clear\n");
568 	p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE);
569 	WRITE_ONCE(*args->p4dp, p4d);
570 	p4d_clear(args->p4dp);
571 	p4d = READ_ONCE(*args->p4dp);
572 	WARN_ON(!p4d_none(p4d));
573 }
574 
575 static void __init p4d_populate_tests(struct pgtable_debug_args *args)
576 {
577 	p4d_t p4d;
578 
579 	if (mm_pud_folded(args->mm))
580 		return;
581 
582 	pr_debug("Validating P4D populate\n");
583 	/*
584 	 * This entry points to next level page table page.
585 	 * Hence this must not qualify as p4d_bad().
586 	 */
587 	pud_clear(args->pudp);
588 	p4d_clear(args->p4dp);
589 	p4d_populate(args->mm, args->p4dp, args->start_pudp);
590 	p4d = READ_ONCE(*args->p4dp);
591 	WARN_ON(p4d_bad(p4d));
592 }
593 
594 static void __init pgd_clear_tests(struct pgtable_debug_args *args)
595 {
596 	pgd_t pgd = READ_ONCE(*(args->pgdp));
597 
598 	if (mm_p4d_folded(args->mm))
599 		return;
600 
601 	pr_debug("Validating PGD clear\n");
602 	pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE);
603 	WRITE_ONCE(*args->pgdp, pgd);
604 	pgd_clear(args->pgdp);
605 	pgd = READ_ONCE(*args->pgdp);
606 	WARN_ON(!pgd_none(pgd));
607 }
608 
609 static void __init pgd_populate_tests(struct pgtable_debug_args *args)
610 {
611 	pgd_t pgd;
612 
613 	if (mm_p4d_folded(args->mm))
614 		return;
615 
616 	pr_debug("Validating PGD populate\n");
617 	/*
618 	 * This entry points to next level page table page.
619 	 * Hence this must not qualify as pgd_bad().
620 	 */
621 	p4d_clear(args->p4dp);
622 	pgd_clear(args->pgdp);
623 	pgd_populate(args->mm, args->pgdp, args->start_p4dp);
624 	pgd = READ_ONCE(*args->pgdp);
625 	WARN_ON(pgd_bad(pgd));
626 }
627 #else  /* !__PAGETABLE_P4D_FOLDED */
628 static void __init p4d_clear_tests(struct pgtable_debug_args *args) { }
629 static void __init pgd_clear_tests(struct pgtable_debug_args *args) { }
630 static void __init p4d_populate_tests(struct pgtable_debug_args *args) { }
631 static void __init pgd_populate_tests(struct pgtable_debug_args *args) { }
632 #endif /* PAGETABLE_P4D_FOLDED */
633 
634 static void __init pte_clear_tests(struct pgtable_debug_args *args)
635 {
636 	struct page *page;
637 	pte_t pte = pfn_pte(args->pte_pfn, args->page_prot);
638 
639 	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
640 	if (!page)
641 		return;
642 
643 	/*
644 	 * flush_dcache_page() is called after set_pte_at() to clear
645 	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
646 	 * when it's released and page allocation check will fail when
647 	 * the page is allocated again. For architectures other than ARM64,
648 	 * the unexpected overhead of cache flushing is acceptable.
649 	 */
650 	pr_debug("Validating PTE clear\n");
651 #ifndef CONFIG_RISCV
652 	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
653 #endif
654 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
655 	flush_dcache_page(page);
656 	barrier();
657 	ptep_clear(args->mm, args->vaddr, args->ptep);
658 	pte = ptep_get(args->ptep);
659 	WARN_ON(!pte_none(pte));
660 }
661 
662 static void __init pmd_clear_tests(struct pgtable_debug_args *args)
663 {
664 	pmd_t pmd = READ_ONCE(*args->pmdp);
665 
666 	pr_debug("Validating PMD clear\n");
667 	pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE);
668 	WRITE_ONCE(*args->pmdp, pmd);
669 	pmd_clear(args->pmdp);
670 	pmd = READ_ONCE(*args->pmdp);
671 	WARN_ON(!pmd_none(pmd));
672 }
673 
674 static void __init pmd_populate_tests(struct pgtable_debug_args *args)
675 {
676 	pmd_t pmd;
677 
678 	pr_debug("Validating PMD populate\n");
679 	/*
680 	 * This entry points to next level page table page.
681 	 * Hence this must not qualify as pmd_bad().
682 	 */
683 	pmd_populate(args->mm, args->pmdp, args->start_ptep);
684 	pmd = READ_ONCE(*args->pmdp);
685 	WARN_ON(pmd_bad(pmd));
686 }
687 
688 static void __init pte_special_tests(struct pgtable_debug_args *args)
689 {
690 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
691 
692 	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL))
693 		return;
694 
695 	pr_debug("Validating PTE special\n");
696 	WARN_ON(!pte_special(pte_mkspecial(pte)));
697 }
698 
699 static void __init pte_protnone_tests(struct pgtable_debug_args *args)
700 {
701 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);
702 
703 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
704 		return;
705 
706 	pr_debug("Validating PTE protnone\n");
707 	WARN_ON(!pte_protnone(pte));
708 	WARN_ON(!pte_present(pte));
709 }
710 
711 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
712 static void __init pmd_protnone_tests(struct pgtable_debug_args *args)
713 {
714 	pmd_t pmd;
715 
716 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
717 		return;
718 
719 	if (!has_transparent_hugepage())
720 		return;
721 
722 	pr_debug("Validating PMD protnone\n");
723 	pmd = pmd_mkhuge(pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none));
724 	WARN_ON(!pmd_protnone(pmd));
725 	WARN_ON(!pmd_present(pmd));
726 }
727 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
728 static void __init pmd_protnone_tests(struct pgtable_debug_args *args) { }
729 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
730 
731 #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
732 static void __init pte_devmap_tests(struct pgtable_debug_args *args)
733 {
734 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
735 
736 	pr_debug("Validating PTE devmap\n");
737 	WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
738 }
739 
740 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
741 static void __init pmd_devmap_tests(struct pgtable_debug_args *args)
742 {
743 	pmd_t pmd;
744 
745 	if (!has_transparent_hugepage())
746 		return;
747 
748 	pr_debug("Validating PMD devmap\n");
749 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
750 	WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
751 }
752 
753 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
754 static void __init pud_devmap_tests(struct pgtable_debug_args *args)
755 {
756 	pud_t pud;
757 
758 	if (!has_transparent_hugepage())
759 		return;
760 
761 	pr_debug("Validating PUD devmap\n");
762 	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
763 	WARN_ON(!pud_devmap(pud_mkdevmap(pud)));
764 }
765 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
766 static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
767 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
768 #else  /* CONFIG_TRANSPARENT_HUGEPAGE */
769 static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { }
770 static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
771 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
772 #else
773 static void __init pte_devmap_tests(struct pgtable_debug_args *args) { }
774 static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { }
775 static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
776 #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */
777 
778 static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
779 {
780 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
781 
782 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
783 		return;
784 
785 	pr_debug("Validating PTE soft dirty\n");
786 	WARN_ON(!pte_soft_dirty(pte_mksoft_dirty(pte)));
787 	WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte)));
788 }
789 
790 static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
791 {
792 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
793 
794 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
795 		return;
796 
797 	pr_debug("Validating PTE swap soft dirty\n");
798 	WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte)));
799 	WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte)));
800 }
801 
802 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
803 static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
804 {
805 	pmd_t pmd;
806 
807 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
808 		return;
809 
810 	if (!has_transparent_hugepage())
811 		return;
812 
813 	pr_debug("Validating PMD soft dirty\n");
814 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
815 	WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd)));
816 	WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd)));
817 }
818 
819 static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
820 {
821 	pmd_t pmd;
822 
823 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
824 		!IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
825 		return;
826 
827 	if (!has_transparent_hugepage())
828 		return;
829 
830 	pr_debug("Validating PMD swap soft dirty\n");
831 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
832 	WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd)));
833 	WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd)));
834 }
835 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
836 static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { }
837 static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { }
838 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
839 
840 static void __init pte_swap_tests(struct pgtable_debug_args *args)
841 {
842 	swp_entry_t swp;
843 	pte_t pte;
844 
845 	pr_debug("Validating PTE swap\n");
846 	pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
847 	swp = __pte_to_swp_entry(pte);
848 	pte = __swp_entry_to_pte(swp);
849 	WARN_ON(args->fixed_pte_pfn != pte_pfn(pte));
850 }
851 
852 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
853 static void __init pmd_swap_tests(struct pgtable_debug_args *args)
854 {
855 	swp_entry_t swp;
856 	pmd_t pmd;
857 
858 	if (!has_transparent_hugepage())
859 		return;
860 
861 	pr_debug("Validating PMD swap\n");
862 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
863 	swp = __pmd_to_swp_entry(pmd);
864 	pmd = __swp_entry_to_pmd(swp);
865 	WARN_ON(args->fixed_pmd_pfn != pmd_pfn(pmd));
866 }
867 #else  /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */
868 static void __init pmd_swap_tests(struct pgtable_debug_args *args) { }
869 #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
870 
871 static void __init swap_migration_tests(struct pgtable_debug_args *args)
872 {
873 	struct page *page;
874 	swp_entry_t swp;
875 
876 	if (!IS_ENABLED(CONFIG_MIGRATION))
877 		return;
878 
879 	/*
880 	 * swap_migration_tests() requires a dedicated page as it needs to
881 	 * be locked before creating a migration entry from it. Locking the
882 	 * page that actually maps kernel text ('start_kernel') can be real
883 	 * problematic. Lets use the allocated page explicitly for this
884 	 * purpose.
885 	 */
886 	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
887 	if (!page)
888 		return;
889 
890 	pr_debug("Validating swap migration\n");
891 
892 	/*
893 	 * make_[readable|writable]_migration_entry() expects given page to
894 	 * be locked, otherwise it stumbles upon a BUG_ON().
895 	 */
896 	__SetPageLocked(page);
897 	swp = make_writable_migration_entry(page_to_pfn(page));
898 	WARN_ON(!is_migration_entry(swp));
899 	WARN_ON(!is_writable_migration_entry(swp));
900 
901 	swp = make_readable_migration_entry(swp_offset(swp));
902 	WARN_ON(!is_migration_entry(swp));
903 	WARN_ON(is_writable_migration_entry(swp));
904 
905 	swp = make_readable_migration_entry(page_to_pfn(page));
906 	WARN_ON(!is_migration_entry(swp));
907 	WARN_ON(is_writable_migration_entry(swp));
908 	__ClearPageLocked(page);
909 }
910 
911 #ifdef CONFIG_HUGETLB_PAGE
912 static void __init hugetlb_basic_tests(struct pgtable_debug_args *args)
913 {
914 	struct page *page;
915 	pte_t pte;
916 
917 	pr_debug("Validating HugeTLB basic\n");
918 	/*
919 	 * Accessing the page associated with the pfn is safe here,
920 	 * as it was previously derived from a real kernel symbol.
921 	 */
922 	page = pfn_to_page(args->fixed_pmd_pfn);
923 	pte = mk_huge_pte(page, args->page_prot);
924 
925 	WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte)));
926 	WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte))));
927 	WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte))));
928 
929 #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
930 	pte = pfn_pte(args->fixed_pmd_pfn, args->page_prot);
931 
932 	WARN_ON(!pte_huge(pte_mkhuge(pte)));
933 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
934 }
935 #else  /* !CONFIG_HUGETLB_PAGE */
936 static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) { }
937 #endif /* CONFIG_HUGETLB_PAGE */
938 
939 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
940 static void __init pmd_thp_tests(struct pgtable_debug_args *args)
941 {
942 	pmd_t pmd;
943 
944 	if (!has_transparent_hugepage())
945 		return;
946 
947 	pr_debug("Validating PMD based THP\n");
948 	/*
949 	 * pmd_trans_huge() and pmd_present() must return positive after
950 	 * MMU invalidation with pmd_mkinvalid(). This behavior is an
951 	 * optimization for transparent huge page. pmd_trans_huge() must
952 	 * be true if pmd_page() returns a valid THP to avoid taking the
953 	 * pmd_lock when others walk over non transhuge pmds (i.e. there
954 	 * are no THP allocated). Especially when splitting a THP and
955 	 * removing the present bit from the pmd, pmd_trans_huge() still
956 	 * needs to return true. pmd_present() should be true whenever
957 	 * pmd_trans_huge() returns true.
958 	 */
959 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
960 	WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));
961 
962 #ifndef __HAVE_ARCH_PMDP_INVALIDATE
963 	WARN_ON(!pmd_trans_huge(pmd_mkinvalid(pmd_mkhuge(pmd))));
964 	WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd))));
965 #endif /* __HAVE_ARCH_PMDP_INVALIDATE */
966 }
967 
968 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
969 static void __init pud_thp_tests(struct pgtable_debug_args *args)
970 {
971 	pud_t pud;
972 
973 	if (!has_transparent_hugepage())
974 		return;
975 
976 	pr_debug("Validating PUD based THP\n");
977 	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
978 	WARN_ON(!pud_trans_huge(pud_mkhuge(pud)));
979 
980 	/*
981 	 * pud_mkinvalid() has been dropped for now. Enable back
982 	 * these tests when it comes back with a modified pud_present().
983 	 *
984 	 * WARN_ON(!pud_trans_huge(pud_mkinvalid(pud_mkhuge(pud))));
985 	 * WARN_ON(!pud_present(pud_mkinvalid(pud_mkhuge(pud))));
986 	 */
987 }
988 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
/* No PUD based THP support configured on this architecture; empty stub. */
static void __init pud_thp_tests(struct pgtable_debug_args *args) { }
990 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
991 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
/* Without CONFIG_TRANSPARENT_HUGEPAGE both THP test entry points are empty stubs. */
static void __init pmd_thp_tests(struct pgtable_debug_args *args) { }
static void __init pud_thp_tests(struct pgtable_debug_args *args) { }
994 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
995 
996 static unsigned long __init get_random_vaddr(void)
997 {
998 	unsigned long random_vaddr, random_pages, total_user_pages;
999 
1000 	total_user_pages = (TASK_SIZE - FIRST_USER_ADDRESS) / PAGE_SIZE;
1001 
1002 	random_pages = get_random_long() % total_user_pages;
1003 	random_vaddr = FIRST_USER_ADDRESS + random_pages * PAGE_SIZE;
1004 
1005 	return random_vaddr;
1006 }
1007 
1008 static void __init destroy_args(struct pgtable_debug_args *args)
1009 {
1010 	struct page *page = NULL;
1011 
1012 	/* Free (huge) page */
1013 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
1014 	    IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
1015 	    has_transparent_hugepage() &&
1016 	    args->pud_pfn != ULONG_MAX) {
1017 		if (args->is_contiguous_page) {
1018 			free_contig_range(args->pud_pfn,
1019 					  (1 << (HPAGE_PUD_SHIFT - PAGE_SHIFT)));
1020 		} else {
1021 			page = pfn_to_page(args->pud_pfn);
1022 			__free_pages(page, HPAGE_PUD_SHIFT - PAGE_SHIFT);
1023 		}
1024 
1025 		args->pud_pfn = ULONG_MAX;
1026 		args->pmd_pfn = ULONG_MAX;
1027 		args->pte_pfn = ULONG_MAX;
1028 	}
1029 
1030 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
1031 	    has_transparent_hugepage() &&
1032 	    args->pmd_pfn != ULONG_MAX) {
1033 		if (args->is_contiguous_page) {
1034 			free_contig_range(args->pmd_pfn, (1 << HPAGE_PMD_ORDER));
1035 		} else {
1036 			page = pfn_to_page(args->pmd_pfn);
1037 			__free_pages(page, HPAGE_PMD_ORDER);
1038 		}
1039 
1040 		args->pmd_pfn = ULONG_MAX;
1041 		args->pte_pfn = ULONG_MAX;
1042 	}
1043 
1044 	if (args->pte_pfn != ULONG_MAX) {
1045 		page = pfn_to_page(args->pte_pfn);
1046 		__free_pages(page, 0);
1047 
1048 		args->pte_pfn = ULONG_MAX;
1049 	}
1050 
1051 	/* Free page table entries */
1052 	if (args->start_ptep) {
1053 		pte_free(args->mm, args->start_ptep);
1054 		mm_dec_nr_ptes(args->mm);
1055 	}
1056 
1057 	if (args->start_pmdp) {
1058 		pmd_free(args->mm, args->start_pmdp);
1059 		mm_dec_nr_pmds(args->mm);
1060 	}
1061 
1062 	if (args->start_pudp) {
1063 		pud_free(args->mm, args->start_pudp);
1064 		mm_dec_nr_puds(args->mm);
1065 	}
1066 
1067 	if (args->start_p4dp)
1068 		p4d_free(args->mm, args->start_p4dp);
1069 
1070 	/* Free vma and mm struct */
1071 	if (args->vma)
1072 		vm_area_free(args->vma);
1073 
1074 	if (args->mm)
1075 		mmdrop(args->mm);
1076 }
1077 
1078 static struct page * __init
1079 debug_vm_pgtable_alloc_huge_page(struct pgtable_debug_args *args, int order)
1080 {
1081 	struct page *page = NULL;
1082 
1083 #ifdef CONFIG_CONTIG_ALLOC
1084 	if (order >= MAX_ORDER) {
1085 		page = alloc_contig_pages((1 << order), GFP_KERNEL,
1086 					  first_online_node, NULL);
1087 		if (page) {
1088 			args->is_contiguous_page = true;
1089 			return page;
1090 		}
1091 	}
1092 #endif
1093 
1094 	if (order < MAX_ORDER)
1095 		page = alloc_pages(GFP_KERNEL, order);
1096 
1097 	return page;
1098 }
1099 
1100 static int __init init_args(struct pgtable_debug_args *args)
1101 {
1102 	struct page *page = NULL;
1103 	phys_addr_t phys;
1104 	int ret = 0;
1105 
1106 	/*
1107 	 * Initialize the debugging data.
1108 	 *
1109 	 * protection_map[0] (or even protection_map[8]) will help create
1110 	 * page table entries with PROT_NONE permission as required for
1111 	 * pxx_protnone_tests().
1112 	 */
1113 	memset(args, 0, sizeof(*args));
1114 	args->vaddr              = get_random_vaddr();
1115 	args->page_prot          = vm_get_page_prot(VMFLAGS);
1116 	args->page_prot_none     = protection_map[0];
1117 	args->is_contiguous_page = false;
1118 	args->pud_pfn            = ULONG_MAX;
1119 	args->pmd_pfn            = ULONG_MAX;
1120 	args->pte_pfn            = ULONG_MAX;
1121 	args->fixed_pgd_pfn      = ULONG_MAX;
1122 	args->fixed_p4d_pfn      = ULONG_MAX;
1123 	args->fixed_pud_pfn      = ULONG_MAX;
1124 	args->fixed_pmd_pfn      = ULONG_MAX;
1125 	args->fixed_pte_pfn      = ULONG_MAX;
1126 
1127 	/* Allocate mm and vma */
1128 	args->mm = mm_alloc();
1129 	if (!args->mm) {
1130 		pr_err("Failed to allocate mm struct\n");
1131 		ret = -ENOMEM;
1132 		goto error;
1133 	}
1134 
1135 	args->vma = vm_area_alloc(args->mm);
1136 	if (!args->vma) {
1137 		pr_err("Failed to allocate vma\n");
1138 		ret = -ENOMEM;
1139 		goto error;
1140 	}
1141 
1142 	/*
1143 	 * Allocate page table entries. They will be modified in the tests.
1144 	 * Lets save the page table entries so that they can be released
1145 	 * when the tests are completed.
1146 	 */
1147 	args->pgdp = pgd_offset(args->mm, args->vaddr);
1148 	args->p4dp = p4d_alloc(args->mm, args->pgdp, args->vaddr);
1149 	if (!args->p4dp) {
1150 		pr_err("Failed to allocate p4d entries\n");
1151 		ret = -ENOMEM;
1152 		goto error;
1153 	}
1154 	args->start_p4dp = p4d_offset(args->pgdp, 0UL);
1155 	WARN_ON(!args->start_p4dp);
1156 
1157 	args->pudp = pud_alloc(args->mm, args->p4dp, args->vaddr);
1158 	if (!args->pudp) {
1159 		pr_err("Failed to allocate pud entries\n");
1160 		ret = -ENOMEM;
1161 		goto error;
1162 	}
1163 	args->start_pudp = pud_offset(args->p4dp, 0UL);
1164 	WARN_ON(!args->start_pudp);
1165 
1166 	args->pmdp = pmd_alloc(args->mm, args->pudp, args->vaddr);
1167 	if (!args->pmdp) {
1168 		pr_err("Failed to allocate pmd entries\n");
1169 		ret = -ENOMEM;
1170 		goto error;
1171 	}
1172 	args->start_pmdp = pmd_offset(args->pudp, 0UL);
1173 	WARN_ON(!args->start_pmdp);
1174 
1175 	if (pte_alloc(args->mm, args->pmdp)) {
1176 		pr_err("Failed to allocate pte entries\n");
1177 		ret = -ENOMEM;
1178 		goto error;
1179 	}
1180 	args->start_ptep = pmd_pgtable(READ_ONCE(*args->pmdp));
1181 	WARN_ON(!args->start_ptep);
1182 
1183 	/*
1184 	 * PFN for mapping at PTE level is determined from a standard kernel
1185 	 * text symbol. But pfns for higher page table levels are derived by
1186 	 * masking lower bits of this real pfn. These derived pfns might not
1187 	 * exist on the platform but that does not really matter as pfn_pxx()
1188 	 * helpers will still create appropriate entries for the test. This
1189 	 * helps avoid large memory block allocations to be used for mapping
1190 	 * at higher page table levels in some of the tests.
1191 	 */
1192 	phys = __pa_symbol(&start_kernel);
1193 	args->fixed_pgd_pfn = __phys_to_pfn(phys & PGDIR_MASK);
1194 	args->fixed_p4d_pfn = __phys_to_pfn(phys & P4D_MASK);
1195 	args->fixed_pud_pfn = __phys_to_pfn(phys & PUD_MASK);
1196 	args->fixed_pmd_pfn = __phys_to_pfn(phys & PMD_MASK);
1197 	args->fixed_pte_pfn = __phys_to_pfn(phys & PAGE_MASK);
1198 	WARN_ON(!pfn_valid(args->fixed_pte_pfn));
1199 
1200 	/*
1201 	 * Allocate (huge) pages because some of the tests need to access
1202 	 * the data in the pages. The corresponding tests will be skipped
1203 	 * if we fail to allocate (huge) pages.
1204 	 */
1205 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
1206 	    IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
1207 	    has_transparent_hugepage()) {
1208 		page = debug_vm_pgtable_alloc_huge_page(args,
1209 				HPAGE_PUD_SHIFT - PAGE_SHIFT);
1210 		if (page) {
1211 			args->pud_pfn = page_to_pfn(page);
1212 			args->pmd_pfn = args->pud_pfn;
1213 			args->pte_pfn = args->pud_pfn;
1214 			return 0;
1215 		}
1216 	}
1217 
1218 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
1219 	    has_transparent_hugepage()) {
1220 		page = debug_vm_pgtable_alloc_huge_page(args, HPAGE_PMD_ORDER);
1221 		if (page) {
1222 			args->pmd_pfn = page_to_pfn(page);
1223 			args->pte_pfn = args->pmd_pfn;
1224 			return 0;
1225 		}
1226 	}
1227 
1228 	page = alloc_pages(GFP_KERNEL, 0);
1229 	if (page)
1230 		args->pte_pfn = page_to_pfn(page);
1231 
1232 	return 0;
1233 
1234 error:
1235 	destroy_args(args);
1236 	return ret;
1237 }
1238 
/*
 * Entry point of the page table helper validation, registered below as a
 * late initcall.
 *
 * Sets up the test arguments with init_args(), runs the tests that only
 * transform entry values, then runs the tests that modify the allocated
 * page table while holding the lock for the respective level, and finally
 * releases everything with destroy_args().
 *
 * Returns the error from init_args() if the setup fails, 0 otherwise. Test
 * failures are reported by the individual tests and do not affect the
 * return value.
 */
static int __init debug_vm_pgtable(void)
{
	struct pgtable_debug_args args;
	spinlock_t *ptl = NULL;
	int idx, ret;

	pr_info("Validating architecture page table helpers\n");
	ret = init_args(&args);
	if (ret)
		return ret;

	/*
	 * Iterate over the protection_map[] to make sure that all
	 * the basic page table transformation validations just hold
	 * true irrespective of the starting protection value for a
	 * given page table entry.
	 */
	for (idx = 0; idx < ARRAY_SIZE(protection_map); idx++) {
		pte_basic_tests(&args, idx);
		pmd_basic_tests(&args, idx);
		pud_basic_tests(&args, idx);
	}

	/*
	 * Both P4D and PGD level tests are very basic which do not
	 * involve creating page table entries from the protection
	 * value and the given pfn. Hence just keep them out from
	 * the above iteration for now to save some test execution
	 * time.
	 */
	p4d_basic_tests(&args);
	pgd_basic_tests(&args);

	pmd_leaf_tests(&args);
	pud_leaf_tests(&args);

	pte_savedwrite_tests(&args);
	pmd_savedwrite_tests(&args);

	pte_special_tests(&args);
	pte_protnone_tests(&args);
	pmd_protnone_tests(&args);

	pte_devmap_tests(&args);
	pmd_devmap_tests(&args);
	pud_devmap_tests(&args);

	pte_soft_dirty_tests(&args);
	pmd_soft_dirty_tests(&args);
	pte_swap_soft_dirty_tests(&args);
	pmd_swap_soft_dirty_tests(&args);

	pte_swap_tests(&args);
	pmd_swap_tests(&args);

	swap_migration_tests(&args);

	pmd_thp_tests(&args);
	pud_thp_tests(&args);

	hugetlb_basic_tests(&args);

	/*
	 * Page table modifying tests. They need to hold
	 * proper page table lock.
	 */

	/* PTE level: map the pte for vaddr and hold its lock. */
	args.ptep = pte_offset_map_lock(args.mm, args.pmdp, args.vaddr, &ptl);
	pte_clear_tests(&args);
	pte_advanced_tests(&args);
	pte_unmap_unlock(args.ptep, ptl);

	/* PMD level, under the lock returned by pmd_lock(). */
	ptl = pmd_lock(args.mm, args.pmdp);
	pmd_clear_tests(&args);
	pmd_advanced_tests(&args);
	pmd_huge_tests(&args);
	pmd_populate_tests(&args);
	spin_unlock(ptl);

	/* PUD level, under the lock returned by pud_lock(). */
	ptl = pud_lock(args.mm, args.pudp);
	pud_clear_tests(&args);
	pud_advanced_tests(&args);
	pud_huge_tests(&args);
	pud_populate_tests(&args);
	spin_unlock(ptl);

	/* P4D and PGD levels share the mm wide page_table_lock. */
	spin_lock(&(args.mm->page_table_lock));
	p4d_clear_tests(&args);
	pgd_clear_tests(&args);
	p4d_populate_tests(&args);
	pgd_populate_tests(&args);
	spin_unlock(&(args.mm->page_table_lock));

	destroy_args(&args);
	return 0;
}
late_initcall(debug_vm_pgtable);
1336