xref: /linux/mm/debug_vm_pgtable.c (revision c358f53871605a1a8d7ed6e544a05ea00e9c80cb)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * This kernel test validates architecture page table helpers and
4  * accessors and helps in verifying their continued compliance with
5  * expected generic MM semantics.
6  *
7  * Copyright (C) 2019 ARM Ltd.
8  *
9  * Author: Anshuman Khandual <anshuman.khandual@arm.com>
10  */
11 #define pr_fmt(fmt) "debug_vm_pgtable: [%-25s]: " fmt, __func__
12 
13 #include <linux/gfp.h>
14 #include <linux/highmem.h>
15 #include <linux/hugetlb.h>
16 #include <linux/kernel.h>
17 #include <linux/kconfig.h>
18 #include <linux/mm.h>
19 #include <linux/mman.h>
20 #include <linux/mm_types.h>
21 #include <linux/module.h>
22 #include <linux/pfn_t.h>
23 #include <linux/printk.h>
24 #include <linux/pgtable.h>
25 #include <linux/random.h>
26 #include <linux/spinlock.h>
27 #include <linux/swap.h>
28 #include <linux/swapops.h>
29 #include <linux/start_kernel.h>
30 #include <linux/sched/mm.h>
31 #include <linux/io.h>
32 
33 #include <asm/cacheflush.h>
34 #include <asm/pgalloc.h>
35 #include <asm/tlbflush.h>
36 
37 /*
 * Please refer to Documentation/mm/arch_pgtable_helpers.rst for the semantic
 * expectations that are being validated here. All future changes here or in
 * the documentation need to be kept in sync.
41  */
42 
/* Read, write and execute VM flags combined into a single mask. */
#define VMFLAGS	(VM_READ|VM_WRITE|VM_EXEC)

/*
 * On the s390 platform, the lower 4 bits are used to identify a given page
 * table entry type. But these bits might affect the ability to clear entries
 * with pxx_clear() because of how dynamic page table folding works on s390.
 * So while loading up the entries do not change the lower 4 bits. This has
 * no effect on any other platform. Also avoid the 62nd bit on ppc64 that is
 * used to mark a pte entry.
 */
/* Bits 3..0: left untouched for the sake of s390 (see above). */
#define S390_SKIP_MASK		GENMASK(3, 0)
#if __BITS_PER_LONG == 64
/* Bit 62: left untouched for the sake of ppc64 (see above). */
#define PPC64_SKIP_MASK		GENMASK(62, 62)
#else
/* No bit 62 to skip when longs are not 64 bits wide. */
#define PPC64_SKIP_MASK		0x0
#endif
/* Union of all bits that must not be forced on in a test entry. */
#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
/* Every bit of a long set, except those in ARCH_SKIP_MASK. */
#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
/* Low eight bits set; used in this file as a non-zero memset() fill value. */
#define RANDOM_NZVALUE	GENMASK(7, 0)
62 
/*
 * State shared by all the page table helper tests in this file.
 *
 * mm, vma:
 *	Passed through to the architecture helpers that take an mm or a vma
 *	(set_pte_at(), ptep_set_access_flags(), ...).
 * pgdp, p4dp, pudp, pmdp, ptep:
 *	The entry slots at each level that the tests write to and clear.
 * start_p4dp, start_pudp, start_pmdp, start_ptep:
 *	Next-level tables handed to the pxx_populate() helpers; start_ptep
 *	is also passed to pgtable_trans_huge_deposit() by the PMD advanced
 *	test.
 * vaddr:
 *	Virtual address passed alongside the entries; the PMD/PUD advanced
 *	tests align it down to the huge page size first.
 * page_prot, page_prot_none:
 *	Protections used to build test entries; page_prot_none is used by
 *	the protnone and savedwrite tests.
 * is_contiguous_page:
 *	Not referenced by the tests visible in this part of the file
 *	(presumably describes how the test page was allocated; verify
 *	against the setup code).
 * pud_pfn, pmd_pfn, pte_pfn:
 *	PFNs of the page used by tests that write into live slots. A value
 *	of ULONG_MAX makes those tests return without doing anything.
 * fixed_pgd_pfn ... fixed_pte_pfn:
 *	PFNs used to build entries that are only manipulated as values.
 *	The HugeTLB test comment notes that the PFN it uses was derived
 *	from a real kernel symbol.
 */
struct pgtable_debug_args {
	struct mm_struct	*mm;
	struct vm_area_struct	*vma;

	pgd_t			*pgdp;
	p4d_t			*p4dp;
	pud_t			*pudp;
	pmd_t			*pmdp;
	pte_t			*ptep;

	p4d_t			*start_p4dp;
	pud_t			*start_pudp;
	pmd_t			*start_pmdp;
	pgtable_t		start_ptep;

	unsigned long		vaddr;
	pgprot_t		page_prot;
	pgprot_t		page_prot_none;

	bool			is_contiguous_page;
	unsigned long		pud_pfn;
	unsigned long		pmd_pfn;
	unsigned long		pte_pfn;

	unsigned long		fixed_pgd_pfn;
	unsigned long		fixed_p4d_pfn;
	unsigned long		fixed_pud_pfn;
	unsigned long		fixed_pmd_pfn;
	unsigned long		fixed_pte_pfn;
};
93 
94 static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
95 {
96 	pgprot_t prot = vm_get_page_prot(idx);
97 	pte_t pte = pfn_pte(args->fixed_pte_pfn, prot);
98 	unsigned long val = idx, *ptr = &val;
99 
100 	pr_debug("Validating PTE basic (%pGv)\n", ptr);
101 
102 	/*
103 	 * This test needs to be executed after the given page table entry
104 	 * is created with pfn_pte() to make sure that vm_get_page_prot(idx)
105 	 * does not have the dirty bit enabled from the beginning. This is
106 	 * important for platforms like arm64 where (!PTE_RDONLY) indicate
107 	 * dirty bit being set.
108 	 */
109 	WARN_ON(pte_dirty(pte_wrprotect(pte)));
110 
111 	WARN_ON(!pte_same(pte, pte));
112 	WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte))));
113 	WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte))));
114 	WARN_ON(!pte_write(pte_mkwrite(pte_wrprotect(pte))));
115 	WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte))));
116 	WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte))));
117 	WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte))));
118 	WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte))));
119 	WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte))));
120 }
121 
122 static void __init pte_advanced_tests(struct pgtable_debug_args *args)
123 {
124 	struct page *page;
125 	pte_t pte;
126 
127 	/*
128 	 * Architectures optimize set_pte_at by avoiding TLB flush.
129 	 * This requires set_pte_at to be not used to update an
130 	 * existing pte entry. Clear pte before we do set_pte_at
131 	 *
132 	 * flush_dcache_page() is called after set_pte_at() to clear
133 	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
134 	 * when it's released and page allocation check will fail when
135 	 * the page is allocated again. For architectures other than ARM64,
136 	 * the unexpected overhead of cache flushing is acceptable.
137 	 */
138 	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
139 	if (!page)
140 		return;
141 
142 	pr_debug("Validating PTE advanced\n");
143 	pte = pfn_pte(args->pte_pfn, args->page_prot);
144 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
145 	flush_dcache_page(page);
146 	ptep_set_wrprotect(args->mm, args->vaddr, args->ptep);
147 	pte = ptep_get(args->ptep);
148 	WARN_ON(pte_write(pte));
149 	ptep_get_and_clear(args->mm, args->vaddr, args->ptep);
150 	pte = ptep_get(args->ptep);
151 	WARN_ON(!pte_none(pte));
152 
153 	pte = pfn_pte(args->pte_pfn, args->page_prot);
154 	pte = pte_wrprotect(pte);
155 	pte = pte_mkclean(pte);
156 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
157 	flush_dcache_page(page);
158 	pte = pte_mkwrite(pte);
159 	pte = pte_mkdirty(pte);
160 	ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
161 	pte = ptep_get(args->ptep);
162 	WARN_ON(!(pte_write(pte) && pte_dirty(pte)));
163 	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
164 	pte = ptep_get(args->ptep);
165 	WARN_ON(!pte_none(pte));
166 
167 	pte = pfn_pte(args->pte_pfn, args->page_prot);
168 	pte = pte_mkyoung(pte);
169 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
170 	flush_dcache_page(page);
171 	ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
172 	pte = ptep_get(args->ptep);
173 	WARN_ON(pte_young(pte));
174 
175 	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
176 }
177 
178 static void __init pte_savedwrite_tests(struct pgtable_debug_args *args)
179 {
180 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);
181 
182 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
183 		return;
184 
185 	pr_debug("Validating PTE saved write\n");
186 	WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
187 	WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte))));
188 }
189 
190 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
191 static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
192 {
193 	pgprot_t prot = vm_get_page_prot(idx);
194 	unsigned long val = idx, *ptr = &val;
195 	pmd_t pmd;
196 
197 	if (!has_transparent_hugepage())
198 		return;
199 
200 	pr_debug("Validating PMD basic (%pGv)\n", ptr);
201 	pmd = pfn_pmd(args->fixed_pmd_pfn, prot);
202 
203 	/*
204 	 * This test needs to be executed after the given page table entry
205 	 * is created with pfn_pmd() to make sure that vm_get_page_prot(idx)
206 	 * does not have the dirty bit enabled from the beginning. This is
207 	 * important for platforms like arm64 where (!PTE_RDONLY) indicate
208 	 * dirty bit being set.
209 	 */
210 	WARN_ON(pmd_dirty(pmd_wrprotect(pmd)));
211 
212 
213 	WARN_ON(!pmd_same(pmd, pmd));
214 	WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
215 	WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
216 	WARN_ON(!pmd_write(pmd_mkwrite(pmd_wrprotect(pmd))));
217 	WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd))));
218 	WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd))));
219 	WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd))));
220 	WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd))));
221 	WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd))));
222 	/*
223 	 * A huge page does not point to next level page table
224 	 * entry. Hence this must qualify as pmd_bad().
225 	 */
226 	WARN_ON(!pmd_bad(pmd_mkhuge(pmd)));
227 }
228 
229 static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
230 {
231 	struct page *page;
232 	pmd_t pmd;
233 	unsigned long vaddr = args->vaddr;
234 
235 	if (!has_transparent_hugepage())
236 		return;
237 
238 	page = (args->pmd_pfn != ULONG_MAX) ? pfn_to_page(args->pmd_pfn) : NULL;
239 	if (!page)
240 		return;
241 
242 	/*
243 	 * flush_dcache_page() is called after set_pmd_at() to clear
244 	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
245 	 * when it's released and page allocation check will fail when
246 	 * the page is allocated again. For architectures other than ARM64,
247 	 * the unexpected overhead of cache flushing is acceptable.
248 	 */
249 	pr_debug("Validating PMD advanced\n");
250 	/* Align the address wrt HPAGE_PMD_SIZE */
251 	vaddr &= HPAGE_PMD_MASK;
252 
253 	pgtable_trans_huge_deposit(args->mm, args->pmdp, args->start_ptep);
254 
255 	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
256 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
257 	flush_dcache_page(page);
258 	pmdp_set_wrprotect(args->mm, vaddr, args->pmdp);
259 	pmd = READ_ONCE(*args->pmdp);
260 	WARN_ON(pmd_write(pmd));
261 	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
262 	pmd = READ_ONCE(*args->pmdp);
263 	WARN_ON(!pmd_none(pmd));
264 
265 	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
266 	pmd = pmd_wrprotect(pmd);
267 	pmd = pmd_mkclean(pmd);
268 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
269 	flush_dcache_page(page);
270 	pmd = pmd_mkwrite(pmd);
271 	pmd = pmd_mkdirty(pmd);
272 	pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
273 	pmd = READ_ONCE(*args->pmdp);
274 	WARN_ON(!(pmd_write(pmd) && pmd_dirty(pmd)));
275 	pmdp_huge_get_and_clear_full(args->vma, vaddr, args->pmdp, 1);
276 	pmd = READ_ONCE(*args->pmdp);
277 	WARN_ON(!pmd_none(pmd));
278 
279 	pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot));
280 	pmd = pmd_mkyoung(pmd);
281 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
282 	flush_dcache_page(page);
283 	pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp);
284 	pmd = READ_ONCE(*args->pmdp);
285 	WARN_ON(pmd_young(pmd));
286 
287 	/*  Clear the pte entries  */
288 	pmdp_huge_get_and_clear(args->mm, vaddr, args->pmdp);
289 	pgtable_trans_huge_withdraw(args->mm, args->pmdp);
290 }
291 
292 static void __init pmd_leaf_tests(struct pgtable_debug_args *args)
293 {
294 	pmd_t pmd;
295 
296 	if (!has_transparent_hugepage())
297 		return;
298 
299 	pr_debug("Validating PMD leaf\n");
300 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
301 
302 	/*
303 	 * PMD based THP is a leaf entry.
304 	 */
305 	pmd = pmd_mkhuge(pmd);
306 	WARN_ON(!pmd_leaf(pmd));
307 }
308 
309 static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args)
310 {
311 	pmd_t pmd;
312 
313 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
314 		return;
315 
316 	if (!has_transparent_hugepage())
317 		return;
318 
319 	pr_debug("Validating PMD saved write\n");
320 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none);
321 	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
322 	WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
323 }
324 
325 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
326 static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx)
327 {
328 	pgprot_t prot = vm_get_page_prot(idx);
329 	unsigned long val = idx, *ptr = &val;
330 	pud_t pud;
331 
332 	if (!has_transparent_hugepage())
333 		return;
334 
335 	pr_debug("Validating PUD basic (%pGv)\n", ptr);
336 	pud = pfn_pud(args->fixed_pud_pfn, prot);
337 
338 	/*
339 	 * This test needs to be executed after the given page table entry
340 	 * is created with pfn_pud() to make sure that vm_get_page_prot(idx)
341 	 * does not have the dirty bit enabled from the beginning. This is
342 	 * important for platforms like arm64 where (!PTE_RDONLY) indicate
343 	 * dirty bit being set.
344 	 */
345 	WARN_ON(pud_dirty(pud_wrprotect(pud)));
346 
347 	WARN_ON(!pud_same(pud, pud));
348 	WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud))));
349 	WARN_ON(!pud_dirty(pud_mkdirty(pud_mkclean(pud))));
350 	WARN_ON(pud_dirty(pud_mkclean(pud_mkdirty(pud))));
351 	WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud))));
352 	WARN_ON(pud_write(pud_wrprotect(pud_mkwrite(pud))));
353 	WARN_ON(pud_young(pud_mkold(pud_mkyoung(pud))));
354 	WARN_ON(pud_dirty(pud_wrprotect(pud_mkclean(pud))));
355 	WARN_ON(!pud_dirty(pud_wrprotect(pud_mkdirty(pud))));
356 
357 	if (mm_pmd_folded(args->mm))
358 		return;
359 
360 	/*
361 	 * A huge page does not point to next level page table
362 	 * entry. Hence this must qualify as pud_bad().
363 	 */
364 	WARN_ON(!pud_bad(pud_mkhuge(pud)));
365 }
366 
/*
 * Exercise the PUD helpers that operate on a live slot (args->pudp),
 * mapping args->pud_pfn at args->vaddr rounded down to HPAGE_PUD_SIZE.
 * Returns without testing when has_transparent_hugepage() is false or
 * when pud_pfn is ULONG_MAX.
 *
 * The get-and-clear checks in the first two rounds are compiled only when
 * __PAGETABLE_PMD_FOLDED is not defined; the final clear is unconditional.
 */
static void __init pud_advanced_tests(struct pgtable_debug_args *args)
{
	struct page *page;
	unsigned long vaddr = args->vaddr;
	pud_t pud;

	if (!has_transparent_hugepage())
		return;

	/* ULONG_MAX in pud_pfn is treated as "no page to test with". */
	page = (args->pud_pfn != ULONG_MAX) ? pfn_to_page(args->pud_pfn) : NULL;
	if (!page)
		return;

	/*
	 * flush_dcache_page() is called after set_pud_at() to clear
	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
	 * when it's released and page allocation check will fail when
	 * the page is allocated again. For architectures other than ARM64,
	 * the unexpected overhead of cache flushing is acceptable.
	 */
	pr_debug("Validating PUD advanced\n");
	/* Align the address wrt HPAGE_PUD_SIZE */
	vaddr &= HPAGE_PUD_MASK;

	/* Round 1: write-protect in place; the entry must not be writable. */
	pud = pfn_pud(args->pud_pfn, args->page_prot);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	flush_dcache_page(page);
	pudp_set_wrprotect(args->mm, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(pud_write(pud));

#ifndef __PAGETABLE_PMD_FOLDED
	/* Get-and-clear must leave an empty slot behind. */
	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */
	/*
	 * Round 2: install a clean, read-only entry and upgrade it to
	 * writable + dirty through pudp_set_access_flags().
	 */
	pud = pfn_pud(args->pud_pfn, args->page_prot);
	pud = pud_wrprotect(pud);
	pud = pud_mkclean(pud);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	flush_dcache_page(page);
	pud = pud_mkwrite(pud);
	pud = pud_mkdirty(pud);
	pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!(pud_write(pud) && pud_dirty(pud)));

#ifndef __PAGETABLE_PMD_FOLDED
	/* The "full" get-and-clear variant must also empty the slot. */
	pudp_huge_get_and_clear_full(args->mm, vaddr, args->pudp, 1);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(!pud_none(pud));
#endif /* __PAGETABLE_PMD_FOLDED */

	/* Round 3: a young entry must be old after test-and-clear-young. */
	pud = pfn_pud(args->pud_pfn, args->page_prot);
	pud = pud_mkyoung(pud);
	set_pud_at(args->mm, vaddr, args->pudp, pud);
	flush_dcache_page(page);
	pudp_test_and_clear_young(args->vma, vaddr, args->pudp);
	pud = READ_ONCE(*args->pudp);
	WARN_ON(pud_young(pud));

	/* Clear the slot before returning. */
	pudp_huge_get_and_clear(args->mm, vaddr, args->pudp);
}
430 
431 static void __init pud_leaf_tests(struct pgtable_debug_args *args)
432 {
433 	pud_t pud;
434 
435 	if (!has_transparent_hugepage())
436 		return;
437 
438 	pr_debug("Validating PUD leaf\n");
439 	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
440 	/*
441 	 * PUD based THP is a leaf entry.
442 	 */
443 	pud = pud_mkhuge(pud);
444 	WARN_ON(!pud_leaf(pud));
445 }
446 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
/* Empty stand-ins, built when CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is not set. */
static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { }
static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
450 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
451 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
/* Empty stand-ins for the PMD/PUD THP tests, built when CONFIG_TRANSPARENT_HUGEPAGE is not set. */
static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx) { }
static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx) { }
static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { }
static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { }
static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args) { }
459 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
460 
461 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
462 static void __init pmd_huge_tests(struct pgtable_debug_args *args)
463 {
464 	pmd_t pmd;
465 
466 	if (!arch_vmap_pmd_supported(args->page_prot))
467 		return;
468 
469 	pr_debug("Validating PMD huge\n");
470 	/*
471 	 * X86 defined pmd_set_huge() verifies that the given
472 	 * PMD is not a populated non-leaf entry.
473 	 */
474 	WRITE_ONCE(*args->pmdp, __pmd(0));
475 	WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), args->page_prot));
476 	WARN_ON(!pmd_clear_huge(args->pmdp));
477 	pmd = READ_ONCE(*args->pmdp);
478 	WARN_ON(!pmd_none(pmd));
479 }
480 
481 static void __init pud_huge_tests(struct pgtable_debug_args *args)
482 {
483 	pud_t pud;
484 
485 	if (!arch_vmap_pud_supported(args->page_prot))
486 		return;
487 
488 	pr_debug("Validating PUD huge\n");
489 	/*
490 	 * X86 defined pud_set_huge() verifies that the given
491 	 * PUD is not a populated non-leaf entry.
492 	 */
493 	WRITE_ONCE(*args->pudp, __pud(0));
494 	WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), args->page_prot));
495 	WARN_ON(!pud_clear_huge(args->pudp));
496 	pud = READ_ONCE(*args->pudp);
497 	WARN_ON(!pud_none(pud));
498 }
499 #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
/* Empty stand-ins, built when CONFIG_HAVE_ARCH_HUGE_VMAP is not set. */
static void __init pmd_huge_tests(struct pgtable_debug_args *args) { }
static void __init pud_huge_tests(struct pgtable_debug_args *args) { }
502 #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
503 
504 static void __init p4d_basic_tests(struct pgtable_debug_args *args)
505 {
506 	p4d_t p4d;
507 
508 	pr_debug("Validating P4D basic\n");
509 	memset(&p4d, RANDOM_NZVALUE, sizeof(p4d_t));
510 	WARN_ON(!p4d_same(p4d, p4d));
511 }
512 
513 static void __init pgd_basic_tests(struct pgtable_debug_args *args)
514 {
515 	pgd_t pgd;
516 
517 	pr_debug("Validating PGD basic\n");
518 	memset(&pgd, RANDOM_NZVALUE, sizeof(pgd_t));
519 	WARN_ON(!pgd_same(pgd, pgd));
520 }
521 
522 #ifndef __PAGETABLE_PUD_FOLDED
523 static void __init pud_clear_tests(struct pgtable_debug_args *args)
524 {
525 	pud_t pud = READ_ONCE(*args->pudp);
526 
527 	if (mm_pmd_folded(args->mm))
528 		return;
529 
530 	pr_debug("Validating PUD clear\n");
531 	pud = __pud(pud_val(pud) | RANDOM_ORVALUE);
532 	WRITE_ONCE(*args->pudp, pud);
533 	pud_clear(args->pudp);
534 	pud = READ_ONCE(*args->pudp);
535 	WARN_ON(!pud_none(pud));
536 }
537 
538 static void __init pud_populate_tests(struct pgtable_debug_args *args)
539 {
540 	pud_t pud;
541 
542 	if (mm_pmd_folded(args->mm))
543 		return;
544 
545 	pr_debug("Validating PUD populate\n");
546 	/*
547 	 * This entry points to next level page table page.
548 	 * Hence this must not qualify as pud_bad().
549 	 */
550 	pud_populate(args->mm, args->pudp, args->start_pmdp);
551 	pud = READ_ONCE(*args->pudp);
552 	WARN_ON(pud_bad(pud));
553 }
554 #else  /* !__PAGETABLE_PUD_FOLDED */
/* Empty stand-ins, built when __PAGETABLE_PUD_FOLDED is defined. */
static void __init pud_clear_tests(struct pgtable_debug_args *args) { }
static void __init pud_populate_tests(struct pgtable_debug_args *args) { }
557 #endif /* PAGETABLE_PUD_FOLDED */
558 
559 #ifndef __PAGETABLE_P4D_FOLDED
560 static void __init p4d_clear_tests(struct pgtable_debug_args *args)
561 {
562 	p4d_t p4d = READ_ONCE(*args->p4dp);
563 
564 	if (mm_pud_folded(args->mm))
565 		return;
566 
567 	pr_debug("Validating P4D clear\n");
568 	p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE);
569 	WRITE_ONCE(*args->p4dp, p4d);
570 	p4d_clear(args->p4dp);
571 	p4d = READ_ONCE(*args->p4dp);
572 	WARN_ON(!p4d_none(p4d));
573 }
574 
575 static void __init p4d_populate_tests(struct pgtable_debug_args *args)
576 {
577 	p4d_t p4d;
578 
579 	if (mm_pud_folded(args->mm))
580 		return;
581 
582 	pr_debug("Validating P4D populate\n");
583 	/*
584 	 * This entry points to next level page table page.
585 	 * Hence this must not qualify as p4d_bad().
586 	 */
587 	pud_clear(args->pudp);
588 	p4d_clear(args->p4dp);
589 	p4d_populate(args->mm, args->p4dp, args->start_pudp);
590 	p4d = READ_ONCE(*args->p4dp);
591 	WARN_ON(p4d_bad(p4d));
592 }
593 
594 static void __init pgd_clear_tests(struct pgtable_debug_args *args)
595 {
596 	pgd_t pgd = READ_ONCE(*(args->pgdp));
597 
598 	if (mm_p4d_folded(args->mm))
599 		return;
600 
601 	pr_debug("Validating PGD clear\n");
602 	pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE);
603 	WRITE_ONCE(*args->pgdp, pgd);
604 	pgd_clear(args->pgdp);
605 	pgd = READ_ONCE(*args->pgdp);
606 	WARN_ON(!pgd_none(pgd));
607 }
608 
609 static void __init pgd_populate_tests(struct pgtable_debug_args *args)
610 {
611 	pgd_t pgd;
612 
613 	if (mm_p4d_folded(args->mm))
614 		return;
615 
616 	pr_debug("Validating PGD populate\n");
617 	/*
618 	 * This entry points to next level page table page.
619 	 * Hence this must not qualify as pgd_bad().
620 	 */
621 	p4d_clear(args->p4dp);
622 	pgd_clear(args->pgdp);
623 	pgd_populate(args->mm, args->pgdp, args->start_p4dp);
624 	pgd = READ_ONCE(*args->pgdp);
625 	WARN_ON(pgd_bad(pgd));
626 }
627 #else  /* !__PAGETABLE_P4D_FOLDED */
/* Empty stand-ins, built when __PAGETABLE_P4D_FOLDED is defined. */
static void __init p4d_clear_tests(struct pgtable_debug_args *args) { }
static void __init pgd_clear_tests(struct pgtable_debug_args *args) { }
static void __init p4d_populate_tests(struct pgtable_debug_args *args) { }
static void __init pgd_populate_tests(struct pgtable_debug_args *args) { }
632 #endif /* PAGETABLE_P4D_FOLDED */
633 
634 static void __init pte_clear_tests(struct pgtable_debug_args *args)
635 {
636 	struct page *page;
637 	pte_t pte = pfn_pte(args->pte_pfn, args->page_prot);
638 
639 	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
640 	if (!page)
641 		return;
642 
643 	/*
644 	 * flush_dcache_page() is called after set_pte_at() to clear
645 	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
646 	 * when it's released and page allocation check will fail when
647 	 * the page is allocated again. For architectures other than ARM64,
648 	 * the unexpected overhead of cache flushing is acceptable.
649 	 */
650 	pr_debug("Validating PTE clear\n");
651 #ifndef CONFIG_RISCV
652 	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
653 #endif
654 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
655 	flush_dcache_page(page);
656 	barrier();
657 	ptep_clear(args->mm, args->vaddr, args->ptep);
658 	pte = ptep_get(args->ptep);
659 	WARN_ON(!pte_none(pte));
660 }
661 
662 static void __init pmd_clear_tests(struct pgtable_debug_args *args)
663 {
664 	pmd_t pmd = READ_ONCE(*args->pmdp);
665 
666 	pr_debug("Validating PMD clear\n");
667 	pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE);
668 	WRITE_ONCE(*args->pmdp, pmd);
669 	pmd_clear(args->pmdp);
670 	pmd = READ_ONCE(*args->pmdp);
671 	WARN_ON(!pmd_none(pmd));
672 }
673 
674 static void __init pmd_populate_tests(struct pgtable_debug_args *args)
675 {
676 	pmd_t pmd;
677 
678 	pr_debug("Validating PMD populate\n");
679 	/*
680 	 * This entry points to next level page table page.
681 	 * Hence this must not qualify as pmd_bad().
682 	 */
683 	pmd_populate(args->mm, args->pmdp, args->start_ptep);
684 	pmd = READ_ONCE(*args->pmdp);
685 	WARN_ON(pmd_bad(pmd));
686 }
687 
688 static void __init pte_special_tests(struct pgtable_debug_args *args)
689 {
690 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
691 
692 	if (!IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL))
693 		return;
694 
695 	pr_debug("Validating PTE special\n");
696 	WARN_ON(!pte_special(pte_mkspecial(pte)));
697 }
698 
699 static void __init pte_protnone_tests(struct pgtable_debug_args *args)
700 {
701 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);
702 
703 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
704 		return;
705 
706 	pr_debug("Validating PTE protnone\n");
707 	WARN_ON(!pte_protnone(pte));
708 	WARN_ON(!pte_present(pte));
709 }
710 
711 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
712 static void __init pmd_protnone_tests(struct pgtable_debug_args *args)
713 {
714 	pmd_t pmd;
715 
716 	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
717 		return;
718 
719 	if (!has_transparent_hugepage())
720 		return;
721 
722 	pr_debug("Validating PMD protnone\n");
723 	pmd = pmd_mkhuge(pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none));
724 	WARN_ON(!pmd_protnone(pmd));
725 	WARN_ON(!pmd_present(pmd));
726 }
727 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
/* Empty stand-in, built when CONFIG_TRANSPARENT_HUGEPAGE is not set. */
static void __init pmd_protnone_tests(struct pgtable_debug_args *args) { }
729 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
730 
731 #ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
732 static void __init pte_devmap_tests(struct pgtable_debug_args *args)
733 {
734 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
735 
736 	pr_debug("Validating PTE devmap\n");
737 	WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
738 }
739 
740 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
741 static void __init pmd_devmap_tests(struct pgtable_debug_args *args)
742 {
743 	pmd_t pmd;
744 
745 	if (!has_transparent_hugepage())
746 		return;
747 
748 	pr_debug("Validating PMD devmap\n");
749 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
750 	WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
751 }
752 
753 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
754 static void __init pud_devmap_tests(struct pgtable_debug_args *args)
755 {
756 	pud_t pud;
757 
758 	if (!has_transparent_hugepage())
759 		return;
760 
761 	pr_debug("Validating PUD devmap\n");
762 	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
763 	WARN_ON(!pud_devmap(pud_mkdevmap(pud)));
764 }
765 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
/* Empty stand-in, built when CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is not set. */
static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
767 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
768 #else  /* CONFIG_TRANSPARENT_HUGEPAGE */
/* Empty stand-ins, built when CONFIG_TRANSPARENT_HUGEPAGE is not set. */
static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { }
static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
771 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
772 #else
/* Empty stand-ins, built when CONFIG_ARCH_HAS_PTE_DEVMAP is not set. */
static void __init pte_devmap_tests(struct pgtable_debug_args *args) { }
static void __init pmd_devmap_tests(struct pgtable_debug_args *args) { }
static void __init pud_devmap_tests(struct pgtable_debug_args *args) { }
776 #endif /* CONFIG_ARCH_HAS_PTE_DEVMAP */
777 
778 static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
779 {
780 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
781 
782 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
783 		return;
784 
785 	pr_debug("Validating PTE soft dirty\n");
786 	WARN_ON(!pte_soft_dirty(pte_mksoft_dirty(pte)));
787 	WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte)));
788 }
789 
790 static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
791 {
792 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
793 
794 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
795 		return;
796 
797 	pr_debug("Validating PTE swap soft dirty\n");
798 	WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte)));
799 	WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte)));
800 }
801 
802 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
803 static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
804 {
805 	pmd_t pmd;
806 
807 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
808 		return;
809 
810 	if (!has_transparent_hugepage())
811 		return;
812 
813 	pr_debug("Validating PMD soft dirty\n");
814 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
815 	WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd)));
816 	WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd)));
817 }
818 
819 static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
820 {
821 	pmd_t pmd;
822 
823 	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
824 		!IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
825 		return;
826 
827 	if (!has_transparent_hugepage())
828 		return;
829 
830 	pr_debug("Validating PMD swap soft dirty\n");
831 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
832 	WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd)));
833 	WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd)));
834 }
835 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
/* Empty stand-ins, built when CONFIG_TRANSPARENT_HUGEPAGE is not set. */
static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { }
static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { }
838 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
839 
840 static void __init pte_swap_exclusive_tests(struct pgtable_debug_args *args)
841 {
842 #ifdef __HAVE_ARCH_PTE_SWP_EXCLUSIVE
843 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
844 
845 	pr_debug("Validating PTE swap exclusive\n");
846 	pte = pte_swp_mkexclusive(pte);
847 	WARN_ON(!pte_swp_exclusive(pte));
848 	pte = pte_swp_clear_exclusive(pte);
849 	WARN_ON(pte_swp_exclusive(pte));
850 #endif /* __HAVE_ARCH_PTE_SWP_EXCLUSIVE */
851 }
852 
853 static void __init pte_swap_tests(struct pgtable_debug_args *args)
854 {
855 	swp_entry_t swp;
856 	pte_t pte;
857 
858 	pr_debug("Validating PTE swap\n");
859 	pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
860 	swp = __pte_to_swp_entry(pte);
861 	pte = __swp_entry_to_pte(swp);
862 	WARN_ON(args->fixed_pte_pfn != pte_pfn(pte));
863 }
864 
865 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
866 static void __init pmd_swap_tests(struct pgtable_debug_args *args)
867 {
868 	swp_entry_t swp;
869 	pmd_t pmd;
870 
871 	if (!has_transparent_hugepage())
872 		return;
873 
874 	pr_debug("Validating PMD swap\n");
875 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
876 	swp = __pmd_to_swp_entry(pmd);
877 	pmd = __swp_entry_to_pmd(swp);
878 	WARN_ON(args->fixed_pmd_pfn != pmd_pfn(pmd));
879 }
880 #else  /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */
/* Empty stand-in, built when CONFIG_ARCH_ENABLE_THP_MIGRATION is not set. */
static void __init pmd_swap_tests(struct pgtable_debug_args *args) { }
882 #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
883 
884 static void __init swap_migration_tests(struct pgtable_debug_args *args)
885 {
886 	struct page *page;
887 	swp_entry_t swp;
888 
889 	if (!IS_ENABLED(CONFIG_MIGRATION))
890 		return;
891 
892 	/*
893 	 * swap_migration_tests() requires a dedicated page as it needs to
894 	 * be locked before creating a migration entry from it. Locking the
895 	 * page that actually maps kernel text ('start_kernel') can be real
896 	 * problematic. Lets use the allocated page explicitly for this
897 	 * purpose.
898 	 */
899 	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
900 	if (!page)
901 		return;
902 
903 	pr_debug("Validating swap migration\n");
904 
905 	/*
906 	 * make_[readable|writable]_migration_entry() expects given page to
907 	 * be locked, otherwise it stumbles upon a BUG_ON().
908 	 */
909 	__SetPageLocked(page);
910 	swp = make_writable_migration_entry(page_to_pfn(page));
911 	WARN_ON(!is_migration_entry(swp));
912 	WARN_ON(!is_writable_migration_entry(swp));
913 
914 	swp = make_readable_migration_entry(swp_offset(swp));
915 	WARN_ON(!is_migration_entry(swp));
916 	WARN_ON(is_writable_migration_entry(swp));
917 
918 	swp = make_readable_migration_entry(page_to_pfn(page));
919 	WARN_ON(!is_migration_entry(swp));
920 	WARN_ON(is_writable_migration_entry(swp));
921 	__ClearPageLocked(page);
922 }
923 
924 #ifdef CONFIG_HUGETLB_PAGE
925 static void __init hugetlb_basic_tests(struct pgtable_debug_args *args)
926 {
927 	struct page *page;
928 	pte_t pte;
929 
930 	pr_debug("Validating HugeTLB basic\n");
931 	/*
932 	 * Accessing the page associated with the pfn is safe here,
933 	 * as it was previously derived from a real kernel symbol.
934 	 */
935 	page = pfn_to_page(args->fixed_pmd_pfn);
936 	pte = mk_huge_pte(page, args->page_prot);
937 
938 	WARN_ON(!huge_pte_dirty(huge_pte_mkdirty(pte)));
939 	WARN_ON(!huge_pte_write(huge_pte_mkwrite(huge_pte_wrprotect(pte))));
940 	WARN_ON(huge_pte_write(huge_pte_wrprotect(huge_pte_mkwrite(pte))));
941 
942 #ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
943 	pte = pfn_pte(args->fixed_pmd_pfn, args->page_prot);
944 
945 	WARN_ON(!pte_huge(pte_mkhuge(pte)));
946 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
947 }
948 #else  /* !CONFIG_HUGETLB_PAGE */
949 static void __init hugetlb_basic_tests(struct pgtable_debug_args *args) { }
950 #endif /* CONFIG_HUGETLB_PAGE */
951 
952 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
953 static void __init pmd_thp_tests(struct pgtable_debug_args *args)
954 {
955 	pmd_t pmd;
956 
957 	if (!has_transparent_hugepage())
958 		return;
959 
960 	pr_debug("Validating PMD based THP\n");
961 	/*
962 	 * pmd_trans_huge() and pmd_present() must return positive after
963 	 * MMU invalidation with pmd_mkinvalid(). This behavior is an
964 	 * optimization for transparent huge page. pmd_trans_huge() must
965 	 * be true if pmd_page() returns a valid THP to avoid taking the
966 	 * pmd_lock when others walk over non transhuge pmds (i.e. there
967 	 * are no THP allocated). Especially when splitting a THP and
968 	 * removing the present bit from the pmd, pmd_trans_huge() still
969 	 * needs to return true. pmd_present() should be true whenever
970 	 * pmd_trans_huge() returns true.
971 	 */
972 	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot);
973 	WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));
974 
975 #ifndef __HAVE_ARCH_PMDP_INVALIDATE
976 	WARN_ON(!pmd_trans_huge(pmd_mkinvalid(pmd_mkhuge(pmd))));
977 	WARN_ON(!pmd_present(pmd_mkinvalid(pmd_mkhuge(pmd))));
978 #endif /* __HAVE_ARCH_PMDP_INVALIDATE */
979 }
980 
981 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
982 static void __init pud_thp_tests(struct pgtable_debug_args *args)
983 {
984 	pud_t pud;
985 
986 	if (!has_transparent_hugepage())
987 		return;
988 
989 	pr_debug("Validating PUD based THP\n");
990 	pud = pfn_pud(args->fixed_pud_pfn, args->page_prot);
991 	WARN_ON(!pud_trans_huge(pud_mkhuge(pud)));
992 
993 	/*
994 	 * pud_mkinvalid() has been dropped for now. Enable back
995 	 * these tests when it comes back with a modified pud_present().
996 	 *
997 	 * WARN_ON(!pud_trans_huge(pud_mkinvalid(pud_mkhuge(pud))));
998 	 * WARN_ON(!pud_present(pud_mkinvalid(pud_mkhuge(pud))));
999 	 */
1000 }
1001 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
1002 static void __init pud_thp_tests(struct pgtable_debug_args *args) { }
1003 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
1004 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
1005 static void __init pmd_thp_tests(struct pgtable_debug_args *args) { }
1006 static void __init pud_thp_tests(struct pgtable_debug_args *args) { }
1007 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1008 
1009 static unsigned long __init get_random_vaddr(void)
1010 {
1011 	unsigned long random_vaddr, random_pages, total_user_pages;
1012 
1013 	total_user_pages = (TASK_SIZE - FIRST_USER_ADDRESS) / PAGE_SIZE;
1014 
1015 	random_pages = get_random_long() % total_user_pages;
1016 	random_vaddr = FIRST_USER_ADDRESS + random_pages * PAGE_SIZE;
1017 
1018 	return random_vaddr;
1019 }
1020 
1021 static void __init destroy_args(struct pgtable_debug_args *args)
1022 {
1023 	struct page *page = NULL;
1024 
1025 	/* Free (huge) page */
1026 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
1027 	    IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
1028 	    has_transparent_hugepage() &&
1029 	    args->pud_pfn != ULONG_MAX) {
1030 		if (args->is_contiguous_page) {
1031 			free_contig_range(args->pud_pfn,
1032 					  (1 << (HPAGE_PUD_SHIFT - PAGE_SHIFT)));
1033 		} else {
1034 			page = pfn_to_page(args->pud_pfn);
1035 			__free_pages(page, HPAGE_PUD_SHIFT - PAGE_SHIFT);
1036 		}
1037 
1038 		args->pud_pfn = ULONG_MAX;
1039 		args->pmd_pfn = ULONG_MAX;
1040 		args->pte_pfn = ULONG_MAX;
1041 	}
1042 
1043 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
1044 	    has_transparent_hugepage() &&
1045 	    args->pmd_pfn != ULONG_MAX) {
1046 		if (args->is_contiguous_page) {
1047 			free_contig_range(args->pmd_pfn, (1 << HPAGE_PMD_ORDER));
1048 		} else {
1049 			page = pfn_to_page(args->pmd_pfn);
1050 			__free_pages(page, HPAGE_PMD_ORDER);
1051 		}
1052 
1053 		args->pmd_pfn = ULONG_MAX;
1054 		args->pte_pfn = ULONG_MAX;
1055 	}
1056 
1057 	if (args->pte_pfn != ULONG_MAX) {
1058 		page = pfn_to_page(args->pte_pfn);
1059 		__free_pages(page, 0);
1060 
1061 		args->pte_pfn = ULONG_MAX;
1062 	}
1063 
1064 	/* Free page table entries */
1065 	if (args->start_ptep) {
1066 		pte_free(args->mm, args->start_ptep);
1067 		mm_dec_nr_ptes(args->mm);
1068 	}
1069 
1070 	if (args->start_pmdp) {
1071 		pmd_free(args->mm, args->start_pmdp);
1072 		mm_dec_nr_pmds(args->mm);
1073 	}
1074 
1075 	if (args->start_pudp) {
1076 		pud_free(args->mm, args->start_pudp);
1077 		mm_dec_nr_puds(args->mm);
1078 	}
1079 
1080 	if (args->start_p4dp)
1081 		p4d_free(args->mm, args->start_p4dp);
1082 
1083 	/* Free vma and mm struct */
1084 	if (args->vma)
1085 		vm_area_free(args->vma);
1086 
1087 	if (args->mm)
1088 		mmdrop(args->mm);
1089 }
1090 
1091 static struct page * __init
1092 debug_vm_pgtable_alloc_huge_page(struct pgtable_debug_args *args, int order)
1093 {
1094 	struct page *page = NULL;
1095 
1096 #ifdef CONFIG_CONTIG_ALLOC
1097 	if (order >= MAX_ORDER) {
1098 		page = alloc_contig_pages((1 << order), GFP_KERNEL,
1099 					  first_online_node, NULL);
1100 		if (page) {
1101 			args->is_contiguous_page = true;
1102 			return page;
1103 		}
1104 	}
1105 #endif
1106 
1107 	if (order < MAX_ORDER)
1108 		page = alloc_pages(GFP_KERNEL, order);
1109 
1110 	return page;
1111 }
1112 
1113 static int __init init_args(struct pgtable_debug_args *args)
1114 {
1115 	struct page *page = NULL;
1116 	phys_addr_t phys;
1117 	int ret = 0;
1118 
1119 	/*
1120 	 * Initialize the debugging data.
1121 	 *
1122 	 * vm_get_page_prot(VM_NONE) or vm_get_page_prot(VM_SHARED|VM_NONE)
1123 	 * will help create page table entries with PROT_NONE permission as
1124 	 * required for pxx_protnone_tests().
1125 	 */
1126 	memset(args, 0, sizeof(*args));
1127 	args->vaddr              = get_random_vaddr();
1128 	args->page_prot          = vm_get_page_prot(VMFLAGS);
1129 	args->page_prot_none     = vm_get_page_prot(VM_NONE);
1130 	args->is_contiguous_page = false;
1131 	args->pud_pfn            = ULONG_MAX;
1132 	args->pmd_pfn            = ULONG_MAX;
1133 	args->pte_pfn            = ULONG_MAX;
1134 	args->fixed_pgd_pfn      = ULONG_MAX;
1135 	args->fixed_p4d_pfn      = ULONG_MAX;
1136 	args->fixed_pud_pfn      = ULONG_MAX;
1137 	args->fixed_pmd_pfn      = ULONG_MAX;
1138 	args->fixed_pte_pfn      = ULONG_MAX;
1139 
1140 	/* Allocate mm and vma */
1141 	args->mm = mm_alloc();
1142 	if (!args->mm) {
1143 		pr_err("Failed to allocate mm struct\n");
1144 		ret = -ENOMEM;
1145 		goto error;
1146 	}
1147 
1148 	args->vma = vm_area_alloc(args->mm);
1149 	if (!args->vma) {
1150 		pr_err("Failed to allocate vma\n");
1151 		ret = -ENOMEM;
1152 		goto error;
1153 	}
1154 
1155 	/*
1156 	 * Allocate page table entries. They will be modified in the tests.
1157 	 * Lets save the page table entries so that they can be released
1158 	 * when the tests are completed.
1159 	 */
1160 	args->pgdp = pgd_offset(args->mm, args->vaddr);
1161 	args->p4dp = p4d_alloc(args->mm, args->pgdp, args->vaddr);
1162 	if (!args->p4dp) {
1163 		pr_err("Failed to allocate p4d entries\n");
1164 		ret = -ENOMEM;
1165 		goto error;
1166 	}
1167 	args->start_p4dp = p4d_offset(args->pgdp, 0UL);
1168 	WARN_ON(!args->start_p4dp);
1169 
1170 	args->pudp = pud_alloc(args->mm, args->p4dp, args->vaddr);
1171 	if (!args->pudp) {
1172 		pr_err("Failed to allocate pud entries\n");
1173 		ret = -ENOMEM;
1174 		goto error;
1175 	}
1176 	args->start_pudp = pud_offset(args->p4dp, 0UL);
1177 	WARN_ON(!args->start_pudp);
1178 
1179 	args->pmdp = pmd_alloc(args->mm, args->pudp, args->vaddr);
1180 	if (!args->pmdp) {
1181 		pr_err("Failed to allocate pmd entries\n");
1182 		ret = -ENOMEM;
1183 		goto error;
1184 	}
1185 	args->start_pmdp = pmd_offset(args->pudp, 0UL);
1186 	WARN_ON(!args->start_pmdp);
1187 
1188 	if (pte_alloc(args->mm, args->pmdp)) {
1189 		pr_err("Failed to allocate pte entries\n");
1190 		ret = -ENOMEM;
1191 		goto error;
1192 	}
1193 	args->start_ptep = pmd_pgtable(READ_ONCE(*args->pmdp));
1194 	WARN_ON(!args->start_ptep);
1195 
1196 	/*
1197 	 * PFN for mapping at PTE level is determined from a standard kernel
1198 	 * text symbol. But pfns for higher page table levels are derived by
1199 	 * masking lower bits of this real pfn. These derived pfns might not
1200 	 * exist on the platform but that does not really matter as pfn_pxx()
1201 	 * helpers will still create appropriate entries for the test. This
1202 	 * helps avoid large memory block allocations to be used for mapping
1203 	 * at higher page table levels in some of the tests.
1204 	 */
1205 	phys = __pa_symbol(&start_kernel);
1206 	args->fixed_pgd_pfn = __phys_to_pfn(phys & PGDIR_MASK);
1207 	args->fixed_p4d_pfn = __phys_to_pfn(phys & P4D_MASK);
1208 	args->fixed_pud_pfn = __phys_to_pfn(phys & PUD_MASK);
1209 	args->fixed_pmd_pfn = __phys_to_pfn(phys & PMD_MASK);
1210 	args->fixed_pte_pfn = __phys_to_pfn(phys & PAGE_MASK);
1211 	WARN_ON(!pfn_valid(args->fixed_pte_pfn));
1212 
1213 	/*
1214 	 * Allocate (huge) pages because some of the tests need to access
1215 	 * the data in the pages. The corresponding tests will be skipped
1216 	 * if we fail to allocate (huge) pages.
1217 	 */
1218 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
1219 	    IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
1220 	    has_transparent_hugepage()) {
1221 		page = debug_vm_pgtable_alloc_huge_page(args,
1222 				HPAGE_PUD_SHIFT - PAGE_SHIFT);
1223 		if (page) {
1224 			args->pud_pfn = page_to_pfn(page);
1225 			args->pmd_pfn = args->pud_pfn;
1226 			args->pte_pfn = args->pud_pfn;
1227 			return 0;
1228 		}
1229 	}
1230 
1231 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
1232 	    has_transparent_hugepage()) {
1233 		page = debug_vm_pgtable_alloc_huge_page(args, HPAGE_PMD_ORDER);
1234 		if (page) {
1235 			args->pmd_pfn = page_to_pfn(page);
1236 			args->pte_pfn = args->pmd_pfn;
1237 			return 0;
1238 		}
1239 	}
1240 
1241 	page = alloc_pages(GFP_KERNEL, 0);
1242 	if (page)
1243 		args->pte_pfn = page_to_pfn(page);
1244 
1245 	return 0;
1246 
1247 error:
1248 	destroy_args(args);
1249 	return ret;
1250 }
1251 
1252 static int __init debug_vm_pgtable(void)
1253 {
1254 	struct pgtable_debug_args args;
1255 	spinlock_t *ptl = NULL;
1256 	int idx, ret;
1257 
1258 	pr_info("Validating architecture page table helpers\n");
1259 	ret = init_args(&args);
1260 	if (ret)
1261 		return ret;
1262 
1263 	/*
1264 	 * Iterate over each possible vm_flags to make sure that all
1265 	 * the basic page table transformation validations just hold
1266 	 * true irrespective of the starting protection value for a
1267 	 * given page table entry.
1268 	 *
1269 	 * Protection based vm_flags combinatins are always linear
1270 	 * and increasing i.e starting from VM_NONE and going upto
1271 	 * (VM_SHARED | READ | WRITE | EXEC).
1272 	 */
1273 #define VM_FLAGS_START	(VM_NONE)
1274 #define VM_FLAGS_END	(VM_SHARED | VM_EXEC | VM_WRITE | VM_READ)
1275 
1276 	for (idx = VM_FLAGS_START; idx <= VM_FLAGS_END; idx++) {
1277 		pte_basic_tests(&args, idx);
1278 		pmd_basic_tests(&args, idx);
1279 		pud_basic_tests(&args, idx);
1280 	}
1281 
1282 	/*
1283 	 * Both P4D and PGD level tests are very basic which do not
1284 	 * involve creating page table entries from the protection
1285 	 * value and the given pfn. Hence just keep them out from
1286 	 * the above iteration for now to save some test execution
1287 	 * time.
1288 	 */
1289 	p4d_basic_tests(&args);
1290 	pgd_basic_tests(&args);
1291 
1292 	pmd_leaf_tests(&args);
1293 	pud_leaf_tests(&args);
1294 
1295 	pte_savedwrite_tests(&args);
1296 	pmd_savedwrite_tests(&args);
1297 
1298 	pte_special_tests(&args);
1299 	pte_protnone_tests(&args);
1300 	pmd_protnone_tests(&args);
1301 
1302 	pte_devmap_tests(&args);
1303 	pmd_devmap_tests(&args);
1304 	pud_devmap_tests(&args);
1305 
1306 	pte_soft_dirty_tests(&args);
1307 	pmd_soft_dirty_tests(&args);
1308 	pte_swap_soft_dirty_tests(&args);
1309 	pmd_swap_soft_dirty_tests(&args);
1310 
1311 	pte_swap_exclusive_tests(&args);
1312 
1313 	pte_swap_tests(&args);
1314 	pmd_swap_tests(&args);
1315 
1316 	swap_migration_tests(&args);
1317 
1318 	pmd_thp_tests(&args);
1319 	pud_thp_tests(&args);
1320 
1321 	hugetlb_basic_tests(&args);
1322 
1323 	/*
1324 	 * Page table modifying tests. They need to hold
1325 	 * proper page table lock.
1326 	 */
1327 
1328 	args.ptep = pte_offset_map_lock(args.mm, args.pmdp, args.vaddr, &ptl);
1329 	pte_clear_tests(&args);
1330 	pte_advanced_tests(&args);
1331 	pte_unmap_unlock(args.ptep, ptl);
1332 
1333 	ptl = pmd_lock(args.mm, args.pmdp);
1334 	pmd_clear_tests(&args);
1335 	pmd_advanced_tests(&args);
1336 	pmd_huge_tests(&args);
1337 	pmd_populate_tests(&args);
1338 	spin_unlock(ptl);
1339 
1340 	ptl = pud_lock(args.mm, args.pudp);
1341 	pud_clear_tests(&args);
1342 	pud_advanced_tests(&args);
1343 	pud_huge_tests(&args);
1344 	pud_populate_tests(&args);
1345 	spin_unlock(ptl);
1346 
1347 	spin_lock(&(args.mm->page_table_lock));
1348 	p4d_clear_tests(&args);
1349 	pgd_clear_tests(&args);
1350 	p4d_populate_tests(&args);
1351 	pgd_populate_tests(&args);
1352 	spin_unlock(&(args.mm->page_table_lock));
1353 
1354 	destroy_args(&args);
1355 	return 0;
1356 }
1357 late_initcall(debug_vm_pgtable);
1358