xref: /linux/arch/x86/include/asm/page_64.h (revision 6aacab308a5dfd222b2d23662bbae60c11007cfb)
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PAGE_64_H
#define _ASM_X86_PAGE_64_H

#include <asm/page_64_types.h>

#ifndef __ASSEMBLER__
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

#include <linux/kmsan-checks.h>
#include <linux/mmdebug.h>

/* duplicate of the one in bootmem.h */
extern unsigned long max_pfn;
extern unsigned long phys_base;

extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
extern unsigned long direct_map_physmem_end;

static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
	unsigned long y = x - __START_KERNEL_map;

	/* use the carry flag to determine if x was < __START_KERNEL_map */
	x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));

	return x;
}
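
/*
 * Worked example, assuming the default (non-randomized) 4-level layout
 * where PAGE_OFFSET is 0xffff888000000000 and __START_KERNEL_map is
 * 0xffffffff80000000:
 *
 *  - direct map address x = 0xffff888000001000: the subtraction above
 *    wraps, (x > y) is false, and the result is x - PAGE_OFFSET = 0x1000.
 *
 *  - kernel image address x = __START_KERNEL_map + 0x1000: no wrap,
 *    (x > y) is true, and the result is 0x1000 + phys_base, i.e. the
 *    physical load address of the kernel image is accounted for.
 */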

#ifdef CONFIG_DEBUG_VIRTUAL
extern unsigned long __phys_addr(unsigned long);
#else
#define __phys_addr(x)		__phys_addr_nodebug(x)
#endif

static inline unsigned long __phys_addr_symbol(unsigned long x)
{
	unsigned long y = x - __START_KERNEL_map;

	/* only check upper bounds since lower bounds will trigger carry */
	VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);

	return y + phys_base;
}
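
/*
 * Minimal usage sketch: __phys_addr_symbol() is meant for addresses
 * inside the kernel image mapping, e.g. the address of a linker symbol:
 *
 *	phys_addr_t text_phys = __phys_addr_symbol((unsigned long)_text);
 *
 * which evaluates to (_text - __START_KERNEL_map) + phys_base.  This is
 * the helper behind __pa_symbol() in <asm/page.h>.
 */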

#define __phys_reloc_hide(x)	(x)

void __clear_pages_unrolled(void *page);
KCFI_REFERENCE(__clear_pages_unrolled);

/**
 * clear_pages() - clear a page range using a kernel virtual address.
 * @addr: start address of kernel page range
 * @npages: number of pages
 *
 * Switch between three implementations of page clearing based on CPU
 * capabilities:
 *
 *  - __clear_pages_unrolled(): the oldest, slowest and universally
 *    supported method. Zeroes via 8-byte MOV instructions unrolled 8x
 *    to write a 64-byte cacheline in each loop iteration.
 *
 *  - "REP; STOSQ": really old CPUs had crummy REP implementations.
 *    Vendor CPU setup code sets 'REP_GOOD' on CPUs where REP can be
 *    trusted. The instruction writes 8 bytes per REP iteration, but
 *    CPUs can internally batch these together and do larger writes.
 *
 *  - "REP; STOSB": used on CPUs with "enhanced REP MOVSB/STOSB",
 *    which enumerate 'ERMS' and provide an implementation which,
 *    unlike "REP; STOSQ" above, isn't overly picky about alignment.
 *    The instruction writes 1 byte per REP iteration, with CPUs
 *    internally batching these together into larger writes; it is
 *    generally the fastest of the three.
 *
 * Note that when running as a guest, features exposed by the CPU
 * might be mediated by the hypervisor. So, the STOSQ variant might
 * be in active use on some systems even when the hardware enumerates
 * ERMS.
 *
 * Does absolutely no exception handling.
 */
static inline void clear_pages(void *addr, unsigned int npages)
{
	u64 len = npages * PAGE_SIZE;
	/*
	 * Clean up KMSAN metadata for the pages being cleared. The assembly call
	 * below clobbers @addr, so perform unpoisoning before it.
	 */
	kmsan_unpoison_memory(addr, len);

	/*
	 * The inline asm embeds a CALL instruction, which is usually a no-no
	 * because the compiler does not know about the call and thus cannot
	 * track callee-clobbered registers.
	 *
	 * In this case that is fine because the registers clobbered by
	 * __clear_pages_unrolled() are part of the inline asm register
	 * specification.
	 */
	asm volatile(ALTERNATIVE_2("call __clear_pages_unrolled",
				   "shrq $3, %%rcx; rep stosq", X86_FEATURE_REP_GOOD,
				   "rep stosb", X86_FEATURE_ERMS)
			: "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
			: "a" (0)
			: "cc", "memory");
}
#define clear_pages clear_pages
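
/*
 * Usage sketch (hypothetical caller): zero a freshly allocated
 * higher-order allocation in one go, letting the ALTERNATIVE above pick
 * the best clearing routine once for the whole range:
 *
 *	struct page *page = alloc_pages(GFP_KERNEL, order);
 *
 *	if (page)
 *		clear_pages(page_address(page), 1 << order);
 */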

static inline void clear_page(void *addr)
{
	clear_pages(addr, 1);
}

void copy_page(void *to, void *from);
KCFI_REFERENCE(copy_page);

/*
 * User space process size.  This is the first address outside the user range.
 * There are a few constraints that determine this:
 *
 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
 * address, then that syscall will enter the kernel with a
 * non-canonical return address, and SYSRET will explode dangerously.
 * We avoid this particular problem by preventing anything
 * from being mapped at the maximum canonical address.
 *
 * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
 * CPUs malfunction if they execute code from the highest canonical page.
 * They'll speculate right off the end of the canonical space, and
 * bad things happen.  This is worked around in the same way as the
 * Intel problem.
 *
 * With page table isolation enabled, we map the LDT in ... [stay tuned]
 */
static __always_inline unsigned long task_size_max(void)
{
	unsigned long ret;

	alternative_io("movq %[small],%0","movq %[large],%0",
			X86_FEATURE_LA57,
			"=r" (ret),
			[small] "i" ((1ul << 47)-PAGE_SIZE),
			[large] "i" ((1ul << 56)-PAGE_SIZE));

	return ret;
}
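
/*
 * Concretely, task_size_max() evaluates to:
 *
 *	(1UL << 47) - PAGE_SIZE = 0x00007ffffffff000	(4-level paging)
 *	(1UL << 56) - PAGE_SIZE = 0x00fffffffffff000	(5-level paging, LA57)
 *
 * i.e. the last page below the user canonical boundary is never handed
 * out, per the SYSCALL/SYSRET and AMD notes above.
 */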

#endif	/* !__ASSEMBLER__ */

#ifdef CONFIG_X86_VSYSCALL_EMULATION
# define __HAVE_ARCH_GATE_AREA 1
#endif

#endif /* _ASM_X86_PAGE_64_H */