/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PAGE_64_H
#define _ASM_X86_PAGE_64_H

#include <asm/page_64_types.h>

#ifndef __ASSEMBLER__
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

#include <linux/kmsan-checks.h>
#include <linux/mmdebug.h>

/* duplicate of the one in bootmem.h */
extern unsigned long max_pfn;
extern unsigned long phys_base;

extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
extern unsigned long direct_map_physmem_end;

static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
	unsigned long y = x - __START_KERNEL_map;

	/* use the carry flag to determine if x was < __START_KERNEL_map */
	x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));

	return x;
}

#ifdef CONFIG_DEBUG_VIRTUAL
extern unsigned long __phys_addr(unsigned long);
#else
#define __phys_addr(x)		__phys_addr_nodebug(x)
#endif

static inline unsigned long __phys_addr_symbol(unsigned long x)
{
	unsigned long y = x - __START_KERNEL_map;

	/* only check upper bounds since lower bounds will trigger carry */
	VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);

	return y + phys_base;
}

#define __phys_reloc_hide(x)	(x)

void __clear_pages_unrolled(void *page);
KCFI_REFERENCE(__clear_pages_unrolled);

/**
 * clear_pages() - clear a page range using a kernel virtual address.
 * @addr: start address of kernel page range
 * @npages: number of pages
 *
 * Switch between three implementations of page clearing based on CPU
 * capabilities:
 *
 * - __clear_pages_unrolled(): the oldest, slowest and universally
 *   supported method. Zeroes via 8-byte MOV instructions unrolled 8x
 *   to write a 64-byte cacheline in each loop iteration.
 *
 * - "REP; STOSQ": really old CPUs had crummy REP implementations.
 *   Vendor CPU setup code sets 'REP_GOOD' on CPUs where REP can be
 *   trusted. The instruction writes 8 bytes per REP iteration, but
 *   CPUs can internally batch these together and do larger writes.
 *
 * - "REP; STOSB": used on CPUs with "enhanced REP MOVSB/STOSB",
 *   which enumerate 'ERMS' and provide an implementation which,
 *   unlike "REP; STOSQ" above, isn't overly picky about alignment.
 *   The instruction writes 1 byte per REP iteration, with CPUs
 *   internally batching these together into larger writes, and is
 *   generally the fastest of the three.
 *
 * Note that when running as a guest, features exposed by the CPU
 * might be mediated by the hypervisor. So, the STOSQ variant might
 * be in active use on some systems even when the hardware enumerates
 * ERMS.
 *
 * Does absolutely no exception handling.
 */
static inline void clear_pages(void *addr, unsigned int npages)
{
	u64 len = npages * PAGE_SIZE;

	/*
	 * Clean up KMSAN metadata for the pages being cleared. The assembly
	 * call below clobbers @addr, so perform the unpoisoning before it.
	 */
	kmsan_unpoison_memory(addr, len);

	/*
	 * The inline asm embeds a CALL instruction, which is usually a no-no
	 * because the compiler does not know about it and thus cannot track
	 * callee-clobbered registers.
	 *
	 * In this case that is fine because the registers clobbered by
	 * __clear_pages_unrolled() are part of the inline asm register
	 * specification.
	 */
	asm volatile(ALTERNATIVE_2("call __clear_pages_unrolled",
				   "shrq $3, %%rcx; rep stosq", X86_FEATURE_REP_GOOD,
				   "rep stosb", X86_FEATURE_ERMS)
		     : "+c" (len), "+D" (addr), ASM_CALL_CONSTRAINT
		     : "a" (0)
		     : "cc", "memory");
}
#define clear_pages clear_pages

static inline void clear_page(void *addr)
{
	clear_pages(addr, 1);
}

void copy_page(void *to, void *from);
KCFI_REFERENCE(copy_page);

/*
 * User space process size. This is the first address outside the user range.
 * There are a few constraints that determine this:
 *
 * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
 * address, then that syscall will enter the kernel with a
 * non-canonical return address, and SYSRET will explode dangerously.
 * We avoid this particular problem by preventing anything
 * from being mapped at the maximum canonical address.
 *
 * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
 * CPUs malfunction if they execute code from the highest canonical page.
 * They'll speculate right off the end of the canonical space, and
 * bad things happen. This is worked around in the same way as the
 * Intel problem.
 *
 * With page table isolation enabled, we map the LDT in ... [stay tuned]
 */
static __always_inline unsigned long task_size_max(void)
{
	unsigned long ret;

	alternative_io("movq %[small],%0","movq %[large],%0",
		       X86_FEATURE_LA57,
		       "=r" (ret),
		       [small] "i" ((1ul << 47) - PAGE_SIZE),
		       [large] "i" ((1ul << 56) - PAGE_SIZE));

	return ret;
}

#endif	/* !__ASSEMBLER__ */

#ifdef CONFIG_X86_VSYSCALL_EMULATION
# define __HAVE_ARCH_GATE_AREA 1
#endif

#endif /* _ASM_X86_PAGE_64_H */
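
/*
 * Illustrative sketch, not part of this header: a user-space rendering of the
 * carry-flag trick used by __phys_addr_nodebug() above. The DEMO_* constants,
 * demo_phys_base and demo_phys_addr() are hypothetical stand-ins chosen for
 * the example (an LP64 host is assumed); the real values come from
 * page_64_types.h and the kernel's relocation code.
 *
 * For x >= __START_KERNEL_map the subtraction does not wrap, so x > y and the
 * result is y + phys_base (a kernel-text mapping). Otherwise the subtraction
 * wraps, x <= y, and adding (__START_KERNEL_map - PAGE_OFFSET) back to y
 * yields x - PAGE_OFFSET (a direct-map address).
 */
#include <stdio.h>

#define DEMO_START_KERNEL_map	0xffffffff80000000UL
#define DEMO_PAGE_OFFSET	0xffff888000000000UL

static unsigned long demo_phys_base = 0x1000000UL;	/* example kernel relocation */

static unsigned long demo_phys_addr(unsigned long x)
{
	unsigned long y = x - DEMO_START_KERNEL_map;

	/* same arithmetic as __phys_addr_nodebug(), with demo constants */
	return y + ((x > y) ? demo_phys_base :
			      (DEMO_START_KERNEL_map - DEMO_PAGE_OFFSET));
}

int main(void)
{
	/* kernel-text address: prints 0x1002000, i.e. offset + phys_base */
	printf("%#lx\n", demo_phys_addr(DEMO_START_KERNEL_map + 0x2000));
	/* direct-map address: prints 0x123000, i.e. x - PAGE_OFFSET */
	printf("%#lx\n", demo_phys_addr(DEMO_PAGE_OFFSET + 0x123000));
	return 0;
}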