xref: /linux/arch/x86/entry/vdso/vma.c (revision 764e77d868a5b932c709e20ddb5993f9111a841c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2007 Andi Kleen, SUSE Labs.
4  *
5  * This contains most of the x86 vDSO kernel-side code.
6  */
7 #include <linux/mm.h>
8 #include <linux/err.h>
9 #include <linux/futex.h>
10 #include <linux/sched.h>
11 #include <linux/sched/task_stack.h>
12 #include <linux/slab.h>
13 #include <linux/init.h>
14 #include <linux/random.h>
15 #include <linux/elf.h>
16 #include <linux/cpu.h>
17 #include <linux/ptrace.h>
18 #include <linux/vdso_datastore.h>
19 
20 #include <asm/pvclock.h>
21 #include <asm/vgtod.h>
22 #include <asm/proto.h>
23 #include <asm/vdso.h>
24 #include <asm/tlb.h>
25 #include <asm/page.h>
26 #include <asm/desc.h>
27 #include <asm/cpufeature.h>
28 #include <asm/vdso/vsyscall.h>
29 #include <clocksource/hyperv_timer.h>
30 
31 static_assert(VDSO_NR_PAGES + VDSO_NR_VCLOCK_PAGES == __VDSO_PAGES);
32 
33 unsigned int vclocks_used __read_mostly;
34 
35 #if defined(CONFIG_X86_64)
36 unsigned int __read_mostly vdso64_enabled = 1;
37 #endif
38 
39 int __init init_vdso_image(const struct vdso_image *image)
40 {
41 	BUILD_BUG_ON(VDSO_CLOCKMODE_MAX >= 32);
42 	BUG_ON(image->size % PAGE_SIZE != 0);
43 
44 	apply_alternatives((struct alt_instr *)(image->data + image->alt),
45 			   (struct alt_instr *)(image->data + image->alt +
46 						image->alt_len));
47 
48 	return 0;
49 }
50 
51 struct linux_binprm;
52 
53 static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
54 		      struct vm_area_struct *vma, struct vm_fault *vmf)
55 {
56 	const struct vdso_image *image = vma->vm_mm->context.vdso_image;
57 
58 	if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
59 		return VM_FAULT_SIGBUS;
60 
61 	vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
62 	get_page(vmf->page);
63 	return 0;
64 }
65 
66 static void vdso_fix_landing(const struct vdso_image *image,
67 		struct vm_area_struct *new_vma)
68 {
69 	struct pt_regs *regs = current_pt_regs();
70 	unsigned long ipoffset = regs->ip -
71 		(unsigned long)current->mm->context.vdso;
72 
73 	if (ipoffset < image->size)
74 		regs->ip = new_vma->vm_start + ipoffset;
75 }
76 
#ifdef CONFIG_FUTEX_ROBUST_UNLOCK
/*
 * Recompute the futex robust-unlock critical-section ranges of the
 * current mm from the vDSO base address and image recorded in
 * mm->context.
 *
 * All ranges are reset first.  The 64-bit range, when built in, takes
 * slot 0 and the 32-bit/compat range takes the next free slot.
 * NOTE(review): the last argument of futex_set_vdso_cs_range() looks like
 * a "32-bit" flag - confirm against its definition.
 */
static void vdso_futex_robust_unlock_update_ips(void)
{
	struct mm_struct *mm = current->mm;
	const struct vdso_image *image = mm->context.vdso_image;
	unsigned long base = (unsigned long)mm->context.vdso;
	struct futex_mm_data *fd = &mm->futex;
	unsigned int slot = 0;

	futex_reset_cs_ranges(fd);

#ifdef CONFIG_X86_64
	futex_set_vdso_cs_range(fd, slot,
				base + image->sym___futex_list64_try_unlock_cs_start,
				base + image->sym___futex_list64_try_unlock_cs_end,
				false);
	slot++;
#endif /* CONFIG_X86_64 */

#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
	futex_set_vdso_cs_range(fd, slot,
				base + image->sym___futex_list32_try_unlock_cs_start,
				base + image->sym___futex_list32_try_unlock_cs_end,
				true);
#endif /* CONFIG_X86_32 || CONFIG_COMPAT */
}
#else
/* Nothing to update when CONFIG_FUTEX_ROBUST_UNLOCK is not set. */
static inline void vdso_futex_robust_unlock_update_ips(void) { }
#endif
101 
102 static int vdso_mremap(const struct vm_special_mapping *sm,
103 		struct vm_area_struct *new_vma)
104 {
105 	const struct vdso_image *image = current->mm->context.vdso_image;
106 
107 	vdso_fix_landing(image, new_vma);
108 	current->mm->context.vdso = (void __user *)new_vma->vm_start;
109 	vdso_futex_robust_unlock_update_ips();
110 
111 	return 0;
112 }
113 
114 static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm,
115 				    struct vm_area_struct *vma, struct vm_fault *vmf)
116 {
117 	switch (vmf->pgoff) {
118 	case VDSO_PAGE_PVCLOCK_OFFSET:
119 	{
120 		struct pvclock_vsyscall_time_info *pvti =
121 			pvclock_get_pvti_cpu0_va();
122 
123 		if (pvti && vclock_was_used(VDSO_CLOCKMODE_PVCLOCK))
124 			return vmf_insert_pfn_prot(vma, vmf->address,
125 					__pa(pvti) >> PAGE_SHIFT,
126 					pgprot_decrypted(vma->vm_page_prot));
127 		break;
128 	}
129 	case VDSO_PAGE_HVCLOCK_OFFSET:
130 	{
131 		unsigned long pfn = hv_get_tsc_pfn();
132 		if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK))
133 			return vmf_insert_pfn(vma, vmf->address, pfn);
134 		break;
135 	}
136 	}
137 
138 	return VM_FAULT_SIGBUS;
139 }
140 
141 static const struct vm_special_mapping vdso_mapping = {
142 	.name = "[vdso]",
143 	.fault = vdso_fault,
144 	.mremap = vdso_mremap,
145 };
146 static const struct vm_special_mapping vvar_vclock_mapping = {
147 	.name = "[vvar_vclock]",
148 	.fault = vvar_vclock_fault,
149 };
150 
151 /*
152  * Add vdso and vvar mappings to current process.
153  * @image          - blob to map
154  * @addr           - request a specific address (zero to map at free addr)
155  */
156 static int map_vdso(const struct vdso_image *image, unsigned long addr)
157 {
158 	struct mm_struct *mm = current->mm;
159 	struct vm_area_struct *vma;
160 	unsigned long text_start;
161 	int ret = 0;
162 
163 	if (mmap_write_lock_killable(mm))
164 		return -EINTR;
165 
166 	addr = get_unmapped_area(NULL, addr,
167 				 image->size + __VDSO_PAGES * PAGE_SIZE, 0, 0);
168 	if (IS_ERR_VALUE(addr)) {
169 		ret = addr;
170 		goto up_fail;
171 	}
172 
173 	text_start = addr + __VDSO_PAGES * PAGE_SIZE;
174 
175 	/*
176 	 * MAYWRITE to allow gdb to COW and set breakpoints
177 	 */
178 	vma = _install_special_mapping(mm,
179 				       text_start,
180 				       image->size,
181 				       VM_READ|VM_EXEC|
182 				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
183 				       VM_SEALED_SYSMAP,
184 				       &vdso_mapping);
185 
186 	if (IS_ERR(vma)) {
187 		ret = PTR_ERR(vma);
188 		goto up_fail;
189 	}
190 
191 	vma = vdso_install_vvar_mapping(mm, addr);
192 	if (IS_ERR(vma)) {
193 		ret = PTR_ERR(vma);
194 		do_munmap(mm, text_start, image->size, NULL);
195 		goto up_fail;
196 	}
197 
198 	vma = _install_special_mapping(mm,
199 				       VDSO_VCLOCK_PAGES_START(addr),
200 				       VDSO_NR_VCLOCK_PAGES * PAGE_SIZE,
201 				       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
202 				       VM_PFNMAP|VM_SEALED_SYSMAP,
203 				       &vvar_vclock_mapping);
204 
205 	if (IS_ERR(vma)) {
206 		ret = PTR_ERR(vma);
207 		do_munmap(mm, text_start, image->size, NULL);
208 		do_munmap(mm, addr, VDSO_NR_PAGES * PAGE_SIZE, NULL);
209 		goto up_fail;
210 	}
211 
212 	current->mm->context.vdso = (void __user *)text_start;
213 	current->mm->context.vdso_image = image;
214 
215 	vdso_futex_robust_unlock_update_ips();
216 
217 up_fail:
218 	mmap_write_unlock(mm);
219 	return ret;
220 }
221 
222 int map_vdso_once(const struct vdso_image *image, unsigned long addr)
223 {
224 	struct mm_struct *mm = current->mm;
225 	struct vm_area_struct *vma;
226 	VMA_ITERATOR(vmi, mm, 0);
227 
228 	mmap_write_lock(mm);
229 	/*
230 	 * Check if we have already mapped vdso blob - fail to prevent
231 	 * abusing from userspace install_special_mapping, which may
232 	 * not do accounting and rlimit right.
233 	 * We could search vma near context.vdso, but it's a slowpath,
234 	 * so let's explicitly check all VMAs to be completely sure.
235 	 */
236 	for_each_vma(vmi, vma) {
237 		if (vma_is_special_mapping(vma, &vdso_mapping) ||
238 				vma_is_special_mapping(vma, &vdso_vvar_mapping) ||
239 				vma_is_special_mapping(vma, &vvar_vclock_mapping)) {
240 			mmap_write_unlock(mm);
241 			return -EEXIST;
242 		}
243 	}
244 	mmap_write_unlock(mm);
245 
246 	return map_vdso(image, addr);
247 }
248 
249 static int load_vdso32(void)
250 {
251 	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
252 		return 0;
253 
254 	return map_vdso(&vdso32_image, 0);
255 }
256 
257 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
258 {
259 	if (IS_ENABLED(CONFIG_X86_64)) {
260 		if (!vdso64_enabled)
261 			return 0;
262 
263 		return map_vdso(&vdso64_image, 0);
264 	}
265 
266 	return load_vdso32();
267 }
268 
#ifdef CONFIG_COMPAT
/*
 * Map the vDSO for a compat process.
 *
 * When @x32 is set and CONFIG_X86_X32_ABI is enabled, the x32 image is
 * mapped, subject to vdso64_enabled.  Otherwise, with
 * CONFIG_IA32_EMULATION, the 32-bit path is taken.  In every remaining
 * case nothing is mapped and 0 is returned.  @bprm and @uses_interp are
 * not used.
 */
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp, bool x32)
{
	if (IS_ENABLED(CONFIG_X86_X32_ABI) && x32)
		return vdso64_enabled ? map_vdso(&vdsox32_image, 0) : 0;

	return IS_ENABLED(CONFIG_IA32_EMULATION) ? load_vdso32() : 0;
}
#endif
285 
286 bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
287 {
288 	const struct vdso_image *image = current->mm->context.vdso_image;
289 	unsigned long vdso = (unsigned long) current->mm->context.vdso;
290 
291 	if (in_ia32_syscall() && image == &vdso32_image) {
292 		if (regs->ip == vdso + image->sym_vdso32_sigreturn_landing_pad ||
293 		    regs->ip == vdso + image->sym_vdso32_rt_sigreturn_landing_pad)
294 			return true;
295 	}
296 	return false;
297 }
298 
#ifdef CONFIG_X86_64
/*
 * Handler for the "vdso=" boot option: the numeric value (base detected
 * from its prefix) is stored in vdso64_enabled.  Always returns 1.
 */
static __init int vdso_setup(char *s)
{
	unsigned long requested = simple_strtoul(s, NULL, 0);

	vdso64_enabled = requested;

	return 1;
}
__setup("vdso=", vdso_setup);
#endif /* CONFIG_X86_64 */
307