xref: /linux/arch/x86/entry/vdso/vma.c (revision c0c914eca7f251c70facc37dfebeaf176601918d)
/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

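/*
 * Populate the page array backing image->text_mapping from the kernel
 * copy of the vDSO image and patch in alternative instructions.
 * Called at boot for each built vDSO image; see init_vdso() below for
 * the 64-bit and x32 images.
 */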
void __init init_vdso_image(const struct vdso_image *image)
{
	int i;
	int npages = image->size / PAGE_SIZE;

	BUG_ON(image->size % PAGE_SIZE != 0);
	for (i = 0; i < npages; i++)
		image->text_mapping.pages[i] =
			virt_to_page(image->data + i*PAGE_SIZE);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));
}

struct linux_binprm;

/*
 * Put the vDSO above the (randomized) stack with another randomized
 * offset.  This way there is no hole in the middle of the address space.
 * To save memory, make sure it still ends up in the same PMD-sized
 * region (and thus the same page-table page) as the stack top.  This
 * doesn't give that many random bits.
 *
 * Note that this algorithm is imperfect: the distribution of the vdso
 * start address within a PMD is biased toward the end.
 *
 * Only used for the 64-bit and x32 vdsos.
 */
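/*
 * For illustration, assuming 4 KiB pages and 2 MiB PMDs on x86_64: the
 * window searched below runs from the page-aligned stack top up to just
 * under the next PMD boundary, so the randomization picks one of at
 * most 512 page-aligned start addresses.
 */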
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
#ifdef CONFIG_X86_32
	return 0;
#else
	unsigned long addr, end;
	unsigned offset;

	/*
	 * Round up the start address.  It can start out unaligned as a result
	 * of stack start randomization.
	 */
	start = PAGE_ALIGN(start);

	/* Round the lowest possible end address up to a PMD boundary. */
	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;

	if (end > start) {
		offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
		addr = start + (offset << PAGE_SHIFT);
	} else {
		addr = start;
	}

	/*
	 * Forcibly align the final address in case we have a hardware
	 * issue that requires alignment for performance reasons.
	 */
	addr = align_vdso_addr(addr);

	return addr;
#endif
}

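/*
 * Map the vDSO for the current process.  image->sym_vvar_start is
 * negative: the vvar area sits immediately below the vDSO text, so the
 * whole mapping is image->size - image->sym_vvar_start bytes, with the
 * vvar VMA at addr and the text VMA at addr - image->sym_vvar_start.
 * The vvar VMA is installed with no backing pages; the individual
 * special pages (vvar data, HPET, pvclock) are remapped into it below.
 */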
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr, text_start;
	int ret = 0;
	static struct page *no_pages[] = {NULL};
	static struct vm_special_mapping vvar_mapping = {
		.name = "[vvar]",
		.pages = no_pages,
	};
	struct pvclock_vsyscall_time_info *pvti;

	if (calculate_addr) {
		addr = vdso_addr(current->mm->start_stack,
				 image->size - image->sym_vvar_start);
	} else {
		addr = 0;
	}

	down_write(&mm->mmap_sem);

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;
	current->mm->context.vdso = (void __user *)text_start;

	/*
	 * VM_MAYWRITE is set so that gdb can COW the pages and set
	 * breakpoints in the vDSO text.
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &image->text_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr,
				       -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	if (image->sym_vvar_page)
		ret = remap_pfn_range(vma,
				      text_start + image->sym_vvar_page,
				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
				      PAGE_SIZE,
				      PAGE_READONLY);

	if (ret)
		goto up_fail;

#ifdef CONFIG_HPET_TIMER
	if (hpet_address && image->sym_hpet_page) {
		ret = io_remap_pfn_range(vma,
			text_start + image->sym_hpet_page,
			hpet_address >> PAGE_SHIFT,
			PAGE_SIZE,
			pgprot_noncached(PAGE_READONLY));

		if (ret)
			goto up_fail;
	}
#endif

	pvti = pvclock_pvti_cpu0_va();
	if (pvti && image->sym_pvclock_page) {
		ret = remap_pfn_range(vma,
				      text_start + image->sym_pvclock_page,
				      __pa(pvti) >> PAGE_SHIFT,
				      PAGE_SIZE,
				      PAGE_READONLY);

		if (ret)
			goto up_fail;
	}

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}
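/*
 * Note that the up_fail path above clears mm->context.vdso on any error
 * but leaves already-installed VMAs in place; the error code is simply
 * returned to the caller.
 */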

#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static int load_vdso32(void)
{
	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	return map_vdso(&vdso_image_32, false);
}
#endif

#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso(&vdso_image_64, true);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32)) {
		if (!vdso64_enabled)
			return 0;

		return map_vdso(&vdso_image_x32, true);
	}
#endif
#ifdef CONFIG_IA32_EMULATION
	return load_vdso32();
#else
	return 0;
#endif
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif
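/*
 * Summary of the selection above: native 64-bit tasks get vdso_image_64
 * and x32 tasks (TIF_X32) get vdso_image_x32, both only if
 * vdso64_enabled and both with the randomized vdso_addr() placement.
 * 32-bit tasks, whether native or via IA32 emulation, get vdso_image_32
 * through load_vdso32() (when vdso32_enabled is 1) with no extra
 * randomization applied here.
 */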

#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 0;
}
__setup("vdso=", vdso_setup);
#endif
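/*
 * For example, booting with "vdso=0" on the kernel command line sets
 * vdso64_enabled to 0, so arch_setup_additional_pages() above skips
 * mapping the 64-bit (and x32) vDSO for new processes.
 */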

#ifdef CONFIG_X86_64
static void vgetcpu_cpu_init(void *arg)
{
	int cpu = smp_processor_id();
	struct desc_struct d = { };
	unsigned long node = 0;
#ifdef CONFIG_NUMA
	node = cpu_to_node(cpu);
#endif
	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
		write_rdtscp_aux((node << 12) | cpu);

	/*
	 * Store the CPU number in the segment limit so that it can be
	 * loaded quickly in user space by vgetcpu: 12 bits for the CPU
	 * and 8 bits for the node.
	 */
	d.limit0 = cpu | ((node & 0xf) << 12);
	d.limit = node >> 4;
	d.type = 5;		/* RO data, expand down, accessed */
	d.dpl = 3;		/* Visible to user code */
	d.s = 1;		/* Not a system segment */
	d.p = 1;		/* Present */
	d.d = 1;		/* 32-bit */

	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
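/*
 * Worked example of the encoding above, assuming cpu = 5 on node = 2:
 * limit0 = 5 | (2 << 12) = 0x2005 and limit = 2 >> 4 = 0, so the 20-bit
 * segment limit reads back as 0x02005.  User space (and the RDTSCP path
 * via TSC_AUX) then recovers the values roughly as cpu = val & 0xfff
 * and node = val >> 12.
 */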

static int
vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
	long cpu = (long)arg;

	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);

	return NOTIFY_DONE;
}
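/*
 * The notifier reruns vgetcpu_cpu_init() on every CPU that comes online
 * (CPU_ONLINE_FROZEN covers CPUs brought back after resume), so
 * hotplugged CPUs also get their GDT_ENTRY_PER_CPU descriptor and, if
 * available, TSC_AUX set up.
 */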

static int __init init_vdso(void)
{
	init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif

	cpu_notifier_register_begin();

	on_each_cpu(vgetcpu_cpu_init, NULL, 1);
	/* notifier priority > KVM */
	__hotcpu_notifier(vgetcpu_cpu_notifier, 30);

	cpu_notifier_register_done();

	return 0;
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */