xref: /linux/arch/x86/kernel/process_64.c (revision 627fce14809ba5610b0cb476cd0186d3fcedecfc)
1 /*
2  *  Copyright (C) 1995  Linus Torvalds
3  *
4  *  Pentium III FXSR, SSE support
5  *	Gareth Hughes <gareth@valinux.com>, May 2000
6  *
7  *  X86-64 port
8  *	Andi Kleen.
9  *
10  *	CPU hotplug support - ashok.raj@intel.com
11  */
12 
/*
 * This file handles the architecture-dependent parts of process handling.
 */
16 
17 #include <linux/cpu.h>
18 #include <linux/errno.h>
19 #include <linux/sched.h>
20 #include <linux/sched/task.h>
21 #include <linux/sched/task_stack.h>
22 #include <linux/fs.h>
23 #include <linux/kernel.h>
24 #include <linux/mm.h>
25 #include <linux/elfcore.h>
26 #include <linux/smp.h>
27 #include <linux/slab.h>
28 #include <linux/user.h>
29 #include <linux/interrupt.h>
30 #include <linux/delay.h>
31 #include <linux/export.h>
32 #include <linux/ptrace.h>
33 #include <linux/notifier.h>
34 #include <linux/kprobes.h>
35 #include <linux/kdebug.h>
36 #include <linux/prctl.h>
37 #include <linux/uaccess.h>
38 #include <linux/io.h>
39 #include <linux/ftrace.h>
40 #include <linux/syscalls.h>
41 
42 #include <asm/pgtable.h>
43 #include <asm/processor.h>
44 #include <asm/fpu/internal.h>
45 #include <asm/mmu_context.h>
46 #include <asm/prctl.h>
47 #include <asm/desc.h>
48 #include <asm/proto.h>
49 #include <asm/ia32.h>
50 #include <asm/syscalls.h>
51 #include <asm/debugreg.h>
52 #include <asm/switch_to.h>
53 #include <asm/xen/hypervisor.h>
54 #include <asm/vdso.h>
55 #include <asm/intel_rdt.h>
56 #include <asm/unistd.h>
57 #ifdef CONFIG_IA32_EMULATION
58 /* Not included via unistd.h */
59 #include <asm/unistd_32_ia32.h>
60 #endif
61 
/*
 * Per-CPU unsigned long named rsp_scratch.  Nothing in this file reads or
 * writes it; presumably low-level entry code uses it as temporary storage
 * for a stack pointer -- TODO confirm against the users of rsp_scratch.
 */
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
63 
64 /* Prints also some state that isn't saved in the pt_regs */
65 void __show_regs(struct pt_regs *regs, int all)
66 {
67 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
68 	unsigned long d0, d1, d2, d3, d6, d7;
69 	unsigned int fsindex, gsindex;
70 	unsigned int ds, cs, es;
71 
72 	printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
73 		(void *)regs->ip);
74 	printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
75 		regs->sp, regs->flags);
76 	if (regs->orig_ax != -1)
77 		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
78 	else
79 		pr_cont("\n");
80 
81 	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
82 	       regs->ax, regs->bx, regs->cx);
83 	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
84 	       regs->dx, regs->si, regs->di);
85 	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
86 	       regs->bp, regs->r8, regs->r9);
87 	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
88 	       regs->r10, regs->r11, regs->r12);
89 	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
90 	       regs->r13, regs->r14, regs->r15);
91 
92 	asm("movl %%ds,%0" : "=r" (ds));
93 	asm("movl %%cs,%0" : "=r" (cs));
94 	asm("movl %%es,%0" : "=r" (es));
95 	asm("movl %%fs,%0" : "=r" (fsindex));
96 	asm("movl %%gs,%0" : "=r" (gsindex));
97 
98 	rdmsrl(MSR_FS_BASE, fs);
99 	rdmsrl(MSR_GS_BASE, gs);
100 	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
101 
102 	if (!all)
103 		return;
104 
105 	cr0 = read_cr0();
106 	cr2 = read_cr2();
107 	cr3 = __read_cr3();
108 	cr4 = __read_cr4();
109 
110 	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
111 	       fs, fsindex, gs, gsindex, shadowgs);
112 	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
113 			es, cr0);
114 	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
115 			cr4);
116 
117 	get_debugreg(d0, 0);
118 	get_debugreg(d1, 1);
119 	get_debugreg(d2, 2);
120 	get_debugreg(d3, 3);
121 	get_debugreg(d6, 6);
122 	get_debugreg(d7, 7);
123 
124 	/* Only print out debug registers if they are in their non-default state. */
125 	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
126 	    (d6 == DR6_RESERVED) && (d7 == 0x400))) {
127 		printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
128 		       d0, d1, d2);
129 		printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
130 		       d3, d6, d7);
131 	}
132 
133 	if (boot_cpu_has(X86_FEATURE_OSPKE))
134 		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
135 }
136 
137 void release_thread(struct task_struct *dead_task)
138 {
139 	if (dead_task->mm) {
140 #ifdef CONFIG_MODIFY_LDT_SYSCALL
141 		if (dead_task->mm->context.ldt) {
142 			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
143 				dead_task->comm,
144 				dead_task->mm->context.ldt->entries,
145 				dead_task->mm->context.ldt->nr_entries);
146 			BUG();
147 		}
148 #endif
149 	}
150 }
151 
152 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
153 		unsigned long arg, struct task_struct *p, unsigned long tls)
154 {
155 	int err;
156 	struct pt_regs *childregs;
157 	struct fork_frame *fork_frame;
158 	struct inactive_task_frame *frame;
159 	struct task_struct *me = current;
160 
161 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
162 	childregs = task_pt_regs(p);
163 	fork_frame = container_of(childregs, struct fork_frame, regs);
164 	frame = &fork_frame->frame;
165 	frame->bp = 0;
166 	frame->ret_addr = (unsigned long) ret_from_fork;
167 	p->thread.sp = (unsigned long) fork_frame;
168 	p->thread.io_bitmap_ptr = NULL;
169 
170 	savesegment(gs, p->thread.gsindex);
171 	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
172 	savesegment(fs, p->thread.fsindex);
173 	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
174 	savesegment(es, p->thread.es);
175 	savesegment(ds, p->thread.ds);
176 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
177 
178 	if (unlikely(p->flags & PF_KTHREAD)) {
179 		/* kernel thread */
180 		memset(childregs, 0, sizeof(struct pt_regs));
181 		frame->bx = sp;		/* function */
182 		frame->r12 = arg;
183 		return 0;
184 	}
185 	frame->bx = 0;
186 	*childregs = *current_pt_regs();
187 
188 	childregs->ax = 0;
189 	if (sp)
190 		childregs->sp = sp;
191 
192 	err = -ENOMEM;
193 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
194 		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
195 						  IO_BITMAP_BYTES, GFP_KERNEL);
196 		if (!p->thread.io_bitmap_ptr) {
197 			p->thread.io_bitmap_max = 0;
198 			return -ENOMEM;
199 		}
200 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
201 	}
202 
203 	/*
204 	 * Set a new TLS for the child thread?
205 	 */
206 	if (clone_flags & CLONE_SETTLS) {
207 #ifdef CONFIG_IA32_EMULATION
208 		if (in_ia32_syscall())
209 			err = do_set_thread_area(p, -1,
210 				(struct user_desc __user *)tls, 0);
211 		else
212 #endif
213 			err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
214 		if (err)
215 			goto out;
216 	}
217 	err = 0;
218 out:
219 	if (err && p->thread.io_bitmap_ptr) {
220 		kfree(p->thread.io_bitmap_ptr);
221 		p->thread.io_bitmap_max = 0;
222 	}
223 
224 	return err;
225 }
226 
227 static void
228 start_thread_common(struct pt_regs *regs, unsigned long new_ip,
229 		    unsigned long new_sp,
230 		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
231 {
232 	loadsegment(fs, 0);
233 	loadsegment(es, _ds);
234 	loadsegment(ds, _ds);
235 	load_gs_index(0);
236 	regs->ip		= new_ip;
237 	regs->sp		= new_sp;
238 	regs->cs		= _cs;
239 	regs->ss		= _ss;
240 	regs->flags		= X86_EFLAGS_IF;
241 	force_iret();
242 }
243 
244 void
245 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
246 {
247 	start_thread_common(regs, new_ip, new_sp,
248 			    __USER_CS, __USER_DS, 0);
249 }
250 
251 #ifdef CONFIG_COMPAT
252 void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
253 {
254 	start_thread_common(regs, new_ip, new_sp,
255 			    test_thread_flag(TIF_X32)
256 			    ? __USER_CS : __USER32_CS,
257 			    __USER_DS, __USER_DS);
258 }
259 #endif
260 
261 /*
262  *	switch_to(x,y) should switch tasks from x to y.
263  *
264  * This could still be optimized:
265  * - fold all the options into a flag word and test it with a single test.
266  * - could test fs/gs bitsliced
267  *
268  * Kprobes not supported here. Set the probe on schedule instead.
269  * Function graph tracer not supported too.
270  */
271 __visible __notrace_funcgraph struct task_struct *
272 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
273 {
274 	struct thread_struct *prev = &prev_p->thread;
275 	struct thread_struct *next = &next_p->thread;
276 	struct fpu *prev_fpu = &prev->fpu;
277 	struct fpu *next_fpu = &next->fpu;
278 	int cpu = smp_processor_id();
279 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
280 	unsigned prev_fsindex, prev_gsindex;
281 
282 	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
283 		     this_cpu_read(irq_count) != -1);
284 
285 	switch_fpu_prepare(prev_fpu, cpu);
286 
287 	/* We must save %fs and %gs before load_TLS() because
288 	 * %fs and %gs may be cleared by load_TLS().
289 	 *
290 	 * (e.g. xen_load_tls())
291 	 */
292 	savesegment(fs, prev_fsindex);
293 	savesegment(gs, prev_gsindex);
294 
295 	/*
296 	 * Load TLS before restoring any segments so that segment loads
297 	 * reference the correct GDT entries.
298 	 */
299 	load_TLS(next, cpu);
300 
301 	/*
302 	 * Leave lazy mode, flushing any hypercalls made here.  This
303 	 * must be done after loading TLS entries in the GDT but before
304 	 * loading segments that might reference them, and and it must
305 	 * be done before fpu__restore(), so the TS bit is up to
306 	 * date.
307 	 */
308 	arch_end_context_switch(next_p);
309 
310 	/* Switch DS and ES.
311 	 *
312 	 * Reading them only returns the selectors, but writing them (if
313 	 * nonzero) loads the full descriptor from the GDT or LDT.  The
314 	 * LDT for next is loaded in switch_mm, and the GDT is loaded
315 	 * above.
316 	 *
317 	 * We therefore need to write new values to the segment
318 	 * registers on every context switch unless both the new and old
319 	 * values are zero.
320 	 *
321 	 * Note that we don't need to do anything for CS and SS, as
322 	 * those are saved and restored as part of pt_regs.
323 	 */
324 	savesegment(es, prev->es);
325 	if (unlikely(next->es | prev->es))
326 		loadsegment(es, next->es);
327 
328 	savesegment(ds, prev->ds);
329 	if (unlikely(next->ds | prev->ds))
330 		loadsegment(ds, next->ds);
331 
332 	/*
333 	 * Switch FS and GS.
334 	 *
335 	 * These are even more complicated than DS and ES: they have
336 	 * 64-bit bases are that controlled by arch_prctl.  The bases
337 	 * don't necessarily match the selectors, as user code can do
338 	 * any number of things to cause them to be inconsistent.
339 	 *
340 	 * We don't promise to preserve the bases if the selectors are
341 	 * nonzero.  We also don't promise to preserve the base if the
342 	 * selector is zero and the base doesn't match whatever was
343 	 * most recently passed to ARCH_SET_FS/GS.  (If/when the
344 	 * FSGSBASE instructions are enabled, we'll need to offer
345 	 * stronger guarantees.)
346 	 *
347 	 * As an invariant,
348 	 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
349 	 * impossible.
350 	 */
351 	if (next->fsindex) {
352 		/* Loading a nonzero value into FS sets the index and base. */
353 		loadsegment(fs, next->fsindex);
354 	} else {
355 		if (next->fsbase) {
356 			/* Next index is zero but next base is nonzero. */
357 			if (prev_fsindex)
358 				loadsegment(fs, 0);
359 			wrmsrl(MSR_FS_BASE, next->fsbase);
360 		} else {
361 			/* Next base and index are both zero. */
362 			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
363 				/*
364 				 * We don't know the previous base and can't
365 				 * find out without RDMSR.  Forcibly clear it.
366 				 */
367 				loadsegment(fs, __USER_DS);
368 				loadsegment(fs, 0);
369 			} else {
370 				/*
371 				 * If the previous index is zero and ARCH_SET_FS
372 				 * didn't change the base, then the base is
373 				 * also zero and we don't need to do anything.
374 				 */
375 				if (prev->fsbase || prev_fsindex)
376 					loadsegment(fs, 0);
377 			}
378 		}
379 	}
380 	/*
381 	 * Save the old state and preserve the invariant.
382 	 * NB: if prev_fsindex == 0, then we can't reliably learn the base
383 	 * without RDMSR because Intel user code can zero it without telling
384 	 * us and AMD user code can program any 32-bit value without telling
385 	 * us.
386 	 */
387 	if (prev_fsindex)
388 		prev->fsbase = 0;
389 	prev->fsindex = prev_fsindex;
390 
391 	if (next->gsindex) {
392 		/* Loading a nonzero value into GS sets the index and base. */
393 		load_gs_index(next->gsindex);
394 	} else {
395 		if (next->gsbase) {
396 			/* Next index is zero but next base is nonzero. */
397 			if (prev_gsindex)
398 				load_gs_index(0);
399 			wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
400 		} else {
401 			/* Next base and index are both zero. */
402 			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
403 				/*
404 				 * We don't know the previous base and can't
405 				 * find out without RDMSR.  Forcibly clear it.
406 				 *
407 				 * This contains a pointless SWAPGS pair.
408 				 * Fixing it would involve an explicit check
409 				 * for Xen or a new pvop.
410 				 */
411 				load_gs_index(__USER_DS);
412 				load_gs_index(0);
413 			} else {
414 				/*
415 				 * If the previous index is zero and ARCH_SET_GS
416 				 * didn't change the base, then the base is
417 				 * also zero and we don't need to do anything.
418 				 */
419 				if (prev->gsbase || prev_gsindex)
420 					load_gs_index(0);
421 			}
422 		}
423 	}
424 	/*
425 	 * Save the old state and preserve the invariant.
426 	 * NB: if prev_gsindex == 0, then we can't reliably learn the base
427 	 * without RDMSR because Intel user code can zero it without telling
428 	 * us and AMD user code can program any 32-bit value without telling
429 	 * us.
430 	 */
431 	if (prev_gsindex)
432 		prev->gsbase = 0;
433 	prev->gsindex = prev_gsindex;
434 
435 	switch_fpu_finish(next_fpu, cpu);
436 
437 	/*
438 	 * Switch the PDA and FPU contexts.
439 	 */
440 	this_cpu_write(current_task, next_p);
441 
442 	/* Reload esp0 and ss1.  This changes current_thread_info(). */
443 	load_sp0(tss, next);
444 
445 	/*
446 	 * Now maybe reload the debug registers and handle I/O bitmaps
447 	 */
448 	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
449 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
450 		__switch_to_xtra(prev_p, next_p, tss);
451 
452 #ifdef CONFIG_XEN_PV
453 	/*
454 	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
455 	 * current_pt_regs()->flags may not match the current task's
456 	 * intended IOPL.  We need to switch it manually.
457 	 */
458 	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
459 		     prev->iopl != next->iopl))
460 		xen_set_iopl_mask(next->iopl);
461 #endif
462 
463 	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
464 		/*
465 		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
466 		 * does not update the cached descriptor.  As a result, if we
467 		 * do SYSRET while SS is NULL, we'll end up in user mode with
468 		 * SS apparently equal to __USER_DS but actually unusable.
469 		 *
470 		 * The straightforward workaround would be to fix it up just
471 		 * before SYSRET, but that would slow down the system call
472 		 * fast paths.  Instead, we ensure that SS is never NULL in
473 		 * system call context.  We do this by replacing NULL SS
474 		 * selectors at every context switch.  SYSCALL sets up a valid
475 		 * SS, so the only way to get NULL is to re-enter the kernel
476 		 * from CPL 3 through an interrupt.  Since that can't happen
477 		 * in the same task as a running syscall, we are guaranteed to
478 		 * context switch between every interrupt vector entry and a
479 		 * subsequent SYSRET.
480 		 *
481 		 * We read SS first because SS reads are much faster than
482 		 * writes.  Out of caution, we force SS to __KERNEL_DS even if
483 		 * it previously had a different non-NULL value.
484 		 */
485 		unsigned short ss_sel;
486 		savesegment(ss, ss_sel);
487 		if (ss_sel != __KERNEL_DS)
488 			loadsegment(ss, __KERNEL_DS);
489 	}
490 
491 	/* Load the Intel cache allocation PQR MSR. */
492 	intel_rdt_sched_in();
493 
494 	return prev_p;
495 }
496 
497 void set_personality_64bit(void)
498 {
499 	/* inherit personality from parent */
500 
501 	/* Make sure to be in 64bit mode */
502 	clear_thread_flag(TIF_IA32);
503 	clear_thread_flag(TIF_ADDR32);
504 	clear_thread_flag(TIF_X32);
505 	/* Pretend that this comes from a 64bit execve */
506 	task_pt_regs(current)->orig_ax = __NR_execve;
507 
508 	/* Ensure the corresponding mm is not marked. */
509 	if (current->mm)
510 		current->mm->context.ia32_compat = 0;
511 
512 	/* TBD: overwrites user setup. Should have two bits.
513 	   But 64bit processes have always behaved this way,
514 	   so it's not too bad. The main problem is just that
515 	   32bit childs are affected again. */
516 	current->personality &= ~READ_IMPLIES_EXEC;
517 }
518 
/*
 * Switch the current task to the x32 personality.  Compiles to an empty
 * function unless CONFIG_X86_X32 is defined.
 */
static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
	struct pt_regs *regs;

	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);

	if (current->mm)
		current->mm->context.ia32_compat = TIF_X32;

	current->personality &= ~READ_IMPLIES_EXEC;

	/*
	 * in_compat_syscall() decides on compat status by looking for the
	 * x32 syscall bit, and the x86 mmap() code relies on the syscall
	 * bitness.  Setting the x32 syscall bit here makes
	 * in_compat_syscall() work during exec().
	 *
	 * In other words: pretend to come from an x32 execve.
	 */
	regs = task_pt_regs(current);
	regs->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
	current->thread.status &= ~TS_COMPAT;
#endif
}
539 
/*
 * Switch the current task to the ia32 personality.  Compiles to an empty
 * function unless CONFIG_IA32_EMULATION is defined.
 */
static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
	struct pt_regs *regs;

	set_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_X32);

	if (current->mm)
		current->mm->context.ia32_compat = TIF_IA32;

	current->personality |= force_personality32;

	/* Set things up for the first "return" to user space. */
	regs = task_pt_regs(current);
	regs->orig_ax = __NR_ia32_execve;
	current->thread.status |= TS_COMPAT;
#endif
}
553 
554 void set_personality_ia32(bool x32)
555 {
556 	/* Make sure to be in 32bit mode */
557 	set_thread_flag(TIF_ADDR32);
558 
559 	if (x32)
560 		__set_personality_x32();
561 	else
562 		__set_personality_ia32();
563 }
564 EXPORT_SYMBOL_GPL(set_personality_ia32);
565 
566 #ifdef CONFIG_CHECKPOINT_RESTORE
567 static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
568 {
569 	int ret;
570 
571 	ret = map_vdso_once(image, addr);
572 	if (ret)
573 		return ret;
574 
575 	return (long)image->size;
576 }
577 #endif
578 
579 long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
580 {
581 	int ret = 0;
582 	int doit = task == current;
583 	int cpu;
584 
585 	switch (option) {
586 	case ARCH_SET_GS:
587 		if (arg2 >= TASK_SIZE_MAX)
588 			return -EPERM;
589 		cpu = get_cpu();
590 		task->thread.gsindex = 0;
591 		task->thread.gsbase = arg2;
592 		if (doit) {
593 			load_gs_index(0);
594 			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
595 		}
596 		put_cpu();
597 		break;
598 	case ARCH_SET_FS:
599 		/* Not strictly needed for fs, but do it for symmetry
600 		   with gs */
601 		if (arg2 >= TASK_SIZE_MAX)
602 			return -EPERM;
603 		cpu = get_cpu();
604 		task->thread.fsindex = 0;
605 		task->thread.fsbase = arg2;
606 		if (doit) {
607 			/* set the selector to 0 to not confuse __switch_to */
608 			loadsegment(fs, 0);
609 			ret = wrmsrl_safe(MSR_FS_BASE, arg2);
610 		}
611 		put_cpu();
612 		break;
613 	case ARCH_GET_FS: {
614 		unsigned long base;
615 
616 		if (doit)
617 			rdmsrl(MSR_FS_BASE, base);
618 		else
619 			base = task->thread.fsbase;
620 		ret = put_user(base, (unsigned long __user *)arg2);
621 		break;
622 	}
623 	case ARCH_GET_GS: {
624 		unsigned long base;
625 
626 		if (doit)
627 			rdmsrl(MSR_KERNEL_GS_BASE, base);
628 		else
629 			base = task->thread.gsbase;
630 		ret = put_user(base, (unsigned long __user *)arg2);
631 		break;
632 	}
633 
634 #ifdef CONFIG_CHECKPOINT_RESTORE
635 # ifdef CONFIG_X86_X32_ABI
636 	case ARCH_MAP_VDSO_X32:
637 		return prctl_map_vdso(&vdso_image_x32, arg2);
638 # endif
639 # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
640 	case ARCH_MAP_VDSO_32:
641 		return prctl_map_vdso(&vdso_image_32, arg2);
642 # endif
643 	case ARCH_MAP_VDSO_64:
644 		return prctl_map_vdso(&vdso_image_64, arg2);
645 #endif
646 
647 	default:
648 		ret = -EINVAL;
649 		break;
650 	}
651 
652 	return ret;
653 }
654 
655 SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
656 {
657 	long ret;
658 
659 	ret = do_arch_prctl_64(current, option, arg2);
660 	if (ret == -EINVAL)
661 		ret = do_arch_prctl_common(current, option, arg2);
662 
663 	return ret;
664 }
665 
666 #ifdef CONFIG_IA32_EMULATION
667 COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
668 {
669 	return do_arch_prctl_common(current, option, arg2);
670 }
671 #endif
672 
673 unsigned long KSTK_ESP(struct task_struct *task)
674 {
675 	return task_pt_regs(task)->sp;
676 }
677