xref: /linux/arch/x86/kernel/process_64.c (revision 367b8112fe2ea5c39a7bb4d263dcdd9b612fae18)
1 /*
2  *  Copyright (C) 1995  Linus Torvalds
3  *
4  *  Pentium III FXSR, SSE support
5  *	Gareth Hughes <gareth@valinux.com>, May 2000
6  *
7  *  X86-64 port
8  *	Andi Kleen.
9  *
10  *	CPU hotplug support - ashok.raj@intel.com
11  */
12 
13 /*
14  * This file handles the architecture-dependent parts of process handling.
15  */
16 
17 #include <stdarg.h>
18 
19 #include <linux/cpu.h>
20 #include <linux/errno.h>
21 #include <linux/sched.h>
22 #include <linux/fs.h>
23 #include <linux/kernel.h>
24 #include <linux/mm.h>
25 #include <linux/elfcore.h>
26 #include <linux/smp.h>
27 #include <linux/slab.h>
28 #include <linux/user.h>
29 #include <linux/interrupt.h>
30 #include <linux/utsname.h>
31 #include <linux/delay.h>
32 #include <linux/module.h>
33 #include <linux/ptrace.h>
34 #include <linux/random.h>
35 #include <linux/notifier.h>
36 #include <linux/kprobes.h>
37 #include <linux/kdebug.h>
38 #include <linux/tick.h>
39 #include <linux/prctl.h>
40 #include <linux/uaccess.h>
41 #include <linux/io.h>
42 
43 #include <asm/pgtable.h>
44 #include <asm/system.h>
45 #include <asm/processor.h>
46 #include <asm/i387.h>
47 #include <asm/mmu_context.h>
48 #include <asm/pda.h>
49 #include <asm/prctl.h>
50 #include <asm/desc.h>
51 #include <asm/proto.h>
52 #include <asm/ia32.h>
53 #include <asm/idle.h>
54 #include <asm/syscalls.h>
55 
56 asmlinkage extern void ret_from_fork(void);
57 
58 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
59 
60 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
61 
62 void idle_notifier_register(struct notifier_block *n)
63 {
64 	atomic_notifier_chain_register(&idle_notifier, n);
65 }
66 EXPORT_SYMBOL_GPL(idle_notifier_register);
67 
68 void idle_notifier_unregister(struct notifier_block *n)
69 {
70 	atomic_notifier_chain_unregister(&idle_notifier, n);
71 }
72 EXPORT_SYMBOL_GPL(idle_notifier_unregister);
73 
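/* Mark this CPU as idle in the PDA and run the IDLE_START notifier chain. */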
74 void enter_idle(void)
75 {
76 	write_pda(isidle, 1);
77 	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
78 }
79 
80 static void __exit_idle(void)
81 {
82 	if (test_and_clear_bit_pda(0, isidle) == 0)
83 		return;
84 	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
85 }
86 
87 /* Called from interrupts to signify idle end */
88 void exit_idle(void)
89 {
90 	/* idle loop has pid 0 */
91 	if (current->pid)
92 		return;
93 	__exit_idle();
94 }
95 
96 #ifndef CONFIG_SMP
97 static inline void play_dead(void)
98 {
99 	BUG();
100 }
101 #endif
102 
103 /*
104  * The idle thread. There's no useful work to be
105  * done, so just try to conserve power and have a
106  * low exit latency (i.e. sit in a loop waiting for
107  * somebody to say that they'd like to reschedule)
108  */
109 void cpu_idle(void)
110 {
111 	current_thread_info()->status |= TS_POLLING;
112 	/* endless idle loop with no priority at all */
113 	while (1) {
114 		tick_nohz_stop_sched_tick(1);
115 		while (!need_resched()) {
116 
117 			rmb();
118 
119 			if (cpu_is_offline(smp_processor_id()))
120 				play_dead();
121 			/*
122 			 * Idle routines should keep interrupts disabled
123 			 * from here on, until they actually go idle.
124 			 * Otherwise, idle callbacks can misfire.
125 			 */
126 			local_irq_disable();
127 			enter_idle();
128 			/* Don't trace irqs off for idle */
129 			stop_critical_timings();
130 			pm_idle();
131 			start_critical_timings();
132 			/* In many cases the interrupt that ended idle
133 			   has already called exit_idle. But some idle
134 			   loops can be woken up without interrupt. */
135 			__exit_idle();
136 		}
137 
138 		tick_nohz_restart_sched_tick();
139 		preempt_enable_no_resched();
140 		schedule();
141 		preempt_disable();
142 	}
143 }
144 
145 /* Also prints some state that isn't saved in pt_regs */
146 void __show_regs(struct pt_regs *regs, int all)
147 {
148 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
149 	unsigned long d0, d1, d2, d3, d6, d7;
150 	unsigned int fsindex, gsindex;
151 	unsigned int ds, cs, es;
152 
153 	printk("\n");
154 	print_modules();
155 	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
156 		current->pid, current->comm, print_tainted(),
157 		init_utsname()->release,
158 		(int)strcspn(init_utsname()->version, " "),
159 		init_utsname()->version);
160 	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
161 	printk_address(regs->ip, 1);
162 	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
163 			regs->sp, regs->flags);
164 	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
165 	       regs->ax, regs->bx, regs->cx);
166 	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
167 	       regs->dx, regs->si, regs->di);
168 	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
169 	       regs->bp, regs->r8, regs->r9);
170 	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
171 	       regs->r10, regs->r11, regs->r12);
172 	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
173 	       regs->r13, regs->r14, regs->r15);
174 
175 	asm("movl %%ds,%0" : "=r" (ds));
176 	asm("movl %%cs,%0" : "=r" (cs));
177 	asm("movl %%es,%0" : "=r" (es));
178 	asm("movl %%fs,%0" : "=r" (fsindex));
179 	asm("movl %%gs,%0" : "=r" (gsindex));
180 
181 	rdmsrl(MSR_FS_BASE, fs);
182 	rdmsrl(MSR_GS_BASE, gs);
183 	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
184 
185 	if (!all)
186 		return;
187 
188 	cr0 = read_cr0();
189 	cr2 = read_cr2();
190 	cr3 = read_cr3();
191 	cr4 = read_cr4();
192 
193 	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
194 	       fs, fsindex, gs, gsindex, shadowgs);
195 	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
196 			es, cr0);
197 	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
198 			cr4);
199 
200 	get_debugreg(d0, 0);
201 	get_debugreg(d1, 1);
202 	get_debugreg(d2, 2);
203 	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
204 	get_debugreg(d3, 3);
205 	get_debugreg(d6, 6);
206 	get_debugreg(d7, 7);
207 	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
208 }
209 
210 void show_regs(struct pt_regs *regs)
211 {
212 	printk(KERN_INFO "CPU %d:", smp_processor_id());
213 	__show_regs(regs, 1);
214 	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
215 }
216 
217 /*
218  * Free current thread data structures, etc.
219  */
220 void exit_thread(void)
221 {
222 	struct task_struct *me = current;
223 	struct thread_struct *t = &me->thread;
224 
225 	if (me->thread.io_bitmap_ptr) {
226 		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
227 
228 		kfree(t->io_bitmap_ptr);
229 		t->io_bitmap_ptr = NULL;
230 		clear_thread_flag(TIF_IO_BITMAP);
231 		/*
232 		 * Careful, clear this in the TSS too:
233 		 */
234 		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
235 		t->io_bitmap_max = 0;
236 		put_cpu();
237 	}
238 #ifdef CONFIG_X86_DS
239 	/* Free any DS contexts that have not been properly released. */
240 	if (unlikely(t->ds_ctx)) {
241 		/* we clear debugctl to make sure DS is not used. */
242 		update_debugctlmsr(0);
243 		ds_free(t->ds_ctx);
244 	}
245 #endif /* CONFIG_X86_DS */
246 }
247 
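/*
 * Called on exec: reset the pending ABI switch, debug registers, TLS
 * entries and FPU state of the current task.
 */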
248 void flush_thread(void)
249 {
250 	struct task_struct *tsk = current;
251 
252 	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
253 		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
254 		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
255 			clear_tsk_thread_flag(tsk, TIF_IA32);
256 		} else {
257 			set_tsk_thread_flag(tsk, TIF_IA32);
258 			current_thread_info()->status |= TS_COMPAT;
259 		}
260 	}
261 	clear_tsk_thread_flag(tsk, TIF_DEBUG);
262 
263 	tsk->thread.debugreg0 = 0;
264 	tsk->thread.debugreg1 = 0;
265 	tsk->thread.debugreg2 = 0;
266 	tsk->thread.debugreg3 = 0;
267 	tsk->thread.debugreg6 = 0;
268 	tsk->thread.debugreg7 = 0;
269 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
270 	/*
271 	 * Forget coprocessor state.
272 	 */
273 	tsk->fpu_counter = 0;
274 	clear_fpu(tsk);
275 	clear_used_math();
276 }
277 
278 void release_thread(struct task_struct *dead_task)
279 {
280 	if (dead_task->mm) {
281 		if (dead_task->mm->context.size) {
282 			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
283 					dead_task->comm,
284 					dead_task->mm->context.ldt,
285 					dead_task->mm->context.size);
286 			BUG();
287 		}
288 	}
289 }
290 
291 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
292 {
293 	struct user_desc ud = {
294 		.base_addr = addr,
295 		.limit = 0xfffff,
296 		.seg_32bit = 1,
297 		.limit_in_pages = 1,
298 		.useable = 1,
299 	};
300 	struct desc_struct *desc = t->thread.tls_array;
301 	desc += tls;
302 	fill_ldt(desc, &ud);
303 }
304 
305 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
306 {
307 	return get_desc_base(&t->thread.tls_array[tls]);
308 }
309 
310 /*
311  * This gets called before we allocate a new thread and copy
312  * the current task into it.
313  */
314 void prepare_to_copy(struct task_struct *tsk)
315 {
316 	unlazy_fpu(tsk);
317 }
318 
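/*
 * Set up the child's kernel stack and register state, copy the parent's
 * segment state and I/O permission bitmap, and honour CLONE_SETTLS.
 */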
319 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
320 		unsigned long unused,
321 	struct task_struct *p, struct pt_regs *regs)
322 {
323 	int err;
324 	struct pt_regs *childregs;
325 	struct task_struct *me = current;
326 
327 	childregs = ((struct pt_regs *)
328 			(THREAD_SIZE + task_stack_page(p))) - 1;
329 	*childregs = *regs;
330 
331 	childregs->ax = 0;
332 	childregs->sp = sp;
333 	if (sp == ~0UL)
334 		childregs->sp = (unsigned long)childregs;
335 
336 	p->thread.sp = (unsigned long) childregs;
337 	p->thread.sp0 = (unsigned long) (childregs+1);
338 	p->thread.usersp = me->thread.usersp;
339 
340 	set_tsk_thread_flag(p, TIF_FORK);
341 
342 	p->thread.fs = me->thread.fs;
343 	p->thread.gs = me->thread.gs;
344 
345 	savesegment(gs, p->thread.gsindex);
346 	savesegment(fs, p->thread.fsindex);
347 	savesegment(es, p->thread.es);
348 	savesegment(ds, p->thread.ds);
349 
350 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
351 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
352 		if (!p->thread.io_bitmap_ptr) {
353 			p->thread.io_bitmap_max = 0;
354 			return -ENOMEM;
355 		}
356 		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
357 				IO_BITMAP_BYTES);
358 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
359 	}
360 
361 	/*
362 	 * Set a new TLS for the child thread?
363 	 */
364 	if (clone_flags & CLONE_SETTLS) {
365 #ifdef CONFIG_IA32_EMULATION
366 		if (test_thread_flag(TIF_IA32))
367 			err = do_set_thread_area(p, -1,
368 				(struct user_desc __user *)childregs->si, 0);
369 		else
370 #endif
371 			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
372 		if (err)
373 			goto out;
374 	}
375 	err = 0;
376 out:
377 	if (err && p->thread.io_bitmap_ptr) {
378 		kfree(p->thread.io_bitmap_ptr);
379 		p->thread.io_bitmap_max = 0;
380 	}
381 	return err;
382 }
383 
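/*
 * Reset the segment registers and user register state so that the
 * current task starts executing a freshly exec'ed 64-bit image at
 * new_ip with stack new_sp.
 */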
384 void
385 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
386 {
387 	loadsegment(fs, 0);
388 	loadsegment(es, 0);
389 	loadsegment(ds, 0);
390 	load_gs_index(0);
391 	regs->ip		= new_ip;
392 	regs->sp		= new_sp;
393 	write_pda(oldrsp, new_sp);
394 	regs->cs		= __USER_CS;
395 	regs->ss		= __USER_DS;
396 	regs->flags		= 0x200;
397 	set_fs(USER_DS);
398 	/*
399 	 * Free the old FP and other extended state
400 	 */
401 	free_thread_xstate(current);
402 }
403 EXPORT_SYMBOL_GPL(start_thread);
404 
405 static void hard_disable_TSC(void)
406 {
407 	write_cr4(read_cr4() | X86_CR4_TSD);
408 }
409 
410 void disable_TSC(void)
411 {
412 	preempt_disable();
413 	if (!test_and_set_thread_flag(TIF_NOTSC))
414 		/*
415 		 * Must flip the CPU state synchronously with
416 		 * TIF_NOTSC in the current running context.
417 		 */
418 		hard_disable_TSC();
419 	preempt_enable();
420 }
421 
422 static void hard_enable_TSC(void)
423 {
424 	write_cr4(read_cr4() & ~X86_CR4_TSD);
425 }
426 
427 static void enable_TSC(void)
428 {
429 	preempt_disable();
430 	if (test_and_clear_thread_flag(TIF_NOTSC))
431 		/*
432 		 * Must flip the CPU state synchronously with
433 		 * TIF_NOTSC in the current running context.
434 		 */
435 		hard_enable_TSC();
436 	preempt_enable();
437 }
438 
439 int get_tsc_mode(unsigned long adr)
440 {
441 	unsigned int val;
442 
443 	if (test_thread_flag(TIF_NOTSC))
444 		val = PR_TSC_SIGSEGV;
445 	else
446 		val = PR_TSC_ENABLE;
447 
448 	return put_user(val, (unsigned int __user *)adr);
449 }
450 
451 int set_tsc_mode(unsigned int val)
452 {
453 	if (val == PR_TSC_SIGSEGV)
454 		disable_TSC();
455 	else if (val == PR_TSC_ENABLE)
456 		enable_TSC();
457 	else
458 		return -EINVAL;
459 
460 	return 0;
461 }
462 
463 /*
464  * This special macro can be used to load a debugging register
465  */
466 #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
467 
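/*
 * Slow path of the context switch: switch the DS area and debugctl MSRs,
 * reload the debug registers, flip TSC access and update the I/O bitmap.
 * Only called when prev or next has one of the _TIF_WORK_CTXSW flags set.
 */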
468 static inline void __switch_to_xtra(struct task_struct *prev_p,
469 				    struct task_struct *next_p,
470 				    struct tss_struct *tss)
471 {
472 	struct thread_struct *prev, *next;
473 	unsigned long debugctl;
474 
475 	prev = &prev_p->thread;
476 	next = &next_p->thread;
477 
478 	debugctl = prev->debugctlmsr;
479 
480 #ifdef CONFIG_X86_DS
481 	{
482 		unsigned long ds_prev = 0, ds_next = 0;
483 
484 		if (prev->ds_ctx)
485 			ds_prev = (unsigned long)prev->ds_ctx->ds;
486 		if (next->ds_ctx)
487 			ds_next = (unsigned long)next->ds_ctx->ds;
488 
489 		if (ds_next != ds_prev) {
490 			/*
491 			 * We clear debugctl to make sure DS
492 			 * is not in use when we change it:
493 			 */
494 			debugctl = 0;
495 			update_debugctlmsr(0);
496 			wrmsrl(MSR_IA32_DS_AREA, ds_next);
497 		}
498 	}
499 #endif /* CONFIG_X86_DS */
500 
501 	if (next->debugctlmsr != debugctl)
502 		update_debugctlmsr(next->debugctlmsr);
503 
504 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
505 		loaddebug(next, 0);
506 		loaddebug(next, 1);
507 		loaddebug(next, 2);
508 		loaddebug(next, 3);
509 		/* no 4 and 5 */
510 		loaddebug(next, 6);
511 		loaddebug(next, 7);
512 	}
513 
514 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
515 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
516 		/* prev and next are different */
517 		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
518 			hard_disable_TSC();
519 		else
520 			hard_enable_TSC();
521 	}
522 
523 	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
524 		/*
525 		 * Copy the relevant range of the IO bitmap.
526 		 * Normally this is 128 bytes or less:
527 		 */
528 		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
529 		       max(prev->io_bitmap_max, next->io_bitmap_max));
530 	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
531 		/*
532 		 * Clear any possible leftover bits:
533 		 */
534 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
535 	}
536 
537 #ifdef CONFIG_X86_PTRACE_BTS
538 	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
539 		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
540 
541 	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
542 		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
543 #endif /* CONFIG_X86_PTRACE_BTS */
544 }
545 
546 /*
547  *	switch_to(x,y) should switch tasks from x to y.
548  *
549  * This could still be optimized:
550  * - fold all the options into a flag word and test it with a single test.
551  * - could test fs/gs bitsliced
552  *
553  * Kprobes not supported here. Set the probe on schedule instead.
554  */
555 struct task_struct *
556 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
557 {
558 	struct thread_struct *prev = &prev_p->thread;
559 	struct thread_struct *next = &next_p->thread;
560 	int cpu = smp_processor_id();
561 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
562 	unsigned fsindex, gsindex;
563 
564 	/* we're going to use this soon, after a few expensive things */
565 	if (next_p->fpu_counter > 5)
566 		prefetch(next->xstate);
567 
568 	/*
569 	 * Reload esp0, LDT and the page table pointer:
570 	 */
571 	load_sp0(tss, next);
572 
573 	/*
574 	 * Switch DS and ES.
575 	 * This won't pick up thread selector changes, but I guess that is ok.
576 	 */
577 	savesegment(es, prev->es);
578 	if (unlikely(next->es | prev->es))
579 		loadsegment(es, next->es);
580 
581 	savesegment(ds, prev->ds);
582 	if (unlikely(next->ds | prev->ds))
583 		loadsegment(ds, next->ds);
584 
585 
586 	/* We must save %fs and %gs before load_TLS() because
587 	 * %fs and %gs may be cleared by load_TLS().
588 	 *
589 	 * (e.g. xen_load_tls())
590 	 */
591 	savesegment(fs, fsindex);
592 	savesegment(gs, gsindex);
593 
594 	load_TLS(next, cpu);
595 
596 	/*
597 	 * Leave lazy mode, flushing any hypercalls made here.
598 	 * This must be done before restoring TLS segments so
599 	 * the GDT and LDT are properly updated, and must be
600 	 * done before math_state_restore, so the TS bit is up
601 	 * to date.
602 	 */
603 	arch_leave_lazy_cpu_mode();
604 
605 	/*
606 	 * Switch FS and GS.
607 	 *
608 	 * Segment register != 0 always requires a reload.  Also
609 	 * reload when it has changed.  When the prev process used a 64bit
610 	 * base, always reload to avoid an information leak.
611 	 */
612 	if (unlikely(fsindex | next->fsindex | prev->fs)) {
613 		loadsegment(fs, next->fsindex);
614 		/*
615 		 * Check if the user used a selector != 0; if yes,
616 		 * clear the 64bit base, since the overloaded base is
617 		 * always mapped to the null selector.
618 		 */
619 		if (fsindex)
620 			prev->fs = 0;
621 	}
622 	/* when next process has a 64bit base use it */
623 	if (next->fs)
624 		wrmsrl(MSR_FS_BASE, next->fs);
625 	prev->fsindex = fsindex;
626 
627 	if (unlikely(gsindex | next->gsindex | prev->gs)) {
628 		load_gs_index(next->gsindex);
629 		if (gsindex)
630 			prev->gs = 0;
631 	}
632 	if (next->gs)
633 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
634 	prev->gsindex = gsindex;
635 
636 	/* Must be after DS reload */
637 	unlazy_fpu(prev_p);
638 
639 	/*
640 	 * Switch the PDA and FPU contexts.
641 	 */
642 	prev->usersp = read_pda(oldrsp);
643 	write_pda(oldrsp, next->usersp);
644 	write_pda(pcurrent, next_p);
645 
646 	write_pda(kernelstack,
647 		  (unsigned long)task_stack_page(next_p) +
648 		  THREAD_SIZE - PDA_STACKOFFSET);
649 #ifdef CONFIG_CC_STACKPROTECTOR
650 	write_pda(stack_canary, next_p->stack_canary);
651 	/*
652 	 * Build time only check to make sure the stack_canary is at
653 	 * offset 40 in the pda; this is a gcc ABI requirement
654 	 */
655 	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
656 #endif
657 
658 	/*
659 	 * Now maybe reload the debug registers and handle I/O bitmaps
660 	 */
661 	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
662 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
663 		__switch_to_xtra(prev_p, next_p, tss);
664 
665 	/* If the task has used the FPU in the last 5 timeslices, just do a full
666 	 * restore of the math state immediately to avoid the trap; the
667 	 * chances of needing FPU soon are obviously high now
668 	 *
669 	 * tsk_used_math() checks prevent calling math_state_restore(),
670 	 * which can sleep in the case of !tsk_used_math()
671 	 */
672 	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
673 		math_state_restore();
674 	return prev_p;
675 }
676 
677 /*
678  * sys_execve() executes a new program.
679  */
680 asmlinkage
681 long sys_execve(char __user *name, char __user * __user *argv,
682 		char __user * __user *envp, struct pt_regs *regs)
683 {
684 	long error;
685 	char *filename;
686 
687 	filename = getname(name);
688 	error = PTR_ERR(filename);
689 	if (IS_ERR(filename))
690 		return error;
691 	error = do_execve(filename, argv, envp, regs);
692 	putname(filename);
693 	return error;
694 }
695 
696 void set_personality_64bit(void)
697 {
698 	/* inherit personality from parent */
699 
700 	/* Make sure to be in 64bit mode */
701 	clear_thread_flag(TIF_IA32);
702 
703 	/* TBD: overwrites user setup. Should have two bits.
704 	   But 64bit processes have always behaved this way,
705 	   so it's not too bad. The main problem is just that
706 	   32bit children are affected again. */
707 	current->personality &= ~READ_IMPLIES_EXEC;
708 }
709 
710 asmlinkage long sys_fork(struct pt_regs *regs)
711 {
712 	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
713 }
714 
715 asmlinkage long
716 sys_clone(unsigned long clone_flags, unsigned long newsp,
717 	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
718 {
719 	if (!newsp)
720 		newsp = regs->sp;
721 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
722 }
723 
724 /*
725  * This is trivial, and on the face of it looks like it
726  * could equally well be done in user mode.
727  *
728  * Not so, for quite unobvious reasons - register pressure.
729  * In user mode vfork() cannot have a stack frame, and if
730  * done by calling the "clone()" system call directly, you
731  * do not have enough call-clobbered registers to hold all
732  * the information you need.
733  */
734 asmlinkage long sys_vfork(struct pt_regs *regs)
735 {
736 	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
737 		    NULL, NULL);
738 }
739 
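/*
 * Walk the sleeping task's saved frame pointers and return the first
 * return address outside the scheduler, i.e. where the task is waiting.
 */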
740 unsigned long get_wchan(struct task_struct *p)
741 {
742 	unsigned long stack;
743 	u64 fp, ip;
744 	int count = 0;
745 
746 	if (!p || p == current || p->state == TASK_RUNNING)
747 		return 0;
748 	stack = (unsigned long)task_stack_page(p);
749 	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
750 		return 0;
751 	fp = *(u64 *)(p->thread.sp);
752 	do {
753 		if (fp < (unsigned long)stack ||
754 		    fp >= (unsigned long)stack+THREAD_SIZE)
755 			return 0;
756 		ip = *(u64 *)(fp+8);
757 		if (!in_sched_functions(ip))
758 			return ip;
759 		fp = *(u64 *)fp;
760 	} while (count++ < 16);
761 	return 0;
762 }
763 
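/*
 * Backend for arch_prctl(): get or set the FS/GS base of @task.  Small
 * (32-bit) bases are installed as GDT TLS entries because reloading a
 * selector is faster to switch than writing the base MSRs.
 */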
764 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
765 {
766 	int ret = 0;
767 	int doit = task == current;
768 	int cpu;
769 
770 	switch (code) {
771 	case ARCH_SET_GS:
772 		if (addr >= TASK_SIZE_OF(task))
773 			return -EPERM;
774 		cpu = get_cpu();
775 		/* handle small bases via the GDT because that's faster to
776 		   switch. */
777 		if (addr <= 0xffffffff) {
778 			set_32bit_tls(task, GS_TLS, addr);
779 			if (doit) {
780 				load_TLS(&task->thread, cpu);
781 				load_gs_index(GS_TLS_SEL);
782 			}
783 			task->thread.gsindex = GS_TLS_SEL;
784 			task->thread.gs = 0;
785 		} else {
786 			task->thread.gsindex = 0;
787 			task->thread.gs = addr;
788 			if (doit) {
789 				load_gs_index(0);
790 				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
791 			}
792 		}
793 		put_cpu();
794 		break;
795 	case ARCH_SET_FS:
796 		/* Not strictly needed for fs, but do it for symmetry
797 		   with gs */
798 		if (addr >= TASK_SIZE_OF(task))
799 			return -EPERM;
800 		cpu = get_cpu();
801 		/* handle small bases via the GDT because that's faster to
802 		   switch. */
803 		if (addr <= 0xffffffff) {
804 			set_32bit_tls(task, FS_TLS, addr);
805 			if (doit) {
806 				load_TLS(&task->thread, cpu);
807 				loadsegment(fs, FS_TLS_SEL);
808 			}
809 			task->thread.fsindex = FS_TLS_SEL;
810 			task->thread.fs = 0;
811 		} else {
812 			task->thread.fsindex = 0;
813 			task->thread.fs = addr;
814 			if (doit) {
815 				/* set the selector to 0 to not confuse
816 				   __switch_to */
817 				loadsegment(fs, 0);
818 				ret = checking_wrmsrl(MSR_FS_BASE, addr);
819 			}
820 		}
821 		put_cpu();
822 		break;
823 	case ARCH_GET_FS: {
824 		unsigned long base;
825 		if (task->thread.fsindex == FS_TLS_SEL)
826 			base = read_32bit_tls(task, FS_TLS);
827 		else if (doit)
828 			rdmsrl(MSR_FS_BASE, base);
829 		else
830 			base = task->thread.fs;
831 		ret = put_user(base, (unsigned long __user *)addr);
832 		break;
833 	}
834 	case ARCH_GET_GS: {
835 		unsigned long base;
836 		unsigned gsindex;
837 		if (task->thread.gsindex == GS_TLS_SEL)
838 			base = read_32bit_tls(task, GS_TLS);
839 		else if (doit) {
840 			savesegment(gs, gsindex);
841 			if (gsindex)
842 				rdmsrl(MSR_KERNEL_GS_BASE, base);
843 			else
844 				base = task->thread.gs;
845 		} else
846 			base = task->thread.gs;
847 		ret = put_user(base, (unsigned long __user *)addr);
848 		break;
849 	}
850 
851 	default:
852 		ret = -EINVAL;
853 		break;
854 	}
855 
856 	return ret;
857 }
858 
859 long sys_arch_prctl(int code, unsigned long addr)
860 {
861 	return do_arch_prctl(current, code, addr);
862 }
863 
864 unsigned long arch_align_stack(unsigned long sp)
865 {
866 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
867 		sp -= get_random_int() % 8192;
868 	return sp & ~0xf;
869 }
870 
871 unsigned long arch_randomize_brk(struct mm_struct *mm)
872 {
873 	unsigned long range_end = mm->brk + 0x02000000;
874 	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
875 }
876