xref: /linux/arch/x86/kernel/process_64.c (revision f7511d5f66f01fc451747b24e79f3ada7a3af9af)
1 /*
2  *  Copyright (C) 1995  Linus Torvalds
3  *
4  *  Pentium III FXSR, SSE support
5  *	Gareth Hughes <gareth@valinux.com>, May 2000
6  *
7  *  X86-64 port
8  *	Andi Kleen.
9  *
10  *	CPU hotplug support - ashok.raj@intel.com
11  */
12 
13 /*
14  * This file handles the architecture-dependent parts of process handling.
15  */
16 
17 #include <stdarg.h>
18 
19 #include <linux/cpu.h>
20 #include <linux/errno.h>
21 #include <linux/sched.h>
22 #include <linux/fs.h>
23 #include <linux/kernel.h>
24 #include <linux/mm.h>
25 #include <linux/elfcore.h>
26 #include <linux/smp.h>
27 #include <linux/slab.h>
28 #include <linux/user.h>
29 #include <linux/interrupt.h>
30 #include <linux/utsname.h>
31 #include <linux/delay.h>
32 #include <linux/module.h>
33 #include <linux/ptrace.h>
34 #include <linux/random.h>
35 #include <linux/notifier.h>
36 #include <linux/kprobes.h>
37 #include <linux/kdebug.h>
38 #include <linux/tick.h>
39 #include <linux/prctl.h>
40 
41 #include <asm/uaccess.h>
42 #include <asm/pgtable.h>
43 #include <asm/system.h>
44 #include <asm/io.h>
45 #include <asm/processor.h>
46 #include <asm/i387.h>
47 #include <asm/mmu_context.h>
48 #include <asm/pda.h>
49 #include <asm/prctl.h>
50 #include <asm/desc.h>
51 #include <asm/proto.h>
52 #include <asm/ia32.h>
53 #include <asm/idle.h>
54 
55 asmlinkage extern void ret_from_fork(void);
56 
57 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
58 
59 unsigned long boot_option_idle_override = 0;
60 EXPORT_SYMBOL(boot_option_idle_override);
61 
62 /*
63  * Power management idle function, if any.
64  */
65 void (*pm_idle)(void);
66 EXPORT_SYMBOL(pm_idle);
67 
68 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
69 
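/*
 * Register a callback on the idle notifier chain; registered entries are
 * called with IDLE_START when a CPU enters idle and IDLE_END when it
 * leaves (see enter_idle()/__exit_idle() below).
 */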
70 void idle_notifier_register(struct notifier_block *n)
71 {
72 	atomic_notifier_chain_register(&idle_notifier, n);
73 }
74 
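/*
 * Mark this CPU as idle in its PDA and tell the idle notifier chain that
 * an idle period starts.  Called from the idle loop with interrupts
 * disabled (see cpu_idle() below).
 */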
75 void enter_idle(void)
76 {
77 	write_pda(isidle, 1);
78 	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
79 }
80 
81 static void __exit_idle(void)
82 {
83 	if (test_and_clear_bit_pda(0, isidle) == 0)
84 		return;
85 	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
86 }
87 
88 /* Called from interrupts to signify idle end */
89 void exit_idle(void)
90 {
91 	/* idle loop has pid 0 */
92 	if (current->pid)
93 		return;
94 	__exit_idle();
95 }
96 
97 /*
98  * We use this if we don't have any better
99  * idle routine.
100  */
101 void default_idle(void)
102 {
103 	current_thread_info()->status &= ~TS_POLLING;
104 	/*
105 	 * TS_POLLING-cleared state must be visible before we
106 	 * test NEED_RESCHED:
107 	 */
108 	smp_mb();
109 	if (!need_resched())
110 		safe_halt();	/* enables interrupts racelessly */
111 	else
112 		local_irq_enable();
113 	current_thread_info()->status |= TS_POLLING;
114 }
115 
116 #ifdef CONFIG_HOTPLUG_CPU
117 DECLARE_PER_CPU(int, cpu_state);
118 
119 #include <asm/nmi.h>
120 /* We halt the CPU with physical CPU hotplug */
121 static inline void play_dead(void)
122 {
123 	idle_task_exit();
124 	wbinvd();
125 	mb();
126 	/* Ack it */
127 	__get_cpu_var(cpu_state) = CPU_DEAD;
128 
129 	local_irq_disable();
130 	while (1)
131 		halt();
132 }
133 #else
134 static inline void play_dead(void)
135 {
136 	BUG();
137 }
138 #endif /* CONFIG_HOTPLUG_CPU */
139 
140 /*
141  * The idle thread. There's no useful work to be
142  * done, so just try to conserve power and have a
143  * low exit latency (i.e. sit in a loop waiting for
144  * somebody to say that they'd like to reschedule).
145  */
146 void cpu_idle(void)
147 {
148 	current_thread_info()->status |= TS_POLLING;
149 	/* endless idle loop with no priority at all */
150 	while (1) {
151 		tick_nohz_stop_sched_tick();
152 		while (!need_resched()) {
153 			void (*idle)(void);
154 
155 			rmb();
156 			idle = pm_idle;
157 			if (!idle)
158 				idle = default_idle;
159 			if (cpu_is_offline(smp_processor_id()))
160 				play_dead();
161 			/*
162 			 * Idle routines should keep interrupts disabled
163 			 * from here on, until they go to idle.
164 			 * Otherwise, idle callbacks can misfire.
165 			 */
166 			local_irq_disable();
167 			enter_idle();
168 			idle();
169 			/* In many cases the interrupt that ended idle
170 			   has already called exit_idle(). But some idle
171 			   loops can be woken up without an interrupt. */
172 			__exit_idle();
173 		}
174 
175 		tick_nohz_restart_sched_tick();
176 		preempt_enable_no_resched();
177 		schedule();
178 		preempt_disable();
179 	}
180 }
181 
182 /* Also prints some state that is not saved in pt_regs */
183 void __show_regs(struct pt_regs * regs)
184 {
185 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
186 	unsigned long d0, d1, d2, d3, d6, d7;
187 	unsigned int fsindex, gsindex;
188 	unsigned int ds, cs, es;
189 
190 	printk("\n");
191 	print_modules();
192 	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
193 		current->pid, current->comm, print_tainted(),
194 		init_utsname()->release,
195 		(int)strcspn(init_utsname()->version, " "),
196 		init_utsname()->version);
197 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
198 	printk_address(regs->ip, 1);
199 	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
200 		regs->flags);
201 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
202 	       regs->ax, regs->bx, regs->cx);
203 	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
204 	       regs->dx, regs->si, regs->di);
205 	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
206 	       regs->bp, regs->r8, regs->r9);
207 	printk("R10: %016lx R11: %016lx R12: %016lx\n",
208 	       regs->r10, regs->r11, regs->r12);
209 	printk("R13: %016lx R14: %016lx R15: %016lx\n",
210 	       regs->r13, regs->r14, regs->r15);
211 
212 	asm("movl %%ds,%0" : "=r" (ds));
213 	asm("movl %%cs,%0" : "=r" (cs));
214 	asm("movl %%es,%0" : "=r" (es));
215 	asm("movl %%fs,%0" : "=r" (fsindex));
216 	asm("movl %%gs,%0" : "=r" (gsindex));
217 
218 	rdmsrl(MSR_FS_BASE, fs);
219 	rdmsrl(MSR_GS_BASE, gs);
220 	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
221 
222 	cr0 = read_cr0();
223 	cr2 = read_cr2();
224 	cr3 = read_cr3();
225 	cr4 = read_cr4();
226 
227 	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
228 	       fs, fsindex, gs, gsindex, shadowgs);
229 	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
230 	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
231 
232 	get_debugreg(d0, 0);
233 	get_debugreg(d1, 1);
234 	get_debugreg(d2, 2);
235 	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
236 	get_debugreg(d3, 3);
237 	get_debugreg(d6, 6);
238 	get_debugreg(d7, 7);
239 	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
240 }
241 
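/*
 * Full register dump for the current CPU: __show_regs() output plus a
 * backtrace of the kernel stack that sits just above the saved pt_regs.
 */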
242 void show_regs(struct pt_regs *regs)
243 {
244 	printk("CPU %d:", smp_processor_id());
245 	__show_regs(regs);
246 	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
247 }
248 
249 /*
250  * Free current thread data structures, etc.
251  */
252 void exit_thread(void)
253 {
254 	struct task_struct *me = current;
255 	struct thread_struct *t = &me->thread;
256 
257 	if (me->thread.io_bitmap_ptr) {
258 		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
259 
260 		kfree(t->io_bitmap_ptr);
261 		t->io_bitmap_ptr = NULL;
262 		clear_thread_flag(TIF_IO_BITMAP);
263 		/*
264 		 * Careful, clear this in the TSS too:
265 		 */
266 		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
267 		t->io_bitmap_max = 0;
268 		put_cpu();
269 	}
270 }
271 
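/*
 * Called when the current task execs a new image: complete a pending
 * 32/64-bit ABI switch (TIF_ABI_PENDING), clear the debug registers and
 * TLS slots, and forget any FPU state.
 */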
272 void flush_thread(void)
273 {
274 	struct task_struct *tsk = current;
275 
276 	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
277 		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
278 		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
279 			clear_tsk_thread_flag(tsk, TIF_IA32);
280 		} else {
281 			set_tsk_thread_flag(tsk, TIF_IA32);
282 			current_thread_info()->status |= TS_COMPAT;
283 		}
284 	}
285 	clear_tsk_thread_flag(tsk, TIF_DEBUG);
286 
287 	tsk->thread.debugreg0 = 0;
288 	tsk->thread.debugreg1 = 0;
289 	tsk->thread.debugreg2 = 0;
290 	tsk->thread.debugreg3 = 0;
291 	tsk->thread.debugreg6 = 0;
292 	tsk->thread.debugreg7 = 0;
293 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
294 	/*
295 	 * Forget coprocessor state.
296 	 */
297 	clear_fpu(tsk);
298 	clear_used_math();
299 }
300 
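/*
 * Final sanity check when a dead task is released: by this point the
 * task must no longer own an LDT.
 */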
301 void release_thread(struct task_struct *dead_task)
302 {
303 	if (dead_task->mm) {
304 		if (dead_task->mm->context.size) {
305 			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
306 					dead_task->comm,
307 					dead_task->mm->context.ldt,
308 					dead_task->mm->context.size);
309 			BUG();
310 		}
311 	}
312 }
313 
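/*
 * Install a 32-bit TLS descriptor (4 GB limit, page-granular) in the given
 * TLS slot of @t; used below for FS/GS bases that fit in 32 bits.
 */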
314 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
315 {
316 	struct user_desc ud = {
317 		.base_addr = addr,
318 		.limit = 0xfffff,
319 		.seg_32bit = 1,
320 		.limit_in_pages = 1,
321 		.useable = 1,
322 	};
323 	struct desc_struct *desc = t->thread.tls_array;
324 	desc += tls;
325 	fill_ldt(desc, &ud);
326 }
327 
328 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
329 {
330 	return get_desc_base(&t->thread.tls_array[tls]);
331 }
332 
333 /*
334  * This gets called before we allocate a new thread and copy
335  * the current task into it.
336  */
337 void prepare_to_copy(struct task_struct *tsk)
338 {
339 	unlazy_fpu(tsk);
340 }
341 
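/*
 * Set up the new child's kernel stack and register state at fork/clone
 * time, duplicating the parent's I/O bitmap and installing a new TLS if
 * the clone flags request it.
 */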
342 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
343 		unsigned long unused,
344 	struct task_struct * p, struct pt_regs * regs)
345 {
346 	int err;
347 	struct pt_regs * childregs;
348 	struct task_struct *me = current;
349 
350 	childregs = ((struct pt_regs *)
351 			(THREAD_SIZE + task_stack_page(p))) - 1;
352 	*childregs = *regs;
353 
354 	childregs->ax = 0;
355 	childregs->sp = sp;
356 	if (sp == ~0UL)
357 		childregs->sp = (unsigned long)childregs;
358 
359 	p->thread.sp = (unsigned long) childregs;
360 	p->thread.sp0 = (unsigned long) (childregs+1);
361 	p->thread.usersp = me->thread.usersp;
362 
363 	set_tsk_thread_flag(p, TIF_FORK);
364 
365 	p->thread.fs = me->thread.fs;
366 	p->thread.gs = me->thread.gs;
367 
368 	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
369 	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
370 	asm("mov %%es,%0" : "=m" (p->thread.es));
371 	asm("mov %%ds,%0" : "=m" (p->thread.ds));
372 
373 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
374 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
375 		if (!p->thread.io_bitmap_ptr) {
376 			p->thread.io_bitmap_max = 0;
377 			return -ENOMEM;
378 		}
379 		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
380 				IO_BITMAP_BYTES);
381 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
382 	}
383 
384 	/*
385 	 * Set a new TLS for the child thread?
386 	 */
387 	if (clone_flags & CLONE_SETTLS) {
388 #ifdef CONFIG_IA32_EMULATION
389 		if (test_thread_flag(TIF_IA32))
390 			err = do_set_thread_area(p, -1,
391 				(struct user_desc __user *)childregs->si, 0);
392 		else
393 #endif
394 			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
395 		if (err)
396 			goto out;
397 	}
398 	err = 0;
399 out:
400 	if (err && p->thread.io_bitmap_ptr) {
401 		kfree(p->thread.io_bitmap_ptr);
402 		p->thread.io_bitmap_max = 0;
403 	}
404 	return err;
405 }
406 
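/*
 * Point the register state of the current task at a freshly exec'ed
 * image: reset the data segment registers, load the new instruction and
 * stack pointers, and start in user mode with interrupts enabled
 * (EFLAGS.IF, i.e. the 0x200 below).
 */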
407 void
408 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
409 {
410 	asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
411 	load_gs_index(0);
412 	regs->ip		= new_ip;
413 	regs->sp		= new_sp;
414 	write_pda(oldrsp, new_sp);
415 	regs->cs		= __USER_CS;
416 	regs->ss		= __USER_DS;
417 	regs->flags		= 0x200;
418 	set_fs(USER_DS);
419 	/*
420 	 * Free the old FP and other extended state
421 	 */
422 	free_thread_xstate(current);
423 }
424 EXPORT_SYMBOL_GPL(start_thread);
425 
426 static void hard_disable_TSC(void)
427 {
428 	write_cr4(read_cr4() | X86_CR4_TSD);
429 }
430 
431 void disable_TSC(void)
432 {
433 	preempt_disable();
434 	if (!test_and_set_thread_flag(TIF_NOTSC))
435 		/*
436 		 * Must flip the CPU state synchronously with
437 		 * TIF_NOTSC in the current running context.
438 		 */
439 		hard_disable_TSC();
440 	preempt_enable();
441 }
442 
443 static void hard_enable_TSC(void)
444 {
445 	write_cr4(read_cr4() & ~X86_CR4_TSD);
446 }
447 
448 static void enable_TSC(void)
449 {
450 	preempt_disable();
451 	if (test_and_clear_thread_flag(TIF_NOTSC))
452 		/*
453 		 * Must flip the CPU state synchronously with
454 		 * TIF_NOTSC in the current running context.
455 		 */
456 		hard_enable_TSC();
457 	preempt_enable();
458 }
459 
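/*
 * get_tsc_mode()/set_tsc_mode() back the PR_GET_TSC/PR_SET_TSC prctl()
 * options.  A rough, illustrative-only sketch of the userspace side:
 *
 *	#include <sys/prctl.h>
 *
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);	rdtsc now raises SIGSEGV
 *	prctl(PR_SET_TSC, PR_TSC_ENABLE);	rdtsc is allowed again
 */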
460 int get_tsc_mode(unsigned long adr)
461 {
462 	unsigned int val;
463 
464 	if (test_thread_flag(TIF_NOTSC))
465 		val = PR_TSC_SIGSEGV;
466 	else
467 		val = PR_TSC_ENABLE;
468 
469 	return put_user(val, (unsigned int __user *)adr);
470 }
471 
472 int set_tsc_mode(unsigned int val)
473 {
474 	if (val == PR_TSC_SIGSEGV)
475 		disable_TSC();
476 	else if (val == PR_TSC_ENABLE)
477 		enable_TSC();
478 	else
479 		return -EINVAL;
480 
481 	return 0;
482 }
483 
484 /*
485  * This special macro can be used to load a debugging register
486  */
487 #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
488 
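/*
 * Slow path of the context switch: only called (from __switch_to() below)
 * when one of the _TIF_WORK_CTXSW flags is set.  Propagates the debug
 * registers, the DS/BTS area, the TIF_NOTSC setting and the I/O
 * permission bitmap from @prev_p to @next_p.
 */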
489 static inline void __switch_to_xtra(struct task_struct *prev_p,
490 				    struct task_struct *next_p,
491 				    struct tss_struct *tss)
492 {
493 	struct thread_struct *prev, *next;
494 	unsigned long debugctl;
495 
496 	prev = &prev_p->thread;
497 	next = &next_p->thread;
498 
499 	debugctl = prev->debugctlmsr;
500 	if (next->ds_area_msr != prev->ds_area_msr) {
501 		/* we clear debugctl to make sure DS
502 		 * is not in use when we change it */
503 		debugctl = 0;
504 		update_debugctlmsr(0);
505 		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
506 	}
507 
508 	if (next->debugctlmsr != debugctl)
509 		update_debugctlmsr(next->debugctlmsr);
510 
511 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
512 		loaddebug(next, 0);
513 		loaddebug(next, 1);
514 		loaddebug(next, 2);
515 		loaddebug(next, 3);
516 		/* no 4 and 5 */
517 		loaddebug(next, 6);
518 		loaddebug(next, 7);
519 	}
520 
521 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
522 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
523 		/* prev and next are different */
524 		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
525 			hard_disable_TSC();
526 		else
527 			hard_enable_TSC();
528 	}
529 
530 	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
531 		/*
532 		 * Copy the relevant range of the IO bitmap.
533 		 * Normally this is 128 bytes or less:
534 		 */
535 		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
536 		       max(prev->io_bitmap_max, next->io_bitmap_max));
537 	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
538 		/*
539 		 * Clear any possible leftover bits:
540 		 */
541 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
542 	}
543 
544 #ifdef X86_BTS
545 	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
546 		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
547 
548 	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
549 		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
550 #endif
551 }
552 
553 /*
554  *	switch_to(x, y) should switch tasks from x to y.
555  *
556  * This could still be optimized:
557  * - fold all the options into a flag word and test it with a single test.
558  * - could test fs/gs bitsliced
559  *
560  * Kprobes not supported here. Set the probe on schedule instead.
561  */
562 struct task_struct *
563 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
564 {
565 	struct thread_struct *prev = &prev_p->thread,
566 				 *next = &next_p->thread;
567 	int cpu = smp_processor_id();
568 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
569 
570 	/* we're going to use this soon, after a few expensive things */
571 	if (next_p->fpu_counter > 5)
572 		prefetch(next->xstate);
573 
574 	/*
575 	 * Reload sp0 (the kernel entry stack pointer) in the TSS:
576 	 */
577 	load_sp0(tss, next);
578 
579 	/*
580 	 * Switch DS and ES.
581 	 * This won't pick up thread selector changes, but I guess that is ok.
582 	 */
583 	asm volatile("mov %%es,%0" : "=m" (prev->es));
584 	if (unlikely(next->es | prev->es))
585 		loadsegment(es, next->es);
586 
587 	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
588 	if (unlikely(next->ds | prev->ds))
589 		loadsegment(ds, next->ds);
590 
591 	load_TLS(next, cpu);
592 
593 	/*
594 	 * Switch FS and GS.
595 	 */
596 	{
597 		unsigned fsindex;
598 		asm volatile("movl %%fs,%0" : "=r" (fsindex));
599 		/* Segment register != 0 always requires a reload.
600 		   Also reload when it has changed.
601 		   When the previous process used a 64-bit base, always
602 		   reload to avoid an information leak. */
603 		if (unlikely(fsindex | next->fsindex | prev->fs)) {
604 			loadsegment(fs, next->fsindex);
605 			/* Check if the user used a selector != 0; if yes,
606 			 * clear the 64-bit base, since the overloaded base
607 			 * is always mapped to the null selector.
608 			 */
609 			if (fsindex)
610 				prev->fs = 0;
611 		}
612 		/* When the next process has a 64-bit base, use it */
613 		if (next->fs)
614 			wrmsrl(MSR_FS_BASE, next->fs);
615 		prev->fsindex = fsindex;
616 	}
617 	{
618 		unsigned gsindex;
619 		asm volatile("movl %%gs,%0" : "=r" (gsindex));
620 		if (unlikely(gsindex | next->gsindex | prev->gs)) {
621 			load_gs_index(next->gsindex);
622 			if (gsindex)
623 				prev->gs = 0;
624 		}
625 		if (next->gs)
626 			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
627 		prev->gsindex = gsindex;
628 	}
629 
630 	/* Must be after DS reload */
631 	unlazy_fpu(prev_p);
632 
633 	/*
634 	 * Switch the PDA and FPU contexts.
635 	 */
636 	prev->usersp = read_pda(oldrsp);
637 	write_pda(oldrsp, next->usersp);
638 	write_pda(pcurrent, next_p);
639 
640 	write_pda(kernelstack,
641 	(unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
642 #ifdef CONFIG_CC_STACKPROTECTOR
643 	write_pda(stack_canary, next_p->stack_canary);
644 	/*
645 	 * Build time only check to make sure the stack_canary is at
646 	 * offset 40 in the pda; this is a gcc ABI requirement
647 	 */
648 	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
649 #endif
650 
651 	/*
652 	 * Now maybe reload the debug registers and handle I/O bitmaps
653 	 */
654 	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
655 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
656 		__switch_to_xtra(prev_p, next_p, tss);
657 
658 	/* If the task has used the FPU in the last 5 timeslices, just do a
659 	 * full restore of the math state immediately to avoid the trap; the
660 	 * chances of needing the FPU soon are obviously high now.
661 	 */
662 	if (next_p->fpu_counter > 5)
663 		math_state_restore();
664 	return prev_p;
665 }
666 
667 /*
668  * sys_execve() executes a new program.
669  */
670 asmlinkage
671 long sys_execve(char __user *name, char __user * __user *argv,
672 		char __user * __user *envp, struct pt_regs *regs)
673 {
674 	long error;
675 	char * filename;
676 
677 	filename = getname(name);
678 	error = PTR_ERR(filename);
679 	if (IS_ERR(filename))
680 		return error;
681 	error = do_execve(filename, argv, envp, regs);
682 	putname(filename);
683 	return error;
684 }
685 
686 void set_personality_64bit(void)
687 {
688 	/* inherit personality from parent */
689 
690 	/* Make sure to be in 64bit mode */
691 	clear_thread_flag(TIF_IA32);
692 
693 	/* TBD: overwrites user setup. Should have two bits.
694 	   But 64-bit processes have always behaved this way,
695 	   so it's not too bad. The main problem is just that
696 	   32-bit children are affected again. */
697 	current->personality &= ~READ_IMPLIES_EXEC;
698 }
699 
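/*
 * fork(), clone() and vfork() below all funnel into do_fork(); the stubs
 * only differ in the clone flags they pass and in where the child's
 * stack pointer comes from.
 */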
700 asmlinkage long sys_fork(struct pt_regs *regs)
701 {
702 	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
703 }
704 
705 asmlinkage long
706 sys_clone(unsigned long clone_flags, unsigned long newsp,
707 	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
708 {
709 	if (!newsp)
710 		newsp = regs->sp;
711 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
712 }
713 
714 /*
715  * This is trivial, and on the face of it looks like it
716  * could equally well be done in user mode.
717  *
718  * Not so, for quite unobvious reasons - register pressure.
719  * In user mode vfork() cannot have a stack frame, and if
720  * done by calling the "clone()" system call directly, you
721  * do not have enough call-clobbered registers to hold all
722  * the information you need.
723  */
724 asmlinkage long sys_vfork(struct pt_regs *regs)
725 {
726 	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
727 		    NULL, NULL);
728 }
729 
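/*
 * Walk the saved frame pointers of a sleeping task and return the first
 * return address outside the scheduler (this is what /proc/<pid>/wchan
 * reports).  Gives up after 16 frames or when the chain leaves the
 * task's stack.
 */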
730 unsigned long get_wchan(struct task_struct *p)
731 {
732 	unsigned long stack;
733 	u64 fp, ip;
734 	int count = 0;
735 
736 	if (!p || p == current || p->state == TASK_RUNNING)
737 		return 0;
738 	stack = (unsigned long)task_stack_page(p);
739 	if (p->thread.sp < stack || p->thread.sp > stack + THREAD_SIZE)
740 		return 0;
741 	fp = *(u64 *)(p->thread.sp);
742 	do {
743 		if (fp < (unsigned long)stack ||
744 		    fp > (unsigned long)stack + THREAD_SIZE)
745 			return 0;
746 		ip = *(u64 *)(fp + 8);
747 		if (!in_sched_functions(ip))
748 			return ip;
749 		fp = *(u64 *)fp;
750 	} while (count++ < 16);
751 	return 0;
752 }
753 
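/*
 * Backend for the arch_prctl(2) syscall: get or set the FS/GS base of
 * @task.  Bases that fit in 32 bits are installed through a GDT TLS slot
 * so context switches stay cheap; larger bases go through the FS/GS base
 * MSRs.
 */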
754 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
755 {
756 	int ret = 0;
757 	int doit = task == current;
758 	int cpu;
759 
760 	switch (code) {
761 	case ARCH_SET_GS:
762 		if (addr >= TASK_SIZE_OF(task))
763 			return -EPERM;
764 		cpu = get_cpu();
765 		/* handle small bases via the GDT because that's faster to
766 		   switch. */
767 		if (addr <= 0xffffffff) {
768 			set_32bit_tls(task, GS_TLS, addr);
769 			if (doit) {
770 				load_TLS(&task->thread, cpu);
771 				load_gs_index(GS_TLS_SEL);
772 			}
773 			task->thread.gsindex = GS_TLS_SEL;
774 			task->thread.gs = 0;
775 		} else {
776 			task->thread.gsindex = 0;
777 			task->thread.gs = addr;
778 			if (doit) {
779 				load_gs_index(0);
780 				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
781 			}
782 		}
783 		put_cpu();
784 		break;
785 	case ARCH_SET_FS:
786 		/* Not strictly needed for fs, but do it for symmetry
787 		   with gs */
788 		if (addr >= TASK_SIZE_OF(task))
789 			return -EPERM;
790 		cpu = get_cpu();
791 		/* handle small bases via the GDT because that's faster to
792 		   switch. */
793 		if (addr <= 0xffffffff) {
794 			set_32bit_tls(task, FS_TLS, addr);
795 			if (doit) {
796 				load_TLS(&task->thread, cpu);
797 				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
798 			}
799 			task->thread.fsindex = FS_TLS_SEL;
800 			task->thread.fs = 0;
801 		} else {
802 			task->thread.fsindex = 0;
803 			task->thread.fs = addr;
804 			if (doit) {
805 				/* set the selector to 0 to not confuse
806 				   __switch_to */
807 				asm volatile("movl %0,%%fs" :: "r" (0));
808 				ret = checking_wrmsrl(MSR_FS_BASE, addr);
809 			}
810 		}
811 		put_cpu();
812 		break;
813 	case ARCH_GET_FS: {
814 		unsigned long base;
815 		if (task->thread.fsindex == FS_TLS_SEL)
816 			base = read_32bit_tls(task, FS_TLS);
817 		else if (doit)
818 			rdmsrl(MSR_FS_BASE, base);
819 		else
820 			base = task->thread.fs;
821 		ret = put_user(base, (unsigned long __user *)addr);
822 		break;
823 	}
824 	case ARCH_GET_GS: {
825 		unsigned long base;
826 		unsigned gsindex;
827 		if (task->thread.gsindex == GS_TLS_SEL)
828 			base = read_32bit_tls(task, GS_TLS);
829 		else if (doit) {
830 			asm("movl %%gs,%0" : "=r" (gsindex));
831 			if (gsindex)
832 				rdmsrl(MSR_KERNEL_GS_BASE, base);
833 			else
834 				base = task->thread.gs;
835 		}
836 		else
837 			base = task->thread.gs;
838 		ret = put_user(base, (unsigned long __user *)addr);
839 		break;
840 	}
841 
842 	default:
843 		ret = -EINVAL;
844 		break;
845 	}
846 
847 	return ret;
848 }
849 
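/*
 * Syscall entry point for arch_prctl(2).  Illustrative-only userspace
 * usage (there is usually no libc wrapper, so the raw syscall is used):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, new_base);
 */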
850 long sys_arch_prctl(int code, unsigned long addr)
851 {
852 	return do_arch_prctl(current, code, addr);
853 }
854 
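/*
 * Randomize the initial user stack pointer by up to 8 KB (16-byte
 * aligned), unless randomization is disabled for the task or the system.
 */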
855 unsigned long arch_align_stack(unsigned long sp)
856 {
857 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
858 		sp -= get_random_int() % 8192;
859 	return sp & ~0xf;
860 }
861 
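/*
 * Pick a randomized brk in the 32 MB above the current one, falling back
 * to the unrandomized value if no suitable range can be found.
 */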
862 unsigned long arch_randomize_brk(struct mm_struct *mm)
863 {
864 	unsigned long range_end = mm->brk + 0x02000000;
865 	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
866 }
867