xref: /linux/arch/x86/kernel/process_64.c (revision 7ec7fb394298c212c30e063c57e0aa895efe9439)
1 /*
2  *  Copyright (C) 1995  Linus Torvalds
3  *
4  *  Pentium III FXSR, SSE support
5  *	Gareth Hughes <gareth@valinux.com>, May 2000
6  *
7  *  X86-64 port
8  *	Andi Kleen.
9  *
10  *	CPU hotplug support - ashok.raj@intel.com
11  */
12 
13 /*
14  * This file handles the architecture-dependent parts of process handling.
15  */
16 
17 #include <stdarg.h>
18 
19 #include <linux/cpu.h>
20 #include <linux/errno.h>
21 #include <linux/sched.h>
22 #include <linux/fs.h>
23 #include <linux/kernel.h>
24 #include <linux/mm.h>
25 #include <linux/elfcore.h>
26 #include <linux/smp.h>
27 #include <linux/slab.h>
28 #include <linux/user.h>
29 #include <linux/interrupt.h>
30 #include <linux/utsname.h>
31 #include <linux/delay.h>
32 #include <linux/module.h>
33 #include <linux/ptrace.h>
34 #include <linux/random.h>
35 #include <linux/notifier.h>
36 #include <linux/kprobes.h>
37 #include <linux/kdebug.h>
38 #include <linux/tick.h>
39 #include <linux/prctl.h>
40 #include <linux/uaccess.h>
41 #include <linux/io.h>
42 #include <linux/ftrace.h>
43 
44 #include <asm/pgtable.h>
45 #include <asm/system.h>
46 #include <asm/processor.h>
47 #include <asm/i387.h>
48 #include <asm/mmu_context.h>
49 #include <asm/pda.h>
50 #include <asm/prctl.h>
51 #include <asm/desc.h>
52 #include <asm/proto.h>
53 #include <asm/ia32.h>
54 #include <asm/idle.h>
55 #include <asm/syscalls.h>
56 #include <asm/ds.h>
57 
58 asmlinkage extern void ret_from_fork(void);
59 
60 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
61 
62 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
63 
64 void idle_notifier_register(struct notifier_block *n)
65 {
66 	atomic_notifier_chain_register(&idle_notifier, n);
67 }
68 EXPORT_SYMBOL_GPL(idle_notifier_register);
69 
70 void idle_notifier_unregister(struct notifier_block *n)
71 {
72 	atomic_notifier_chain_unregister(&idle_notifier, n);
73 }
74 EXPORT_SYMBOL_GPL(idle_notifier_unregister);
75 
76 void enter_idle(void)
77 {
78 	write_pda(isidle, 1);
79 	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
80 }
81 
82 static void __exit_idle(void)
83 {
84 	if (test_and_clear_bit_pda(0, isidle) == 0)
85 		return;
86 	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
87 }
88 
89 /* Called from interrupts to signify idle end */
90 void exit_idle(void)
91 {
92 	/* idle loop has pid 0 */
93 	if (current->pid)
94 		return;
95 	__exit_idle();
96 }
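
/*
 * Illustrative sketch only -- nothing in this file uses it.  A
 * hypothetical in-kernel consumer of the idle notifier above could
 * look roughly like this (the example_* names are made up):
 */
static int example_idle_event(struct notifier_block *nb,
			      unsigned long action, void *unused)
{
	switch (action) {
	case IDLE_START:
		/* e.g. drop a device into a lower-power state */
		break;
	case IDLE_END:
		/* e.g. restore full-speed operation */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block example_idle_nb __maybe_unused = {
	.notifier_call = example_idle_event,
};
/* A driver would call idle_notifier_register(&example_idle_nb) at init. */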
97 
98 #ifndef CONFIG_SMP
99 static inline void play_dead(void)
100 {
101 	BUG();
102 }
103 #endif
104 
105 /*
106  * The idle thread. There's no useful work to be
107  * done, so just try to conserve power and have a
108  * low exit latency (i.e. sit in a loop waiting for
109  * somebody to say that they'd like to reschedule).
110  */
111 void cpu_idle(void)
112 {
113 	current_thread_info()->status |= TS_POLLING;
114 	/* endless idle loop with no priority at all */
115 	while (1) {
116 		tick_nohz_stop_sched_tick(1);
117 		while (!need_resched()) {
118 
119 			rmb();
120 
121 			if (cpu_is_offline(smp_processor_id()))
122 				play_dead();
123 			/*
124 			 * Idle routines should keep interrupts disabled
125 			 * from here on, until they go to idle.
126 			 * Otherwise, idle callbacks can misfire.
127 			 */
128 			local_irq_disable();
129 			enter_idle();
130 			/* Don't trace irqs off for idle */
131 			stop_critical_timings();
132 			pm_idle();
133 			start_critical_timings();
134 			/* In many cases the interrupt that ended idle
135 			   has already called exit_idle. But some idle
136 			   loops can be woken up without an interrupt. */
137 			__exit_idle();
138 		}
139 
140 		tick_nohz_restart_sched_tick();
141 		preempt_enable_no_resched();
142 		schedule();
143 		preempt_disable();
144 	}
145 }
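
/*
 * Structure of cpu_idle() above: the inner loop disables interrupts,
 * signals idle entry, and calls pm_idle() (which typically re-enables
 * interrupts as it halts) until need_resched() becomes true; the outer
 * loop then restarts the periodic tick and calls schedule().
 */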
146 
147 /* Also prints some state that isn't saved in the pt_regs */
148 void __show_regs(struct pt_regs *regs, int all)
149 {
150 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
151 	unsigned long d0, d1, d2, d3, d6, d7;
152 	unsigned int fsindex, gsindex;
153 	unsigned int ds, cs, es;
154 
155 	printk("\n");
156 	print_modules();
157 	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
158 		current->pid, current->comm, print_tainted(),
159 		init_utsname()->release,
160 		(int)strcspn(init_utsname()->version, " "),
161 		init_utsname()->version);
162 	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
163 	printk_address(regs->ip, 1);
164 	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
165 			regs->sp, regs->flags);
166 	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
167 	       regs->ax, regs->bx, regs->cx);
168 	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
169 	       regs->dx, regs->si, regs->di);
170 	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
171 	       regs->bp, regs->r8, regs->r9);
172 	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
173 	       regs->r10, regs->r11, regs->r12);
174 	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
175 	       regs->r13, regs->r14, regs->r15);
176 
177 	asm("movl %%ds,%0" : "=r" (ds));
178 	asm("movl %%cs,%0" : "=r" (cs));
179 	asm("movl %%es,%0" : "=r" (es));
180 	asm("movl %%fs,%0" : "=r" (fsindex));
181 	asm("movl %%gs,%0" : "=r" (gsindex));
182 
183 	rdmsrl(MSR_FS_BASE, fs);
184 	rdmsrl(MSR_GS_BASE, gs);
185 	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
186 
187 	if (!all)
188 		return;
189 
190 	cr0 = read_cr0();
191 	cr2 = read_cr2();
192 	cr3 = read_cr3();
193 	cr4 = read_cr4();
194 
195 	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
196 	       fs, fsindex, gs, gsindex, shadowgs);
197 	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
198 			es, cr0);
199 	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
200 			cr4);
201 
202 	get_debugreg(d0, 0);
203 	get_debugreg(d1, 1);
204 	get_debugreg(d2, 2);
205 	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
206 	get_debugreg(d3, 3);
207 	get_debugreg(d6, 6);
208 	get_debugreg(d7, 7);
209 	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
210 }
211 
212 void show_regs(struct pt_regs *regs)
213 {
214 	printk(KERN_INFO "CPU %d:", smp_processor_id());
215 	__show_regs(regs, 1);
216 	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
217 }
218 
219 /*
220  * Free current thread data structures etc.
221  */
222 void exit_thread(void)
223 {
224 	struct task_struct *me = current;
225 	struct thread_struct *t = &me->thread;
226 
227 	if (me->thread.io_bitmap_ptr) {
228 		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
229 
230 		kfree(t->io_bitmap_ptr);
231 		t->io_bitmap_ptr = NULL;
232 		clear_thread_flag(TIF_IO_BITMAP);
233 		/*
234 		 * Careful, clear this in the TSS too:
235 		 */
236 		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
237 		t->io_bitmap_max = 0;
238 		put_cpu();
239 	}
240 
241 	ds_exit_thread(current);
242 }
243 
244 void flush_thread(void)
245 {
246 	struct task_struct *tsk = current;
247 
248 	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
249 		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
250 		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
251 			clear_tsk_thread_flag(tsk, TIF_IA32);
252 		} else {
253 			set_tsk_thread_flag(tsk, TIF_IA32);
254 			current_thread_info()->status |= TS_COMPAT;
255 		}
256 	}
257 	clear_tsk_thread_flag(tsk, TIF_DEBUG);
258 
259 	tsk->thread.debugreg0 = 0;
260 	tsk->thread.debugreg1 = 0;
261 	tsk->thread.debugreg2 = 0;
262 	tsk->thread.debugreg3 = 0;
263 	tsk->thread.debugreg6 = 0;
264 	tsk->thread.debugreg7 = 0;
265 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
266 	/*
267 	 * Forget coprocessor state.
268 	 */
269 	tsk->fpu_counter = 0;
270 	clear_fpu(tsk);
271 	clear_used_math();
272 }
273 
274 void release_thread(struct task_struct *dead_task)
275 {
276 	if (dead_task->mm) {
277 		if (dead_task->mm->context.size) {
278 			printk(KERN_WARNING "WARNING: dead process %8s still has LDT? <%p/%d>\n",
279 					dead_task->comm,
280 					dead_task->mm->context.ldt,
281 					dead_task->mm->context.size);
282 			BUG();
283 		}
284 	}
285 }
286 
287 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
288 {
289 	struct user_desc ud = {
290 		.base_addr = addr,
291 		.limit = 0xfffff,
292 		.seg_32bit = 1,
293 		.limit_in_pages = 1,
294 		.useable = 1,
295 	};
296 	struct desc_struct *desc = t->thread.tls_array;
297 	desc += tls;
298 	fill_ldt(desc, &ud);
299 }
300 
301 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
302 {
303 	return get_desc_base(&t->thread.tls_array[tls]);
304 }
305 
306 /*
307  * This gets called before we allocate a new thread and copy
308  * the current task into it.
309  */
310 void prepare_to_copy(struct task_struct *tsk)
311 {
312 	unlazy_fpu(tsk);
313 }
314 
315 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
316 		unsigned long unused,
317 	struct task_struct *p, struct pt_regs *regs)
318 {
319 	int err;
320 	struct pt_regs *childregs;
321 	struct task_struct *me = current;
322 
323 	childregs = ((struct pt_regs *)
324 			(THREAD_SIZE + task_stack_page(p))) - 1;
325 	*childregs = *regs;
326 
327 	childregs->ax = 0;
328 	childregs->sp = sp;
329 	if (sp == ~0UL)
330 		childregs->sp = (unsigned long)childregs;
331 
332 	p->thread.sp = (unsigned long) childregs;
333 	p->thread.sp0 = (unsigned long) (childregs+1);
334 	p->thread.usersp = me->thread.usersp;
335 
336 	set_tsk_thread_flag(p, TIF_FORK);
337 
338 	p->thread.fs = me->thread.fs;
339 	p->thread.gs = me->thread.gs;
340 
341 	savesegment(gs, p->thread.gsindex);
342 	savesegment(fs, p->thread.fsindex);
343 	savesegment(es, p->thread.es);
344 	savesegment(ds, p->thread.ds);
345 
346 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
347 		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
348 		if (!p->thread.io_bitmap_ptr) {
349 			p->thread.io_bitmap_max = 0;
350 			return -ENOMEM;
351 		}
352 		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
353 				IO_BITMAP_BYTES);
354 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
355 	}
356 
357 	/*
358 	 * Set a new TLS for the child thread?
359 	 */
360 	if (clone_flags & CLONE_SETTLS) {
361 #ifdef CONFIG_IA32_EMULATION
362 		if (test_thread_flag(TIF_IA32))
363 			err = do_set_thread_area(p, -1,
364 				(struct user_desc __user *)childregs->si, 0);
365 		else
366 #endif
367 			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
368 		if (err)
369 			goto out;
370 	}
371 
372 	ds_copy_thread(p, me);
373 
374 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
375 	p->thread.debugctlmsr = 0;
376 
377 	err = 0;
378 out:
379 	if (err && p->thread.io_bitmap_ptr) {
380 		kfree(p->thread.io_bitmap_ptr);
381 		p->thread.io_bitmap_max = 0;
382 	}
383 	return err;
384 }
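
/*
 * Note on copy_thread() above: an sp of ~0UL is the convention used
 * when creating kernel threads, which have no user stack yet; in that
 * case the child's sp is simply pointed at its own pt_regs frame on
 * the kernel stack.
 */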
385 
386 void
387 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
388 {
389 	loadsegment(fs, 0);
390 	loadsegment(es, 0);
391 	loadsegment(ds, 0);
392 	load_gs_index(0);
393 	regs->ip		= new_ip;
394 	regs->sp		= new_sp;
395 	write_pda(oldrsp, new_sp);
396 	regs->cs		= __USER_CS;
397 	regs->ss		= __USER_DS;
398 	regs->flags		= 0x200;	/* X86_EFLAGS_IF: start with interrupts enabled */
399 	set_fs(USER_DS);
400 	/*
401 	 * Free the old FP and other extended state
402 	 */
403 	free_thread_xstate(current);
404 }
405 EXPORT_SYMBOL_GPL(start_thread);
406 
407 static void hard_disable_TSC(void)
408 {
409 	write_cr4(read_cr4() | X86_CR4_TSD);
410 }
411 
412 void disable_TSC(void)
413 {
414 	preempt_disable();
415 	if (!test_and_set_thread_flag(TIF_NOTSC))
416 		/*
417 		 * Must flip the CPU state synchronously with
418 		 * TIF_NOTSC in the current running context.
419 		 */
420 		hard_disable_TSC();
421 	preempt_enable();
422 }
423 
424 static void hard_enable_TSC(void)
425 {
426 	write_cr4(read_cr4() & ~X86_CR4_TSD);
427 }
428 
429 static void enable_TSC(void)
430 {
431 	preempt_disable();
432 	if (test_and_clear_thread_flag(TIF_NOTSC))
433 		/*
434 		 * Must flip the CPU state synchronously with
435 		 * TIF_NOTSC in the current running context.
436 		 */
437 		hard_enable_TSC();
438 	preempt_enable();
439 }
440 
441 int get_tsc_mode(unsigned long adr)
442 {
443 	unsigned int val;
444 
445 	if (test_thread_flag(TIF_NOTSC))
446 		val = PR_TSC_SIGSEGV;
447 	else
448 		val = PR_TSC_ENABLE;
449 
450 	return put_user(val, (unsigned int __user *)adr);
451 }
452 
453 int set_tsc_mode(unsigned int val)
454 {
455 	if (val == PR_TSC_SIGSEGV)
456 		disable_TSC();
457 	else if (val == PR_TSC_ENABLE)
458 		enable_TSC();
459 	else
460 		return -EINVAL;
461 
462 	return 0;
463 }
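
/*
 * get_tsc_mode()/set_tsc_mode() implement the x86 side of the
 * PR_GET_TSC/PR_SET_TSC prctl(2) operations.  A minimal, untested
 * user space sketch, for illustration only:
 *
 *	#include <sys/prctl.h>
 *
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
 *	// RDTSC in this task now faults (CR4.TSD is set while the task
 *	// runs), so the resulting SIGSEGV can be caught and handled.
 *
 * PR_TSC_ENABLE flips CR4.TSD back off via hard_enable_TSC().
 */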
464 
465 /*
466  * This special macro can be used to load a debugging register
467  */
468 #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
469 
470 static inline void __switch_to_xtra(struct task_struct *prev_p,
471 				    struct task_struct *next_p,
472 				    struct tss_struct *tss)
473 {
474 	struct thread_struct *prev, *next;
475 
476 	prev = &prev_p->thread;
477 	next = &next_p->thread;
478 
479 	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
480 	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
481 		ds_switch_to(prev_p, next_p);
482 	else if (next->debugctlmsr != prev->debugctlmsr)
483 		update_debugctlmsr(next->debugctlmsr);
484 
485 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
486 		loaddebug(next, 0);
487 		loaddebug(next, 1);
488 		loaddebug(next, 2);
489 		loaddebug(next, 3);
490 		/* no 4 and 5 */
491 		loaddebug(next, 6);
492 		loaddebug(next, 7);
493 	}
494 
495 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
496 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
497 		/* prev and next are different */
498 		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
499 			hard_disable_TSC();
500 		else
501 			hard_enable_TSC();
502 	}
503 
504 	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
505 		/*
506 		 * Copy the relevant range of the IO bitmap.
507 		 * Normally this is 128 bytes or less:
508 		 */
509 		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
510 		       max(prev->io_bitmap_max, next->io_bitmap_max));
511 	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
512 		/*
513 		 * Clear any possible leftover bits:
514 		 */
515 		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
516 	}
517 }
518 
519 /*
520  *	switch_to(x,y) should switch tasks from x to y.
521  *
522  * This could still be optimized:
523  * - fold all the options into a flag word and test it with a single test.
524  * - could test fs/gs bitsliced
525  *
526  * Kprobes are not supported here; set the probe on schedule() instead.
527  * The function graph tracer is not supported here either.
528  */
529 __notrace_funcgraph struct task_struct *
530 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
531 {
532 	struct thread_struct *prev = &prev_p->thread;
533 	struct thread_struct *next = &next_p->thread;
534 	int cpu = smp_processor_id();
535 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
536 	unsigned fsindex, gsindex;
537 
538 	/* we're going to use this soon, after a few expensive things */
539 	if (next_p->fpu_counter > 5)
540 		prefetch(next->xstate);
541 
542 	/*
543 	 * Reload esp0, LDT and the page table pointer:
544 	 */
545 	load_sp0(tss, next);
546 
547 	/*
548 	 * Switch DS and ES.
549 	 * This won't pick up thread selector changes, but I guess that is ok.
550 	 */
551 	savesegment(es, prev->es);
552 	if (unlikely(next->es | prev->es))
553 		loadsegment(es, next->es);
554 
555 	savesegment(ds, prev->ds);
556 	if (unlikely(next->ds | prev->ds))
557 		loadsegment(ds, next->ds);
558 
559 
560 	/* We must save %fs and %gs before load_TLS() because
561 	 * %fs and %gs may be cleared by load_TLS().
562 	 *
563 	 * (e.g. xen_load_tls())
564 	 */
565 	savesegment(fs, fsindex);
566 	savesegment(gs, gsindex);
567 
568 	load_TLS(next, cpu);
569 
570 	/*
571 	 * Leave lazy mode, flushing any hypercalls made here.
572 	 * This must be done before restoring TLS segments so
573 	 * the GDT and LDT are properly updated, and must be
574 	 * done before math_state_restore, so the TS bit is up
575 	 * to date.
576 	 */
577 	arch_leave_lazy_cpu_mode();
578 
579 	/*
580 	 * Switch FS and GS.
581 	 *
582 	 * A segment register != 0 always requires a reload.  Also reload
583 	 * when it has changed.  When the previous process used a 64bit
584 	 * base, always reload to avoid an information leak.
585 	 */
586 	if (unlikely(fsindex | next->fsindex | prev->fs)) {
587 		loadsegment(fs, next->fsindex);
588 		/*
589 		 * Check if the user used a selector != 0; if yes,
590 		 * clear the 64bit base, since an overloaded base is
591 		 * always mapped to the null selector.
592 		 */
593 		if (fsindex)
594 			prev->fs = 0;
595 	}
596 	/* when next process has a 64bit base use it */
597 	if (next->fs)
598 		wrmsrl(MSR_FS_BASE, next->fs);
599 	prev->fsindex = fsindex;
600 
601 	if (unlikely(gsindex | next->gsindex | prev->gs)) {
602 		load_gs_index(next->gsindex);
603 		if (gsindex)
604 			prev->gs = 0;
605 	}
606 	if (next->gs)
607 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
608 	prev->gsindex = gsindex;
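
	/*
	 * Summary of the FS/GS handling above: the selector is reloaded
	 * whenever either side has a non-zero selector or the outgoing
	 * task had a non-zero 64bit base (reloading the selector replaces
	 * the hidden base, so the previous task's base cannot leak).  If
	 * the outgoing task really used a selector, its saved 64bit base
	 * is cleared; if the incoming task has a 64bit base, it is
	 * restored with wrmsrl() afterwards.
	 */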
609 
610 	/* Must be after DS reload */
611 	unlazy_fpu(prev_p);
612 
613 	/*
614 	 * Switch the PDA and FPU contexts.
615 	 */
616 	prev->usersp = read_pda(oldrsp);
617 	write_pda(oldrsp, next->usersp);
618 	write_pda(pcurrent, next_p);
619 
620 	write_pda(kernelstack,
621 		  (unsigned long)task_stack_page(next_p) +
622 		  THREAD_SIZE - PDA_STACKOFFSET);
623 #ifdef CONFIG_CC_STACKPROTECTOR
624 	write_pda(stack_canary, next_p->stack_canary);
625 	/*
626 	 * Build time only check to make sure the stack_canary is at
627 	 * offset 40 in the pda; this is a gcc ABI requirement
628 	 */
629 	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
630 #endif
631 
632 	/*
633 	 * Now maybe reload the debug registers and handle I/O bitmaps
634 	 */
635 	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
636 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
637 		__switch_to_xtra(prev_p, next_p, tss);
638 
639 	/* If the task has used the FPU in the last 5 timeslices, just do a
640 	 * full restore of the math state immediately to avoid the trap; the
641 	 * chances of needing the FPU soon are obviously high now.
642 	 *
643 	 * The tsk_used_math() check prevents calling math_state_restore(),
644 	 * which can sleep in the case of !tsk_used_math().
645 	 */
646 	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
647 		math_state_restore();
648 	return prev_p;
649 }
650 
651 /*
652  * sys_execve() executes a new program.
653  */
654 asmlinkage
655 long sys_execve(char __user *name, char __user * __user *argv,
656 		char __user * __user *envp, struct pt_regs *regs)
657 {
658 	long error;
659 	char *filename;
660 
661 	filename = getname(name);
662 	error = PTR_ERR(filename);
663 	if (IS_ERR(filename))
664 		return error;
665 	error = do_execve(filename, argv, envp, regs);
666 	putname(filename);
667 	return error;
668 }
669 
670 void set_personality_64bit(void)
671 {
672 	/* inherit personality from parent */
673 
674 	/* Make sure to be in 64bit mode */
675 	clear_thread_flag(TIF_IA32);
676 
677 	/* TBD: overwrites user setup. Should have two bits.
678 	   But 64bit processes have always behaved this way,
679 	   so it's not too bad. The main problem is just that
680 	   32bit children are affected again. */
681 	current->personality &= ~READ_IMPLIES_EXEC;
682 }
683 
684 asmlinkage long sys_fork(struct pt_regs *regs)
685 {
686 	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
687 }
688 
689 asmlinkage long
690 sys_clone(unsigned long clone_flags, unsigned long newsp,
691 	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
692 {
693 	if (!newsp)
694 		newsp = regs->sp;
695 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
696 }
697 
698 /*
699  * This is trivial, and on the face of it looks like it
700  * could equally well be done in user mode.
701  *
702  * Not so, for quite unobvious reasons - register pressure.
703  * In user mode vfork() cannot have a stack frame, and if
704  * done by calling the "clone()" system call directly, you
705  * do not have enough call-clobbered registers to hold all
706  * the information you need.
707  */
708 asmlinkage long sys_vfork(struct pt_regs *regs)
709 {
710 	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
711 		    NULL, NULL);
712 }
713 
714 unsigned long get_wchan(struct task_struct *p)
715 {
716 	unsigned long stack;
717 	u64 fp, ip;
718 	int count = 0;
719 
720 	if (!p || p == current || p->state == TASK_RUNNING)
721 		return 0;
722 	stack = (unsigned long)task_stack_page(p);
723 	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
724 		return 0;
725 	fp = *(u64 *)(p->thread.sp);
726 	do {
727 		if (fp < (unsigned long)stack ||
728 		    fp >= (unsigned long)stack+THREAD_SIZE)
729 			return 0;
730 		ip = *(u64 *)(fp+8);
731 		if (!in_sched_functions(ip))
732 			return ip;
733 		fp = *(u64 *)fp;
734 	} while (count++ < 16);
735 	return 0;
736 }
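
/*
 * Notes on get_wchan() above: it is only meaningful when the kernel
 * keeps frame pointers, and walks the saved %rbp chain on the sleeping
 * task's kernel stack:
 *
 *	ip = *(u64 *)(fp + 8)	return address in the current frame
 *	fp = *(u64 *)fp		caller's saved frame pointer
 *
 * The walk returns the first return address outside the scheduler
 * (!in_sched_functions(ip)) and gives up after 16 frames or when the
 * frame pointer leaves the task's stack.
 */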
737 
738 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
739 {
740 	int ret = 0;
741 	int doit = task == current;
742 	int cpu;
743 
744 	switch (code) {
745 	case ARCH_SET_GS:
746 		if (addr >= TASK_SIZE_OF(task))
747 			return -EPERM;
748 		cpu = get_cpu();
749 		/* handle small bases via the GDT because that's faster to
750 		   switch. */
751 		if (addr <= 0xffffffff) {
752 			set_32bit_tls(task, GS_TLS, addr);
753 			if (doit) {
754 				load_TLS(&task->thread, cpu);
755 				load_gs_index(GS_TLS_SEL);
756 			}
757 			task->thread.gsindex = GS_TLS_SEL;
758 			task->thread.gs = 0;
759 		} else {
760 			task->thread.gsindex = 0;
761 			task->thread.gs = addr;
762 			if (doit) {
763 				load_gs_index(0);
764 				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
765 			}
766 		}
767 		put_cpu();
768 		break;
769 	case ARCH_SET_FS:
770 		/* Not strictly needed for fs, but do it for symmetry
771 		   with gs */
772 		if (addr >= TASK_SIZE_OF(task))
773 			return -EPERM;
774 		cpu = get_cpu();
775 		/* handle small bases via the GDT because that's faster to
776 		   switch. */
777 		if (addr <= 0xffffffff) {
778 			set_32bit_tls(task, FS_TLS, addr);
779 			if (doit) {
780 				load_TLS(&task->thread, cpu);
781 				loadsegment(fs, FS_TLS_SEL);
782 			}
783 			task->thread.fsindex = FS_TLS_SEL;
784 			task->thread.fs = 0;
785 		} else {
786 			task->thread.fsindex = 0;
787 			task->thread.fs = addr;
788 			if (doit) {
789 				/* set the selector to 0 to not confuse
790 				   __switch_to */
791 				loadsegment(fs, 0);
792 				ret = checking_wrmsrl(MSR_FS_BASE, addr);
793 			}
794 		}
795 		put_cpu();
796 		break;
797 	case ARCH_GET_FS: {
798 		unsigned long base;
799 		if (task->thread.fsindex == FS_TLS_SEL)
800 			base = read_32bit_tls(task, FS_TLS);
801 		else if (doit)
802 			rdmsrl(MSR_FS_BASE, base);
803 		else
804 			base = task->thread.fs;
805 		ret = put_user(base, (unsigned long __user *)addr);
806 		break;
807 	}
808 	case ARCH_GET_GS: {
809 		unsigned long base;
810 		unsigned gsindex;
811 		if (task->thread.gsindex == GS_TLS_SEL)
812 			base = read_32bit_tls(task, GS_TLS);
813 		else if (doit) {
814 			savesegment(gs, gsindex);
815 			if (gsindex)
816 				rdmsrl(MSR_KERNEL_GS_BASE, base);
817 			else
818 				base = task->thread.gs;
819 		} else
820 			base = task->thread.gs;
821 		ret = put_user(base, (unsigned long __user *)addr);
822 		break;
823 	}
824 
825 	default:
826 		ret = -EINVAL;
827 		break;
828 	}
829 
830 	return ret;
831 }
832 
833 long sys_arch_prctl(int code, unsigned long addr)
834 {
835 	return do_arch_prctl(current, code, addr);
836 }
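
/*
 * For reference (illustrative, untested): user space reaches
 * do_arch_prctl() through the arch_prctl(2) system call, e.g.
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, 0x100000UL);
 *
 * Bases that fit in 32 bits are installed via a GDT/TLS slot
 * (set_32bit_tls() above); larger bases are written directly to
 * MSR_FS_BASE or MSR_KERNEL_GS_BASE.
 */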
837 
838 unsigned long arch_align_stack(unsigned long sp)
839 {
840 	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
841 		sp -= get_random_int() % 8192;
842 	return sp & ~0xf;
843 }
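
/*
 * In other words: unless randomization is disabled, up to 8 KB is
 * subtracted from the initial stack pointer, and the result is rounded
 * down to a 16-byte boundary as required by the x86-64 ABI.
 */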
844 
845 unsigned long arch_randomize_brk(struct mm_struct *mm)
846 {
847 	unsigned long range_end = mm->brk + 0x02000000;
848 	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
849 }
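
/*
 * 0x02000000 is 32 MB: the start of the heap is placed at a randomly
 * chosen, page-aligned address within the 32 MB window above the brk
 * picked by the ELF loader, falling back to the unrandomized brk if
 * randomize_range() returns 0.
 */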
850