xref: /linux/arch/x86/kernel/process_64.c (revision a5766f11cfd3a0c03450d99c8fe548c2940be884)
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>

asmlinkage extern void ret_from_fork(void);

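/*
 * Default extra clone flags for kernel threads: they share the kernel
 * address space (CLONE_VM) and cannot be forcibly traced at creation
 * (CLONE_UNTRACED).
 */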
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}
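
/*
 * A subsystem that wants the IDLE_START/IDLE_END callbacks hooks this
 * chain with a struct notifier_block. A minimal sketch (the callback
 * name and helper are hypothetical, not part of this file):
 *
 *	static int my_idle_notify(struct notifier_block *nb,
 *				  unsigned long action, void *data)
 *	{
 *		if (action == IDLE_START)
 *			quiesce_my_device();	(hypothetical helper)
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call	= my_idle_notify,
 *	};
 *
 *	idle_notifier_register(&my_idle_nb);
 */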

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (i.e. sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/*
			 * In many cases the interrupt that ended idle
			 * has already called exit_idle. But some idle
			 * loops can be woken up without interrupt.
			 */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk(KERN_INFO "CPU %d:", smp_processor_id());
	__show_regs(regs, 1);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
#ifdef CONFIG_X86_DS
	/* Free any DS contexts that have not been properly released. */
	if (unlikely(t->ds_ctx)) {
		/* we clear debugctl to make sure DS is not used. */
		update_debugctlmsr(0);
		ds_free(t->ds_ctx);
	}
#endif /* CONFIG_X86_DS */
}

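/*
 * Called from flush_old_exec() at execve() time: reset the per-thread
 * state that must not leak into the new program (debug registers, TLS
 * slots, FPU state, and any pending 32/64-bit ABI switch).
 */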
void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk(KERN_WARNING "WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}
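
/*
 * The two helpers above manage the per-thread TLS slots in the GDT: a
 * base that fits in 32 bits is installed as a 4GB, 32-bit segment so it
 * can be switched cheaply via the GDT instead of an MSR write (see
 * do_arch_prctl() below).
 */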

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
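	/*
	 * An sp of ~0UL marks a kernel thread (the value kernel_thread()
	 * passes): start it at the top of its own kernel stack instead.
	 */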
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
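	/*
	 * The TLS argument arrives in different registers depending on
	 * the ABI: 32-bit clone() passes a struct user_desc pointer in
	 * %esi, while 64-bit clone() passes the new FS base value in %r8.
	 */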
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

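/*
 * Called by the binfmt loaders (e.g. load_elf_binary()) at the tail of
 * execve(): reset the segments and point the register frame at the new
 * program's entry point and stack.
 */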
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs		= __USER_CS;
	regs->ss		= __USER_DS;
	regs->flags		= 0x200;	/* X86_EFLAGS_IF: interrupts enabled */
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}
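
/*
 * get_tsc_mode()/set_tsc_mode() implement the PR_GET_TSC/PR_SET_TSC
 * prctl() options. A sketch of the userspace side:
 *
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);	rdtsc now raises SIGSEGV
 *	prctl(PR_SET_TSC, PR_TSC_ENABLE);	rdtsc is allowed again
 */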

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread;
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;

#ifdef CONFIG_X86_DS
	{
		unsigned long ds_prev = 0, ds_next = 0;

		if (prev->ds_ctx)
			ds_prev = (unsigned long)prev->ds_ctx->ds;
		if (next->ds_ctx)
			ds_next = (unsigned long)next->ds_ctx->ds;

		if (ds_next != ds_prev) {
			/*
			 * We clear debugctl to make sure DS
			 * is not in use when we change it:
			 */
			debugctl = 0;
			update_debugctlmsr(0);
			wrmsrl(MSR_IA32_DS_AREA, ds_next);
		}
	}
#endif /* CONFIG_X86_DS */

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

#ifdef CONFIG_X86_PTRACE_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif /* CONFIG_X86_PTRACE_BTS */
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);


	/*
	 * We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * A segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When the previous process used a
	 * 64-bit base, always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear the 64-bit base, since an overloaded base is
		 * always mapped to the NULL selector.
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when the next process has a 64-bit base, use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/*
	 * If the task has used the FPU in the last 5 timeslices, just do
	 * a full restore of the math state immediately to avoid the trap;
	 * the chances of needing the FPU soon are obviously high now.
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/*
	 * TBD: this overwrites the user's setup. Should have two bits.
	 * But 64-bit processes have always behaved this way, so it's
	 * not too bad. The main problem is just that 32-bit children
	 * are affected again.
	 */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

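/*
 * get_wchan() reports the "wait channel": the address where a sleeping
 * task blocked. It walks the saved frame-pointer chain on the task's
 * kernel stack (at most 16 frames) and returns the first return address
 * that lies outside the core scheduler functions.
 */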
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/*
		 * Handle small bases via the GDT because that's faster
		 * to switch.
		 */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/*
		 * Not strictly needed for fs, but do it for symmetry
		 * with gs.
		 */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/*
		 * Handle small bases via the GDT because that's faster
		 * to switch.
		 */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/*
				 * Set the selector to 0 to not confuse
				 * __switch_to.
				 */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
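
/*
 * Userspace reaches do_arch_prctl() through the arch_prctl(2) syscall,
 * typically from a threading library establishing TLS. A sketch
 * (tls_block and base are hypothetical userspace variables):
 *
 *	arch_prctl(ARCH_SET_FS, (unsigned long)tls_block);
 *	arch_prctl(ARCH_GET_FS, (unsigned long)&base);
 */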

/*
 * Randomize the stack pointer by up to 8KB and keep it 16-byte aligned,
 * unless the task opted out via ADDR_NO_RANDOMIZE.
 */
unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

/*
 * Place the heap start at a random offset within 32MB (0x02000000 bytes)
 * above the minimum brk.
 */
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
869