xref: /linux/arch/powerpc/kernel/process.c (revision 367b8112fe2ea5c39a7bb4d263dcdd9b612fae18)
1 /*
2  *  Derived from "arch/i386/kernel/process.c"
3  *    Copyright (C) 1995  Linus Torvalds
4  *
5  *  Updated and modified by Cort Dougan (cort@cs.nmt.edu) and
6  *  Paul Mackerras (paulus@cs.anu.edu.au)
7  *
8  *  PowerPC version
9  *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
10  *
11  *  This program is free software; you can redistribute it and/or
12  *  modify it under the terms of the GNU General Public License
13  *  as published by the Free Software Foundation; either version
14  *  2 of the License, or (at your option) any later version.
15  */
16 
17 #include <linux/errno.h>
18 #include <linux/sched.h>
19 #include <linux/kernel.h>
20 #include <linux/mm.h>
21 #include <linux/smp.h>
22 #include <linux/stddef.h>
23 #include <linux/unistd.h>
24 #include <linux/ptrace.h>
25 #include <linux/slab.h>
26 #include <linux/user.h>
27 #include <linux/elf.h>
28 #include <linux/init.h>
29 #include <linux/prctl.h>
30 #include <linux/init_task.h>
31 #include <linux/module.h>
32 #include <linux/kallsyms.h>
33 #include <linux/mqueue.h>
34 #include <linux/hardirq.h>
35 #include <linux/utsname.h>
36 
37 #include <asm/pgtable.h>
38 #include <asm/uaccess.h>
39 #include <asm/system.h>
40 #include <asm/io.h>
41 #include <asm/processor.h>
42 #include <asm/mmu.h>
43 #include <asm/prom.h>
44 #include <asm/machdep.h>
45 #include <asm/time.h>
46 #include <asm/syscalls.h>
47 #ifdef CONFIG_PPC64
48 #include <asm/firmware.h>
49 #endif
50 #include <linux/kprobes.h>
51 #include <linux/kdebug.h>
52 
/*
 * Declared here but defined outside this file.
 * NOTE(review): presumably returns the current stack pointer -- confirm
 * against its definition; it is not called anywhere in this file.
 */
extern unsigned long _get_SP(void);

#ifndef CONFIG_SMP
/*
 * Uniprocessor builds only: one pointer per register unit (FP, AltiVec,
 * VSX, SPE).  In this file they are handed to giveup_*() by the
 * enable_kernel_*() helpers, compared against the incoming task in
 * __switch_to(), and reset to NULL by discard_lazy_cpu_state() when they
 * point at current.
 */
struct task_struct *last_task_used_math = NULL;
struct task_struct *last_task_used_altivec = NULL;
struct task_struct *last_task_used_vsx = NULL;
struct task_struct *last_task_used_spe = NULL;
#endif
61 
62 /*
63  * Make sure the floating-point register state in the
64  * the thread_struct is up to date for task tsk.
65  */
66 void flush_fp_to_thread(struct task_struct *tsk)
67 {
68 	if (tsk->thread.regs) {
69 		/*
70 		 * We need to disable preemption here because if we didn't,
71 		 * another process could get scheduled after the regs->msr
72 		 * test but before we have finished saving the FP registers
73 		 * to the thread_struct.  That process could take over the
74 		 * FPU, and then when we get scheduled again we would store
75 		 * bogus values for the remaining FP registers.
76 		 */
77 		preempt_disable();
78 		if (tsk->thread.regs->msr & MSR_FP) {
79 #ifdef CONFIG_SMP
80 			/*
81 			 * This should only ever be called for current or
82 			 * for a stopped child process.  Since we save away
83 			 * the FP register state on context switch on SMP,
84 			 * there is something wrong if a stopped child appears
85 			 * to still have its FP state in the CPU registers.
86 			 */
87 			BUG_ON(tsk != current);
88 #endif
89 			giveup_fpu(tsk);
90 		}
91 		preempt_enable();
92 	}
93 }
94 
95 void enable_kernel_fp(void)
96 {
97 	WARN_ON(preemptible());
98 
99 #ifdef CONFIG_SMP
100 	if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
101 		giveup_fpu(current);
102 	else
103 		giveup_fpu(NULL);	/* just enables FP for kernel */
104 #else
105 	giveup_fpu(last_task_used_math);
106 #endif /* CONFIG_SMP */
107 }
108 EXPORT_SYMBOL(enable_kernel_fp);
109 
110 #ifdef CONFIG_ALTIVEC
111 void enable_kernel_altivec(void)
112 {
113 	WARN_ON(preemptible());
114 
115 #ifdef CONFIG_SMP
116 	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
117 		giveup_altivec(current);
118 	else
119 		giveup_altivec(NULL);	/* just enable AltiVec for kernel - force */
120 #else
121 	giveup_altivec(last_task_used_altivec);
122 #endif /* CONFIG_SMP */
123 }
124 EXPORT_SYMBOL(enable_kernel_altivec);
125 
126 /*
127  * Make sure the VMX/Altivec register state in the
128  * the thread_struct is up to date for task tsk.
129  */
130 void flush_altivec_to_thread(struct task_struct *tsk)
131 {
132 	if (tsk->thread.regs) {
133 		preempt_disable();
134 		if (tsk->thread.regs->msr & MSR_VEC) {
135 #ifdef CONFIG_SMP
136 			BUG_ON(tsk != current);
137 #endif
138 			giveup_altivec(tsk);
139 		}
140 		preempt_enable();
141 	}
142 }
143 #endif /* CONFIG_ALTIVEC */
144 
145 #ifdef CONFIG_VSX
146 #if 0
147 /* not currently used, but some crazy RAID module might want to later */
148 void enable_kernel_vsx(void)
149 {
150 	WARN_ON(preemptible());
151 
152 #ifdef CONFIG_SMP
153 	if (current->thread.regs && (current->thread.regs->msr & MSR_VSX))
154 		giveup_vsx(current);
155 	else
156 		giveup_vsx(NULL);	/* just enable vsx for kernel - force */
157 #else
158 	giveup_vsx(last_task_used_vsx);
159 #endif /* CONFIG_SMP */
160 }
161 EXPORT_SYMBOL(enable_kernel_vsx);
162 #endif
163 
/*
 * Give up the VSX unit for tsk.  The FP and AltiVec give-up routines are
 * called first, then the VSX-specific __giveup_vsx().
 * NOTE(review): presumably FP and AltiVec go first because the VSX
 * register file overlays both -- confirm against the VSX save code.
 */
void giveup_vsx(struct task_struct *tsk)
{
	giveup_fpu(tsk);
	giveup_altivec(tsk);
	__giveup_vsx(tsk);
}
170 
171 void flush_vsx_to_thread(struct task_struct *tsk)
172 {
173 	if (tsk->thread.regs) {
174 		preempt_disable();
175 		if (tsk->thread.regs->msr & MSR_VSX) {
176 #ifdef CONFIG_SMP
177 			BUG_ON(tsk != current);
178 #endif
179 			giveup_vsx(tsk);
180 		}
181 		preempt_enable();
182 	}
183 }
184 #endif /* CONFIG_VSX */
185 
186 #ifdef CONFIG_SPE
187 
188 void enable_kernel_spe(void)
189 {
190 	WARN_ON(preemptible());
191 
192 #ifdef CONFIG_SMP
193 	if (current->thread.regs && (current->thread.regs->msr & MSR_SPE))
194 		giveup_spe(current);
195 	else
196 		giveup_spe(NULL);	/* just enable SPE for kernel - force */
197 #else
198 	giveup_spe(last_task_used_spe);
199 #endif /* __SMP __ */
200 }
201 EXPORT_SYMBOL(enable_kernel_spe);
202 
203 void flush_spe_to_thread(struct task_struct *tsk)
204 {
205 	if (tsk->thread.regs) {
206 		preempt_disable();
207 		if (tsk->thread.regs->msr & MSR_SPE) {
208 #ifdef CONFIG_SMP
209 			BUG_ON(tsk != current);
210 #endif
211 			giveup_spe(tsk);
212 		}
213 		preempt_enable();
214 	}
215 }
216 #endif /* CONFIG_SPE */
217 
218 #ifndef CONFIG_SMP
219 /*
220  * If we are doing lazy switching of CPU state (FP, altivec or SPE),
221  * and the current task has some state, discard it.
222  */
223 void discard_lazy_cpu_state(void)
224 {
225 	preempt_disable();
226 	if (last_task_used_math == current)
227 		last_task_used_math = NULL;
228 #ifdef CONFIG_ALTIVEC
229 	if (last_task_used_altivec == current)
230 		last_task_used_altivec = NULL;
231 #endif /* CONFIG_ALTIVEC */
232 #ifdef CONFIG_VSX
233 	if (last_task_used_vsx == current)
234 		last_task_used_vsx = NULL;
235 #endif /* CONFIG_VSX */
236 #ifdef CONFIG_SPE
237 	if (last_task_used_spe == current)
238 		last_task_used_spe = NULL;
239 #endif
240 	preempt_enable();
241 }
242 #endif /* CONFIG_SMP */
243 
244 void do_dabr(struct pt_regs *regs, unsigned long address,
245 		    unsigned long error_code)
246 {
247 	siginfo_t info;
248 
249 	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
250 			11, SIGSEGV) == NOTIFY_STOP)
251 		return;
252 
253 	if (debugger_dabr_match(regs))
254 		return;
255 
256 	/* Clear the DAC and struct entries.  One shot trigger */
257 #if defined(CONFIG_BOOKE)
258 	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W
259 							| DBCR0_IDM));
260 #endif
261 
262 	/* Clear the DABR */
263 	set_dabr(0);
264 
265 	/* Deliver the signal to userspace */
266 	info.si_signo = SIGTRAP;
267 	info.si_errno = 0;
268 	info.si_code = TRAP_HWBKPT;
269 	info.si_addr = (void __user *)address;
270 	force_sig_info(SIGTRAP, &info, current);
271 }
272 
/* Per-CPU record of the value most recently passed to set_dabr(). */
static DEFINE_PER_CPU(unsigned long, current_dabr);

/*
 * Program the data address breakpoint with @dabr.
 *
 * The value is always recorded in the per-CPU current_dabr first.  If the
 * platform provides ppc_md.set_dabr, that hook does the programming and
 * its return value is returned.  Otherwise the value is written straight
 * to the SPR selected by the build configuration (DABR on PPC64/6xx,
 * DAC1 on BookE) and 0 is returned.
 */
int set_dabr(unsigned long dabr)
{
	__get_cpu_var(current_dabr) = dabr;

	if (ppc_md.set_dabr)
		return ppc_md.set_dabr(dabr);

	/* XXX should we have a CPU_FTR_HAS_DABR ? */
#if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
	mtspr(SPRN_DABR, dabr);
#endif

#if defined(CONFIG_BOOKE)
	mtspr(SPRN_DAC1, dabr);
#endif

	return 0;
}
293 
#ifdef CONFIG_PPC64
/*
 * Per-CPU utilization record; __switch_to() stores the PURR value it
 * samples into the current_tb field on SPLPAR firmware.
 */
DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
#endif
297 
/*
 * Switch from task @prev to task @new.
 *
 * On SMP, any FP/AltiVec/VSX/SPE state that @prev has live (as indicated
 * by the corresponding bit in its saved MSR) is saved before switching.
 * On UP, the facility bit is set in @new's saved MSR when @new is the
 * recorded last user of that facility.  The breakpoint register is
 * reprogrammed if @new wants a different value, PURR-based utilization is
 * accounted on PPC64 SPLPAR systems, and finally _switch() is called
 * with interrupts disabled.
 *
 * Returns whatever _switch() returns.
 */
struct task_struct *__switch_to(struct task_struct *prev,
	struct task_struct *new)
{
	struct thread_struct *new_thread, *old_thread;
	unsigned long flags;
	struct task_struct *last;

#ifdef CONFIG_SMP
	/* avoid complexity of lazy save/restore of fpu
	 * by just saving it every time we switch out if
	 * this task used the fpu during the last quantum.
	 *
	 * If it tries to use the fpu again, it'll trap and
	 * reload its fp regs.  So we don't have to do a restore
	 * every switch, just a save.
	 *  -- Cort
	 */
	if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
		giveup_fpu(prev);
#ifdef CONFIG_ALTIVEC
	/*
	 * If the previous thread used altivec in the last quantum
	 * (thus changing altivec regs) then save them.
	 * We used to check the VRSAVE register but not all apps
	 * set it, so we don't rely on it now (and in fact we need
	 * to save & restore VSCR even if VRSAVE == 0).  -- paulus
	 *
	 * On SMP we always save/restore altivec regs just to avoid the
	 * complexity of changing processors.
	 *  -- Cort
	 */
	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
		giveup_altivec(prev);
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX))
		/* VMX and FPU registers are already saved here */
		__giveup_vsx(prev);
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
	/*
	 * If the previous thread used spe in the last quantum
	 * (thus changing spe regs) then save them.
	 *
	 * On SMP we always save/restore spe regs just to avoid the
	 * complexity of changing processors.
	 */
	if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE)))
		giveup_spe(prev);
#endif /* CONFIG_SPE */

#else  /* CONFIG_SMP */
#ifdef CONFIG_ALTIVEC
	/* Avoid the trap.  On smp this never happens since
	 * we don't set last_task_used_altivec -- Cort
	 */
	if (new->thread.regs && last_task_used_altivec == new)
		new->thread.regs->msr |= MSR_VEC;
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
	/* Same shortcut as for AltiVec above, for the VSX facility bit. */
	if (new->thread.regs && last_task_used_vsx == new)
		new->thread.regs->msr |= MSR_VSX;
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
	/* Avoid the trap.  On smp this never happens since
	 * we don't set last_task_used_spe
	 */
	if (new->thread.regs && last_task_used_spe == new)
		new->thread.regs->msr |= MSR_SPE;
#endif /* CONFIG_SPE */

#endif /* CONFIG_SMP */

	/* Only reprogram when this CPU's recorded value differs. */
	if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
		set_dabr(new->thread.dabr);

#if defined(CONFIG_BOOKE)
	/*
	 * On BookE the DAC (HW breakpoint) is additionally programmed
	 * whenever the new thread has a non-zero breakpoint value.
	 */
	if (new->thread.dabr)
		set_dabr(new->thread.dabr);
#endif

	new_thread = &new->thread;
	old_thread = &current->thread;

#ifdef CONFIG_PPC64
	/*
	 * Collect processor utilization data per process
	 */
	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
		long unsigned start_tb, current_tb;
		start_tb = old_thread->start_tb;
		cu->current_tb = current_tb = mfspr(SPRN_PURR);
		/* Charge the PURR ticks since the outgoing thread started. */
		old_thread->accum_tb += (current_tb - start_tb);
		new_thread->start_tb = current_tb;
	}
#endif

	local_irq_save(flags);

	account_system_vtime(current);
	account_process_vtime(current);
	calculate_steal_time();

	/*
	 * We can't take a PMU exception inside _switch() since there is a
	 * window where the kernel stack SLB and the kernel stack are out
	 * of sync. Hard disable here.
	 */
	hard_irq_disable();
	last = _switch(old_thread, new_thread);

	local_irq_restore(flags);

	return last;
}
415 
416 static int instructions_to_print = 16;
417 
418 static void show_instructions(struct pt_regs *regs)
419 {
420 	int i;
421 	unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
422 			sizeof(int));
423 
424 	printk("Instruction dump:");
425 
426 	for (i = 0; i < instructions_to_print; i++) {
427 		int instr;
428 
429 		if (!(i % 8))
430 			printk("\n");
431 
432 #if !defined(CONFIG_BOOKE)
433 		/* If executing with the IMMU off, adjust pc rather
434 		 * than print XXXXXXXX.
435 		 */
436 		if (!(regs->msr & MSR_IR))
437 			pc = (unsigned long)phys_to_virt(pc);
438 #endif
439 
440 		/* We use __get_user here *only* to avoid an OOPS on a
441 		 * bad address because the pc *should* only be a
442 		 * kernel address.
443 		 */
444 		if (!__kernel_text_address(pc) ||
445 		     __get_user(instr, (unsigned int __user *)pc)) {
446 			printk("XXXXXXXX ");
447 		} else {
448 			if (regs->nip == pc)
449 				printk("<%08x> ", instr);
450 			else
451 				printk("%08x ", instr);
452 		}
453 
454 		pc += sizeof(int);
455 	}
456 
457 	printk("\n");
458 }
459 
/*
 * Table mapping a register bit mask to the short name printed for it.
 * The table is terminated by an entry whose bit field is 0, which is
 * what printbits() uses as its stop condition.
 */
static struct regbit {
	unsigned long bit;	/* mask tested against the register value */
	const char *name;	/* label printed when the bit is set */
} msr_bits[] = {
	{MSR_EE,	"EE"},
	{MSR_PR,	"PR"},
	{MSR_FP,	"FP"},
	{MSR_VEC,	"VEC"},
	{MSR_VSX,	"VSX"},
	{MSR_ME,	"ME"},
	{MSR_IR,	"IR"},
	{MSR_DR,	"DR"},
	{0,		NULL}
};
474 
475 static void printbits(unsigned long val, struct regbit *bits)
476 {
477 	const char *sep = "";
478 
479 	printk("<");
480 	for (; bits->bit; ++bits)
481 		if (val & bits->bit) {
482 			printk("%s%s", sep, bits->name);
483 			sep = ",";
484 		}
485 	printk(">");
486 }
487 
/*
 * Register dump formatting used by show_regs() and show_stack():
 *   REG            printk format for one register-sized value
 *   REGS_PER_LINE  number of GPRs printed per output line
 *   LAST_VOLATILE  index of the last GPR printed when the saved register
 *                  set is not full (see the FULL_REGS() test in show_regs())
 */
#ifdef CONFIG_PPC64
#define REG		"%016lx"
#define REGS_PER_LINE	4
#define LAST_VOLATILE	13
#else
#define REG		"%08lx"
#define REGS_PER_LINE	8
#define LAST_VOLATILE	12
#endif
497 
498 void show_regs(struct pt_regs * regs)
499 {
500 	int i, trap;
501 
502 	printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
503 	       regs->nip, regs->link, regs->ctr);
504 	printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
505 	       regs, regs->trap, print_tainted(), init_utsname()->release);
506 	printk("MSR: "REG" ", regs->msr);
507 	printbits(regs->msr, msr_bits);
508 	printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
509 	trap = TRAP(regs);
510 	if (trap == 0x300 || trap == 0x600)
511 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
512 		printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr);
513 #else
514 		printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
515 #endif
516 	printk("TASK = %p[%d] '%s' THREAD: %p",
517 	       current, task_pid_nr(current), current->comm, task_thread_info(current));
518 
519 #ifdef CONFIG_SMP
520 	printk(" CPU: %d", raw_smp_processor_id());
521 #endif /* CONFIG_SMP */
522 
523 	for (i = 0;  i < 32;  i++) {
524 		if ((i % REGS_PER_LINE) == 0)
525 			printk("\n" KERN_INFO "GPR%02d: ", i);
526 		printk(REG " ", regs->gpr[i]);
527 		if (i == LAST_VOLATILE && !FULL_REGS(regs))
528 			break;
529 	}
530 	printk("\n");
531 #ifdef CONFIG_KALLSYMS
532 	/*
533 	 * Lookup NIP late so we have the best change of getting the
534 	 * above info out without failing
535 	 */
536 	printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
537 	printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
538 #endif
539 	show_stack(current, (unsigned long *) regs->gpr[1]);
540 	if (!user_mode(regs))
541 		show_instructions(regs);
542 }
543 
/*
 * Arch hook for a thread that is exiting: the only action here is to
 * discard any lazily-held CPU state via discard_lazy_cpu_state().
 */
void exit_thread(void)
{
	discard_lazy_cpu_state();
}
548 
549 void flush_thread(void)
550 {
551 #ifdef CONFIG_PPC64
552 	struct thread_info *t = current_thread_info();
553 
554 	if (test_ti_thread_flag(t, TIF_ABI_PENDING)) {
555 		clear_ti_thread_flag(t, TIF_ABI_PENDING);
556 		if (test_ti_thread_flag(t, TIF_32BIT))
557 			clear_ti_thread_flag(t, TIF_32BIT);
558 		else
559 			set_ti_thread_flag(t, TIF_32BIT);
560 	}
561 #endif
562 
563 	discard_lazy_cpu_state();
564 
565 	if (current->thread.dabr) {
566 		current->thread.dabr = 0;
567 		set_dabr(0);
568 
569 #if defined(CONFIG_BOOKE)
570 		current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W);
571 #endif
572 	}
573 }
574 
/* Arch hook for releasing a task's thread state; nothing to do here. */
void release_thread(struct task_struct *t)
{
}
579 
/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 *
 * The FP, AltiVec, VSX and SPE flush routines are each called on
 * current.  Note that @tsk is not used: the flush always targets current.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	flush_fp_to_thread(current);
	flush_altivec_to_thread(current);
	flush_vsx_to_thread(current);
	flush_spe_to_thread(current);
}
591 
/*
 * Copy a thread: set up the kernel stack and thread_struct of the new
 * task @p so that it starts running at ret_from_fork.
 *
 * @nr:          not used in this function
 * @clone_flags: only CLONE_SETTLS is examined here
 * @usp:         user stack pointer for the child (user threads only)
 * @unused:      not used
 * @p:           the new task
 * @regs:        parent's register state, copied to the top of @p's stack
 *
 * Always returns 0.
 */
int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
		unsigned long unused, struct task_struct *p,
		struct pt_regs *regs)
{
	struct pt_regs *childregs, *kregs;
	extern void ret_from_fork(void);
	unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;

	CHECK_FULL_REGS(regs);
	/* Copy registers */
	sp -= sizeof(struct pt_regs);
	childregs = (struct pt_regs *) sp;
	*childregs = *regs;
	if ((childregs->msr & MSR_PR) == 0) {
		/* for kernel thread, set `current' and stackptr in new task */
		childregs->gpr[1] = sp + sizeof(struct pt_regs);
#ifdef CONFIG_PPC32
		childregs->gpr[2] = (unsigned long) p;
#else
		clear_tsk_thread_flag(p, TIF_32BIT);
#endif
		p->thread.regs = NULL;	/* no user register state */
	} else {
		childregs->gpr[1] = usp;
		p->thread.regs = childregs;
		if (clone_flags & CLONE_SETTLS) {
			/*
			 * The TLS value is taken from gpr[6] and stored in
			 * gpr[13] for 64-bit tasks, gpr[2] otherwise.
			 */
#ifdef CONFIG_PPC64
			if (!test_thread_flag(TIF_32BIT))
				childregs->gpr[13] = childregs->gpr[6];
			else
#endif
				childregs->gpr[2] = childregs->gpr[6];
		}
	}
	childregs->gpr[3] = 0;  /* Result from fork() */
	sp -= STACK_FRAME_OVERHEAD;

	/*
	 * The way this works is that at some point in the future
	 * some task will call _switch to switch to the new task.
	 * That will pop off the stack frame created below and start
	 * the new task running at ret_from_fork.  The new task will
	 * do some house keeping and then return from the fork or clone
	 * system call, using the stack frame created above.
	 */
	sp -= sizeof(struct pt_regs);
	kregs = (struct pt_regs *) sp;
	sp -= STACK_FRAME_OVERHEAD;
	p->thread.ksp = sp;
	/* Lowest usable stack address: just above the thread_info. */
	p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
				_ALIGN_UP(sizeof(struct thread_info), 16);

#ifdef CONFIG_PPC64
	if (cpu_has_feature(CPU_FTR_SLB)) {
		unsigned long sp_vsid;
		unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;

		/* Build the VSID word for the segment holding the new stack. */
		if (cpu_has_feature(CPU_FTR_1T_SEGMENT))
			sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
				<< SLB_VSID_SHIFT_1T;
		else
			sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M)
				<< SLB_VSID_SHIFT;
		sp_vsid |= SLB_VSID_KERNEL | llp;
		p->thread.ksp_vsid = sp_vsid;
	}

	/*
	 * The PPC64 ABI makes use of a TOC to contain function
	 * pointers.  The function (ret_from_fork) is actually a pointer
	 * to the TOC entry.  The first entry is a pointer to the actual
	 * function.
	 */
	kregs->nip = *((unsigned long *)ret_from_fork);
#else
	kregs->nip = (unsigned long)ret_from_fork;
#endif

	return 0;
}
675 
/*
 * Set up a thread for executing a new program
 *
 * @regs:  user register frame to initialise
 * @start: program entry point; for 64-bit tasks this is the user address
 *         of a function descriptor (entry address followed by TOC value)
 * @sp:    initial user stack pointer, stored in gpr[1]
 *
 * All GPRs and CTR/LR/XER/CR are cleared, NIP/MSR (and r2 on PPC64) are
 * set for the new image, and any FP/AltiVec/VSX/SPE state held for the
 * task is reset.
 */
void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
{
#ifdef CONFIG_PPC64
	unsigned long load_addr = regs->gpr[2];	/* saved by ELF_PLAT_INIT */
#endif

	set_fs(USER_DS);

	/*
	 * If we exec out of a kernel thread then thread.regs will not be
	 * set.  Do it now.
	 *
	 * Note: the local 'regs' below shadows the parameter.  It points at
	 * the top of the kernel stack, so 'regs - 1' is the pt_regs frame
	 * located at the very top of that stack.
	 */
	if (!current->thread.regs) {
		struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
		current->thread.regs = regs - 1;
	}

	memset(regs->gpr, 0, sizeof(regs->gpr));
	regs->ctr = 0;
	regs->link = 0;
	regs->xer = 0;
	regs->ccr = 0;
	regs->gpr[1] = sp;

	/*
	 * We have just cleared all the nonvolatile GPRs, so make
	 * FULL_REGS(regs) return true.  This is necessary to allow
	 * ptrace to examine the thread immediately after exec.
	 */
	regs->trap &= ~1UL;

#ifdef CONFIG_PPC32
	regs->mq = 0;
	regs->nip = start;
	regs->msr = MSR_USER;
#else
	if (!test_thread_flag(TIF_32BIT)) {
		unsigned long entry, toc;

		/* start is a relocated pointer to the function descriptor for
		 * the elf _start routine.  The first entry in the function
		 * descriptor is the entry address of _start and the second
		 * entry is the TOC value we need to use.
		 */
		/* NOTE(review): the __get_user() results are not checked. */
		__get_user(entry, (unsigned long __user *)start);
		__get_user(toc, (unsigned long __user *)start+1);

		/* Check whether the e_entry function descriptor entries
		 * need to be relocated before we can use them.
		 */
		if (load_addr != 0) {
			entry += load_addr;
			toc   += load_addr;
		}
		regs->nip = entry;
		regs->gpr[2] = toc;
		regs->msr = MSR_USER64;
	} else {
		regs->nip = start;
		regs->gpr[2] = 0;
		regs->msr = MSR_USER32;
	}
#endif

	/* Drop lazily-held unit state, then zero the saved copies. */
	discard_lazy_cpu_state();
#ifdef CONFIG_VSX
	current->thread.used_vsr = 0;
#endif
	memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
	current->thread.fpscr.val = 0;
#ifdef CONFIG_ALTIVEC
	memset(current->thread.vr, 0, sizeof(current->thread.vr));
	memset(&current->thread.vscr, 0, sizeof(current->thread.vscr));
	current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
	current->thread.vrsave = 0;
	current->thread.used_vr = 0;
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_SPE
	memset(current->thread.evr, 0, sizeof(current->thread.evr));
	current->thread.acc = 0;
	current->thread.spefscr = 0;
	current->thread.used_spe = 0;
#endif /* CONFIG_SPE */
}
763 
764 #define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
765 		| PR_FP_EXC_RES | PR_FP_EXC_INV)
766 
767 int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
768 {
769 	struct pt_regs *regs = tsk->thread.regs;
770 
771 	/* This is a bit hairy.  If we are an SPE enabled  processor
772 	 * (have embedded fp) we store the IEEE exception enable flags in
773 	 * fpexc_mode.  fpexc_mode is also used for setting FP exception
774 	 * mode (asyn, precise, disabled) for 'Classic' FP. */
775 	if (val & PR_FP_EXC_SW_ENABLE) {
776 #ifdef CONFIG_SPE
777 		if (cpu_has_feature(CPU_FTR_SPE)) {
778 			tsk->thread.fpexc_mode = val &
779 				(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
780 			return 0;
781 		} else {
782 			return -EINVAL;
783 		}
784 #else
785 		return -EINVAL;
786 #endif
787 	}
788 
789 	/* on a CONFIG_SPE this does not hurt us.  The bits that
790 	 * __pack_fe01 use do not overlap with bits used for
791 	 * PR_FP_EXC_SW_ENABLE.  Additionally, the MSR[FE0,FE1] bits
792 	 * on CONFIG_SPE implementations are reserved so writing to
793 	 * them does not change anything */
794 	if (val > PR_FP_EXC_PRECISE)
795 		return -EINVAL;
796 	tsk->thread.fpexc_mode = __pack_fe01(val);
797 	if (regs != NULL && (regs->msr & MSR_FP) != 0)
798 		regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
799 			| tsk->thread.fpexc_mode;
800 	return 0;
801 }
802 
803 int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
804 {
805 	unsigned int val;
806 
807 	if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
808 #ifdef CONFIG_SPE
809 		if (cpu_has_feature(CPU_FTR_SPE))
810 			val = tsk->thread.fpexc_mode;
811 		else
812 			return -EINVAL;
813 #else
814 		return -EINVAL;
815 #endif
816 	else
817 		val = __unpack_fe01(tsk->thread.fpexc_mode);
818 	return put_user(val, (unsigned int __user *) adr);
819 }
820 
821 int set_endian(struct task_struct *tsk, unsigned int val)
822 {
823 	struct pt_regs *regs = tsk->thread.regs;
824 
825 	if ((val == PR_ENDIAN_LITTLE && !cpu_has_feature(CPU_FTR_REAL_LE)) ||
826 	    (val == PR_ENDIAN_PPC_LITTLE && !cpu_has_feature(CPU_FTR_PPC_LE)))
827 		return -EINVAL;
828 
829 	if (regs == NULL)
830 		return -EINVAL;
831 
832 	if (val == PR_ENDIAN_BIG)
833 		regs->msr &= ~MSR_LE;
834 	else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE)
835 		regs->msr |= MSR_LE;
836 	else
837 		return -EINVAL;
838 
839 	return 0;
840 }
841 
842 int get_endian(struct task_struct *tsk, unsigned long adr)
843 {
844 	struct pt_regs *regs = tsk->thread.regs;
845 	unsigned int val;
846 
847 	if (!cpu_has_feature(CPU_FTR_PPC_LE) &&
848 	    !cpu_has_feature(CPU_FTR_REAL_LE))
849 		return -EINVAL;
850 
851 	if (regs == NULL)
852 		return -EINVAL;
853 
854 	if (regs->msr & MSR_LE) {
855 		if (cpu_has_feature(CPU_FTR_REAL_LE))
856 			val = PR_ENDIAN_LITTLE;
857 		else
858 			val = PR_ENDIAN_PPC_LITTLE;
859 	} else
860 		val = PR_ENDIAN_BIG;
861 
862 	return put_user(val, (unsigned int __user *)adr);
863 }
864 
/*
 * Record @val as the alignment control setting of @tsk.  The value is
 * stored as given, without validation.  Always returns 0.
 */
int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
{
	tsk->thread.align_ctl = val;
	return 0;
}
870 
871 int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
872 {
873 	return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr);
874 }
875 
876 #define TRUNC_PTR(x)	((typeof(x))(((unsigned long)(x)) & 0xffffffff))
877 
878 int sys_clone(unsigned long clone_flags, unsigned long usp,
879 	      int __user *parent_tidp, void __user *child_threadptr,
880 	      int __user *child_tidp, int p6,
881 	      struct pt_regs *regs)
882 {
883 	CHECK_FULL_REGS(regs);
884 	if (usp == 0)
885 		usp = regs->gpr[1];	/* stack pointer for child */
886 #ifdef CONFIG_PPC64
887 	if (test_thread_flag(TIF_32BIT)) {
888 		parent_tidp = TRUNC_PTR(parent_tidp);
889 		child_tidp = TRUNC_PTR(child_tidp);
890 	}
891 #endif
892  	return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp);
893 }
894 
895 int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3,
896 	     unsigned long p4, unsigned long p5, unsigned long p6,
897 	     struct pt_regs *regs)
898 {
899 	CHECK_FULL_REGS(regs);
900 	return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL);
901 }
902 
903 int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3,
904 	      unsigned long p4, unsigned long p5, unsigned long p6,
905 	      struct pt_regs *regs)
906 {
907 	CHECK_FULL_REGS(regs);
908 	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1],
909 			regs, 0, NULL, NULL);
910 }
911 
912 int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
913 	       unsigned long a3, unsigned long a4, unsigned long a5,
914 	       struct pt_regs *regs)
915 {
916 	int error;
917 	char *filename;
918 
919 	filename = getname((char __user *) a0);
920 	error = PTR_ERR(filename);
921 	if (IS_ERR(filename))
922 		goto out;
923 	flush_fp_to_thread(current);
924 	flush_altivec_to_thread(current);
925 	flush_spe_to_thread(current);
926 	error = do_execve(filename, (char __user * __user *) a1,
927 			  (char __user * __user *) a2, regs);
928 	putname(filename);
929 out:
930 	return error;
931 }
932 
#ifdef CONFIG_IRQSTACKS
/*
 * Check whether [sp, sp + nbytes) lies within the hard-IRQ or soft-IRQ
 * stack of the CPU that task @p is on.
 *
 * Each IRQ stack is THREAD_SIZE bytes with a struct thread_info at its
 * base, so the lowest acceptable sp is just above that thread_info.
 * (The bound previously used sizeof(struct thread_struct), which is not
 * the structure stored on the stack.)
 *
 * Returns 1 if sp is valid, 0 otherwise.
 */
static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
				  unsigned long nbytes)
{
	unsigned long stack_page;
	unsigned long cpu = task_cpu(p);

	/*
	 * Avoid crashing if the stack has overflowed and corrupted
	 * task_cpu(p), which is in the thread_info struct.
	 */
	if (cpu < NR_CPUS && cpu_possible(cpu)) {
		stack_page = (unsigned long) hardirq_ctx[cpu];
		if (sp >= stack_page + sizeof(struct thread_info)
		    && sp <= stack_page + THREAD_SIZE - nbytes)
			return 1;

		stack_page = (unsigned long) softirq_ctx[cpu];
		if (sp >= stack_page + sizeof(struct thread_info)
		    && sp <= stack_page + THREAD_SIZE - nbytes)
			return 1;
	}
	return 0;
}

#else
/* Without separate IRQ stacks no sp can be on one. */
#define valid_irq_stack(sp, p, nb)	0
#endif /* CONFIG_IRQSTACKS */
961 
962 int validate_sp(unsigned long sp, struct task_struct *p,
963 		       unsigned long nbytes)
964 {
965 	unsigned long stack_page = (unsigned long)task_stack_page(p);
966 
967 	if (sp >= stack_page + sizeof(struct thread_struct)
968 	    && sp <= stack_page + THREAD_SIZE - nbytes)
969 		return 1;
970 
971 	return valid_irq_stack(sp, p, nbytes);
972 }
973 
974 EXPORT_SYMBOL(validate_sp);
975 
976 unsigned long get_wchan(struct task_struct *p)
977 {
978 	unsigned long ip, sp;
979 	int count = 0;
980 
981 	if (!p || p == current || p->state == TASK_RUNNING)
982 		return 0;
983 
984 	sp = p->thread.ksp;
985 	if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
986 		return 0;
987 
988 	do {
989 		sp = *(unsigned long *)sp;
990 		if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
991 			return 0;
992 		if (count > 0) {
993 			ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
994 			if (!in_sched_functions(ip))
995 				return ip;
996 		}
997 	} while (count++ < 16);
998 	return 0;
999 }
1000 
/* Bound on the number of loop iterations show_stack() performs. */
static int kstack_depth_to_print = 64;

/*
 * Print a call trace for task @tsk starting at stack pointer @stack.
 *
 * @tsk:   task to trace; NULL means current
 * @stack: starting stack pointer; NULL means the live r1 when tracing
 *         current, otherwise the task's saved thread.ksp
 *
 * The walk follows the back-chain word of each frame and stops at the
 * first stack pointer that validate_sp() rejects, or once the loop bound
 * kstack_depth_to_print is exhausted.
 */
void show_stack(struct task_struct *tsk, unsigned long *stack)
{
	unsigned long sp, ip, lr, newsp;
	int count = 0;
	int firstframe = 1;

	sp = (unsigned long) stack;
	if (tsk == NULL)
		tsk = current;
	if (sp == 0) {
		if (tsk == current)
			asm("mr %0,1" : "=r" (sp));
		else
			sp = tsk->thread.ksp;
	}

	lr = 0;
	printk("Call Trace:\n");
	do {
		if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
			return;

		stack = (unsigned long *) sp;
		newsp = stack[0];
		ip = stack[STACK_FRAME_LR_SAVE];
		/*
		 * In a first frame, skip the entry when its saved LR equals
		 * the lr remembered from the preceding exception frame;
		 * otherwise print it, flagged as unreliable.
		 */
		if (!firstframe || ip != lr) {
			printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
			if (firstframe)
				printk(" (unreliable)");
			printk("\n");
		}
		firstframe = 0;

		/*
		 * See if this is an exception frame.
		 * We look for the "regshere" marker in the current frame.
		 */
		if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE)
		    && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
			struct pt_regs *regs = (struct pt_regs *)
				(sp + STACK_FRAME_OVERHEAD);
			lr = regs->link;
			printk("--- Exception: %lx at %pS\n    LR = %pS\n",
			       regs->trap, (void *)regs->nip, (void *)lr);
			/* The frame after an exception is a first frame again. */
			firstframe = 1;
		}

		sp = newsp;
	} while (count++ < kstack_depth_to_print);
}
1053 
/*
 * Print a call trace of the current task, starting from the live stack
 * pointer (a NULL stack argument makes show_stack() read r1).
 */
void dump_stack(void)
{
	show_stack(current, NULL);
}
EXPORT_SYMBOL(dump_stack);
1059 
1060 #ifdef CONFIG_PPC64
1061 void ppc64_runlatch_on(void)
1062 {
1063 	unsigned long ctrl;
1064 
1065 	if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) {
1066 		HMT_medium();
1067 
1068 		ctrl = mfspr(SPRN_CTRLF);
1069 		ctrl |= CTRL_RUNLATCH;
1070 		mtspr(SPRN_CTRLT, ctrl);
1071 
1072 		set_thread_flag(TIF_RUNLATCH);
1073 	}
1074 }
1075 
1076 void ppc64_runlatch_off(void)
1077 {
1078 	unsigned long ctrl;
1079 
1080 	if (cpu_has_feature(CPU_FTR_CTRL) && test_thread_flag(TIF_RUNLATCH)) {
1081 		HMT_medium();
1082 
1083 		clear_thread_flag(TIF_RUNLATCH);
1084 
1085 		ctrl = mfspr(SPRN_CTRLF);
1086 		ctrl &= ~CTRL_RUNLATCH;
1087 		mtspr(SPRN_CTRLT, ctrl);
1088 	}
1089 }
1090 #endif
1091 
1092 #if THREAD_SHIFT < PAGE_SHIFT
1093 
1094 static struct kmem_cache *thread_info_cache;
1095 
1096 struct thread_info *alloc_thread_info(struct task_struct *tsk)
1097 {
1098 	struct thread_info *ti;
1099 
1100 	ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
1101 	if (unlikely(ti == NULL))
1102 		return NULL;
1103 #ifdef CONFIG_DEBUG_STACK_USAGE
1104 	memset(ti, 0, THREAD_SIZE);
1105 #endif
1106 	return ti;
1107 }
1108 
/* Return a thread_info block obtained from alloc_thread_info() to the cache. */
void free_thread_info(struct thread_info *ti)
{
	kmem_cache_free(thread_info_cache, ti);
}
1113 
1114 void thread_info_cache_init(void)
1115 {
1116 	thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
1117 					      THREAD_SIZE, 0, NULL);
1118 	BUG_ON(thread_info_cache == NULL);
1119 }
1120 
1121 #endif /* THREAD_SHIFT < PAGE_SHIFT */
1122