xref: /linux/arch/powerpc/kernel/vector.S (revision c0c914eca7f251c70facc37dfebeaf176601918d)
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* void do_load_up_transact_altivec(struct thread_struct *thread)
 *
 * This is similar to load_up_altivec but for the transactional version of the
 * vector regs.  It doesn't mess with the task MSR or valid flags.
 * Furthermore, VEC laziness is not supported with TM currently.
 */
_GLOBAL(do_load_up_transact_altivec)
	mfmsr	r6
	oris	r5,r6,MSR_VEC@h
	MTMSRD(r5)
	isync

	li	r4,1
	stw	r4,THREAD_USED_VR(r3)

	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
	lvx	v0,r10,r3
	mtvscr	v0
	addi	r10,r3,THREAD_TRANSACT_VRSTATE
	REST_32VRS(0,r4,r10)

	blr
#endif

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
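/*
 * Seen from C, load_vr_state and store_vr_state below each take a single
 * pointer in r3 to the vr_state buffer being loaded or saved (something
 * like a struct thread_vr_state *); the exact prototypes live in the arch
 * headers rather than in this file, so treat the type as illustrative.
 */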
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr

/*
 * load_up_altivec:
 * Load the current task's vector state from its thread_struct, mark the
 * thread as having used VMX, and re-enable VMX in the task's saved MSR
 * so it is available again on return to userspace.  The VMX is known to
 * be free, since we give it up on every context switch (ie, there is no
 * lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
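/*
 * load_up_altivec is reached from the vector-unavailable exception path,
 * so the MSR image that will be restored on exception exit is already
 * live in a register here: r9 on 32-bit and r12 on 64-bit appear to
 * carry that saved MSR, which is why the code below ORs MSR_VEC into
 * them without loading them first.
 */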
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr

/*
 * __giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 */
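/*
 * Where the registers end up depends on the task state: if the pointer
 * at THREAD_VRSAVEAREA (thread.vr_save_area) is non-NULL the state is
 * written there, otherwise it goes into thread.vr_state.  MSR_VEC (and
 * MSR_VSX on CPUs that have it) is only cleared from the task's saved
 * MSR when the task actually has a pt_regs area to update.
 */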
_GLOBAL(__giveup_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX (and VSX) for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Load the current task's FP and VMX state if it is not already loaded,
 * mark the thread as having used VSX, and re-enable VSX in the task's
 * saved MSR so it is available again on return to userspace.
 * Reuses load_up_fpu and load_up_altivec for the actual loads, but
 * first checks whether each has been done already.
 */
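/*
 * As with load_up_altivec, this runs on the unavailable-exception path:
 * r12 appears to carry the MSR image saved in the exception frame, which
 * is why it is tested for MSR_FP and MSR_VEC below and then written back
 * to _MSR(r1) with MSR_VSX set before branching to fast_exception_return.
 */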
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 */
_GLOBAL(__giveup_vsx)
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
	blr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
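/*
 * A minimal sketch of how a caller might drive one of these helpers from
 * C, assuming a prototype along the lines of
 *	void vaddfp(void *dst, const void *a, const void *b);
 * (the prototype is illustrative, not taken from this file).  The point
 * is that preemption stays disabled around the call, since the routines
 * borrow the FPU without telling the scheduler:
 *
 *	preempt_disable();
 *	vaddfp(dst, srca, srcb);
 *	preempt_enable();
 */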
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
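/*
 * Conventions shared by fpenable/fpdisable and the callers below: the
 * caller stashes its return address in r12 (mflr r12) before calling
 * fpenable; r10 holds the original MSR and fr31 the original FPSCR so
 * fpdisable can restore both; fr0/fr1/fr31 are saved at offsets 24/16/8
 * of the 64-byte frame, and callers needing more FP registers save them
 * at offsets 32 and up.
 */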
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
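/*
 * Roughly the C equivalent of the loop below (and of the vsubfp, vmaddfp
 * and vnmsubfp loops that follow), assuming r3/r4/r5 point at
 * four-element single-precision arrays:
 *
 *	for (i = 0; i < 4; i++)
 *		dst[i] = a[i] + b[i];
 */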
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
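/*
 * In C terms each of the four lanes gets dst[i] = 1.0f / src[i], using a
 * full single-precision divide rather than a hardware estimate.
 */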
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
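/*
 * The Newton-Raphson step below comes from applying Newton's method to
 * f(r) = 1/r^2 - s, whose positive root is r = 1/sqrt(s):
 *
 *	r' = r - f(r)/f'(r) = r + 0.5 * r * (1 - s * r * r)
 *
 * Each step roughly doubles the number of correct bits, which is why two
 * iterations on top of the frsqrte estimate are enough here.
 */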
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable