xref: /linux/arch/powerpc/kernel/vector.S (revision ae64438be1923e3c1102d90fd41db7afcfaf54cc)
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <asm/processor.h>
3#include <asm/ppc_asm.h>
4#include <asm/reg.h>
5#include <asm/asm-offsets.h>
6#include <asm/cputable.h>
7#include <asm/thread_info.h>
8#include <asm/page.h>
9#include <asm/ptrace.h>
10#include <asm/export.h>
11#include <asm/asm-compat.h>
12
13/*
14 * Load state from memory into VMX registers including VSCR.
15 * Assumes the caller has enabled VMX in the MSR.
16 */
17_GLOBAL(load_vr_state)
	/*
	 * In:   r3 = base address of the saved vector state in memory.
	 * Uses: r4 (offset/scratch), v0 (staging register for VSCR).
	 * REST_32VRS is defined outside this file; presumably its arguments
	 * are (first VR, scratch GPR, base GPR) -- confirm in ppc_asm.h.
	 */
18	li	r4,VRSTATE_VSCR		/* r4 = offset of the VSCR image */
19	lvx	v0,r4,r3		/* v0 = VSCR image at r3 + r4 */
20	mtvscr	v0			/* set VSCR before v0 is reused below */
21	REST_32VRS(0,r4,r3)		/* reload the vector registers from r3 */
22	blr
23EXPORT_SYMBOL(load_vr_state)
24_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */
25
26/*
27 * Store VMX state into memory, including VSCR.
28 * Assumes the caller has enabled VMX in the MSR.
29 */
30_GLOBAL(store_vr_state)
	/*
	 * In:   r3 = base address of the memory area receiving the state.
	 * Uses: r4 (offset/scratch).  v0 is not restored after being used
	 *       to stage VSCR, so it holds the VSCR value on return.
	 * SAVE_32VRS is defined outside this file; presumably its arguments
	 * are (first VR, scratch GPR, base GPR) -- confirm in ppc_asm.h.
	 */
31	SAVE_32VRS(0, r4, r3)		/* store the vector registers first ... */
32	mfvscr	v0			/* ... so v0 can then be used to read VSCR */
33	li	r4, VRSTATE_VSCR	/* r4 = offset of the VSCR image */
34	stvx	v0, r4, r3		/* store VSCR at r3 + r4 */
35	blr
36EXPORT_SYMBOL(store_vr_state)
37
38/*
39 * Load the current task's vector registers and VSCR from its
40 * thread_struct.  VMX is enabled in the live MSR so the load can be
41 * done here, and MSR_VEC is set in the MSR image used on return.
42 * No other task's state has to be saved first: we give the VMX up
43 * every switch (ie, no lazy save of the vector registers).
44 *
45 * Note that on 32-bit this can only use registers that will be
46 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
47 */
48_GLOBAL(load_up_altivec)
	/*
	 * Register use, as far as this routine shows:
	 *   r4, r5, r6, r10 and v0 are used as scratch;
	 *   32-bit: r2 is the base that THREAD is added to, and r9 gets
	 *           MSR_VEC set (presumably the MSR to return with -- confirm);
	 *   64-bit: r13 is used as the PACA pointer, r12 gets MSR_VEC set and
	 *           is written to the _MSR slot of the frame at r1.
	 * Returns with blr, so LR must hold the caller's return address.
	 */
49	mfmsr	r5			/* grab the current MSR */
50#ifdef CONFIG_PPC_BOOK3S_64
51	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
52	ori	r5,r5,MSR_RI
53#endif
54	oris	r5,r5,MSR_VEC@h
55	MTMSRD(r5)			/* enable use of AltiVec now */
56	isync
57
58	/*
59	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
60	 * to optimise userspace context save/restore. Whenever we take an
61	 * altivec unavailable exception we must set VRSAVE to something non
62	 * zero. Set it to all 1s. See also the programming note in the ISA.
63	 */
64	mfspr	r4,SPRN_VRSAVE
65	cmpwi	0,r4,0
66	bne+	1f			/* already non-zero: leave VRSAVE alone */
67	li	r4,-1
68	mtspr	SPRN_VRSAVE,r4
691:
70	/* enable use of VMX after return */
71#ifdef CONFIG_PPC32
72	addi	r5,r2,THREAD		/* r5 = r2 + THREAD */
73	oris	r9,r9,MSR_VEC@h
74#else
75	ld	r4,PACACURRENT(r13)
76	addi	r5,r4,THREAD		/* Get THREAD */
77	oris	r12,r12,MSR_VEC@h
78	std	r12,_MSR(r1)		/* update the MSR slot in the frame at r1 */
79#ifdef CONFIG_PPC_BOOK3S_64
	/*
	 * Clear the PACASRR_VALID byte.  NOTE(review): presumably because
	 * the saved MSR was just changed and SRR1 no longer matches it --
	 * confirm against the interrupt return code.
	 */
80	li	r4,0
81	stb	r4,PACASRR_VALID(r13)
82#endif
83#endif
84	li	r4,1
85	stb	r4,THREAD_LOAD_VEC(r5)	/* store 1 in the byte at THREAD_LOAD_VEC */
86	addi	r6,r5,THREAD_VRSTATE	/* r6 = base of the thread's vector state */
87	li	r10,VRSTATE_VSCR	/* r10 = offset of the VSCR image */
88	stw	r4,THREAD_USED_VR(r5)	/* r4 is still 1 here */
89	lvx	v0,r10,r6		/* v0 = saved VSCR image */
90	mtvscr	v0			/* set VSCR before v0 is reused below */
91	REST_32VRS(0,r4,r6)		/* reload the vector registers; r4 reused as scratch */
92	/* restore registers and return */
93	blr
94_ASM_NOKPROBE_SYMBOL(load_up_altivec)
95
96/*
97 * save_altivec(tsk)
98 * Save the vector registers to its thread_struct
99 */
100_GLOBAL(save_altivec)
	/*
	 * In:   r3 = task pointer (THREAD is added to it below).
	 * Uses: r3, r4, r7 and v0.  v0 is not restored after being used to
	 *       stage VSCR, so it holds the VSCR value on return.
	 *
	 * The state is written to the area THREAD_VRSAVEAREA points at when
	 * that pointer is non-NULL, otherwise to THREAD_VRSTATE inside the
	 * thread_struct.
	 */
101	addi	r3,r3,THREAD		/* want THREAD of task */
102	PPC_LL	r7,THREAD_VRSAVEAREA(r3)	/* r7 = alternate save area, if any */
104	PPC_LCMPI	0,r7,0
105	bne	2f			/* non-NULL: save there */
106	addi	r7,r3,THREAD_VRSTATE	/* else use the state embedded in THREAD */
1072:	SAVE_32VRS(0,r4,r7)		/* store the vector registers; r4 is scratch */
108	mfvscr	v0			/* v0 is free now, use it to read VSCR */
109	li	r4,VRSTATE_VSCR		/* r4 = offset of the VSCR image */
110	stvx	v0,r4,r7		/* store VSCR at r7 + r4 */
111	blr
112
113#ifdef CONFIG_VSX
114
115#ifdef CONFIG_PPC32
116#error This asm code isn't ready for 32-bit kernels
117#endif
118
119/*
120 * load_up_vsx
121 * Enable VSX for the current task: r12 holds the MSR image to return
122 * with.  Reuse the fp and altivec loaders (load_up_fpu and
123 * load_up_altivec), but first check MSR_FP/MSR_VEC in r12 to see
124 * whether that state has been loaded already.
125 */
126_GLOBAL(load_up_vsx)
	/*
	 * Register use, as far as this routine shows:
	 *   r12 = MSR image that is tested, updated and written to _MSR(r1);
	 *   r13 = PACA pointer; r1 = frame holding the _MSR slot;
	 *   r4, r5, r6 are scratch.
	 * beql overwrites LR, and this routine does not return with blr: it
	 * leaves by branching to fast_interrupt_return_srr.
	 */
127/* Load FP and VSX registers if they haven't been done yet */
128	andi.	r5,r12,MSR_FP		/* CR0.EQ set <=> MSR_FP clear in r12 */
129	beql+	load_up_fpu		/* skip if already loaded */
130	andis.	r5,r12,MSR_VEC@h	/* CR0.EQ set <=> MSR_VEC clear in r12 */
131	beql+	load_up_altivec		/* skip if already loaded */
132
133#ifdef CONFIG_PPC_BOOK3S_64
134	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
135	li	r5,MSR_RI
136	mtmsrd	r5,1			/* L=1 form: only MSR[EE] and MSR[RI] are written (Power ISA) */
137#endif
138
139	ld	r4,PACACURRENT(r13)
140	addi	r4,r4,THREAD		/* Get THREAD */
141	li	r6,1
142	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
143	/* enable use of VSX after return */
144	oris	r12,r12,MSR_VSX@h
145	std	r12,_MSR(r1)		/* update the MSR slot in the frame at r1 */
	/*
	 * Clear the PACASRR_VALID byte.  NOTE(review): presumably because
	 * the saved MSR was just changed and SRR1 no longer matches it --
	 * confirm against the interrupt return code.
	 */
146	li	r4,0
147	stb	r4,PACASRR_VALID(r13)
148	b	fast_interrupt_return_srr
149
150#endif /* CONFIG_VSX */
151
152
153/*
154 * The routines below are in assembler so we can closely control the
155 * usage of floating-point registers.  These routines must be called
156 * with preempt disabled.
157 */
158	.data
	/*
	 * Floating-point constants 0, 1.0 and 0.5 used by the routines
	 * in the following .text section, and the LDCONST(fr, name) macro
	 * that loads one of them into FP register 'fr'.  Both LDCONST
	 * variants clobber r11.
	 */
159#ifdef CONFIG_PPC32
	/* 32-bit: single-precision constants, addressed absolutely (@ha/@l) */
160fpzero:
161	.long	0
162fpone:
163	.long	0x3f800000	/* 1.0 in single-precision FP */
164fphalf:
165	.long	0x3f000000	/* 0.5 in single-precision FP */
166
	/* r11 = high part of the address, then lfs loads the single */
167#define LDCONST(fr, name)	\
168	lis	r11,name@ha;	\
169	lfs	fr,name@l(r11)
170#else
171
	/* 64-bit: double-precision constants, addressed relative to the TOC in r2 */
172fpzero:
173	.quad	0
174fpone:
175	.quad	0x3ff0000000000000	/* 1.0 */
176fphalf:
177	.quad	0x3fe0000000000000	/* 0.5 */
178
	/* r11 = r2 + high part of the TOC offset, then lfd loads the double */
179#define LDCONST(fr, name)		\
180	addis	r11,r2,name@toc@ha;	\
181	lfd	fr,name@toc@l(r11)
182#endif
183	.text
184/*
185 * Internal routine to enable floating point and set FPSCR to 0.
186 * Don't call it from C; it doesn't use the normal calling convention.
187 */
188fpenable:
	/*
	 * Called with bl from the v* routines below, after they have saved
	 * their own return address in r12.
	 *
	 * On return:
	 *   r1   = new 64-byte stack frame (popped again by fpdisable);
	 *   r10  = MSR value from before MSR_FP was set;
	 *   fr31 = FPSCR value from before it was overwritten;
	 *   fr0, fr1 and fr31 saved at 24(r1), 16(r1) and 8(r1);
	 *   r11 and fr1 clobbered.
	 * MTFSF_L is defined outside this file; presumably it writes FPSCR
	 * from the given FP register -- confirm in ppc_asm.h.
	 */
189#ifdef CONFIG_PPC32
190	stwu	r1,-64(r1)
191#else
192	stdu	r1,-64(r1)
193#endif
194	mfmsr	r10			/* r10 = MSR to restore in fpdisable */
195	ori	r11,r10,MSR_FP
196	mtmsr	r11			/* FP must be on before the stfd below */
197	isync
198	stfd	fr0,24(r1)
199	stfd	fr1,16(r1)
200	stfd	fr31,8(r1)
201	LDCONST(fr1, fpzero)		/* fr1 = 0; clobbers r11 */
202	mffs	fr31			/* fr31 = current FPSCR, kept for fpdisable */
203	MTFSF_L(fr1)			/* FPSCR written from fr1 (zero) */
204	blr
205
206fpdisable:
	/*
	 * Common exit path of the v* routines below, reached with a plain
	 * branch.  Undoes fpenable and returns to the v* routine's caller.
	 *
	 * Expects: r12 = return address saved by the v* routine,
	 *          r10 = MSR saved by fpenable,
	 *          fr31 = FPSCR saved by fpenable,
	 *          r1 = the 64-byte frame created by fpenable.
	 */
207	mtlr	r12			/* return to the v* routine's caller */
208	MTFSF_L(fr31)			/* FPSCR written back from fr31 */
209	lfd	fr31,8(r1)
210	lfd	fr1,16(r1)
211	lfd	fr0,24(r1)
212	mtmsr	r10			/* restore the MSR only after the last lfd */
213	isync
214	addi	r1,r1,64		/* pop the frame pushed by fpenable */
215	blr
216
217/*
218 * Vector add, floating point.
219 */
220_GLOBAL(vaddfp)
	/*
	 * r3 -> destination, r4 and r5 -> sources; each is accessed as four
	 * consecutive single-precision values.
	 * dst[i] = [r4][i] + [r5][i]
	 * Uses r0, r6, r10, r11, r12 and CTR; fr0/fr1 are saved by fpenable
	 * and restored by fpdisable.
	 */
221	mflr	r12			/* keep return address for fpdisable */
222	bl	fpenable
223	li	r0,4
224	mtctr	r0			/* 4 elements */
225	li	r6,0			/* r6 = byte offset of the current element */
2261:	lfsx	fr0,r4,r6
227	lfsx	fr1,r5,r6
228	fadds	fr0,fr0,fr1
229	stfsx	fr0,r3,r6
230	addi	r6,r6,4
231	bdnz	1b
232	b	fpdisable		/* fpdisable returns to our caller */
233
234/*
235 * Vector subtract, floating point.
236 */
237_GLOBAL(vsubfp)
	/*
	 * r3 -> destination, r4 and r5 -> sources; each is accessed as four
	 * consecutive single-precision values.
	 * dst[i] = [r4][i] - [r5][i]
	 * Uses r0, r6, r10, r11, r12 and CTR; fr0/fr1 are saved by fpenable
	 * and restored by fpdisable.
	 */
238	mflr	r12			/* keep return address for fpdisable */
239	bl	fpenable
240	li	r0,4
241	mtctr	r0			/* 4 elements */
242	li	r6,0			/* r6 = byte offset of the current element */
2431:	lfsx	fr0,r4,r6
244	lfsx	fr1,r5,r6
245	fsubs	fr0,fr0,fr1
246	stfsx	fr0,r3,r6
247	addi	r6,r6,4
248	bdnz	1b
249	b	fpdisable		/* fpdisable returns to our caller */
250
251/*
252 * Vector multiply and add, floating point.
253 */
254_GLOBAL(vmaddfp)
	/*
	 * r3 -> destination, r4, r5 and r6 -> sources; each is accessed as
	 * four consecutive single-precision values.
	 * dst[i] = [r4][i] * [r6][i] + [r5][i]
	 * Uses r0, r7, r10, r11, r12 and CTR.  fr2 is saved here at 32(r1),
	 * inside the frame created by fpenable.
	 */
255	mflr	r12			/* keep return address for fpdisable */
256	bl	fpenable
257	stfd	fr2,32(r1)		/* fr2 is not saved by fpenable */
258	li	r0,4
259	mtctr	r0			/* 4 elements */
260	li	r7,0			/* r7 = byte offset of the current element */
2611:	lfsx	fr0,r4,r7
262	lfsx	fr1,r5,r7
263	lfsx	fr2,r6,r7
264	fmadds	fr0,fr0,fr2,fr1		/* fr0 = fr0 * fr2 + fr1 */
265	stfsx	fr0,r3,r7
266	addi	r7,r7,4
267	bdnz	1b
268	lfd	fr2,32(r1)		/* restore fr2 before the frame is popped */
269	b	fpdisable		/* fpdisable returns to our caller */
270
271/*
272 * Vector negative multiply and subtract, floating point.
273 */
274_GLOBAL(vnmsubfp)
	/*
	 * r3 -> destination, r4, r5 and r6 -> sources; each is accessed as
	 * four consecutive single-precision values.
	 * dst[i] = -([r4][i] * [r6][i] - [r5][i])
	 * Uses r0, r7, r10, r11, r12 and CTR.  fr2 is saved here at 32(r1),
	 * inside the frame created by fpenable.
	 */
275	mflr	r12			/* keep return address for fpdisable */
276	bl	fpenable
277	stfd	fr2,32(r1)		/* fr2 is not saved by fpenable */
278	li	r0,4
279	mtctr	r0			/* 4 elements */
280	li	r7,0			/* r7 = byte offset of the current element */
2811:	lfsx	fr0,r4,r7
282	lfsx	fr1,r5,r7
283	lfsx	fr2,r6,r7
284	fnmsubs	fr0,fr0,fr2,fr1		/* fr0 = -(fr0 * fr2 - fr1) */
285	stfsx	fr0,r3,r7
286	addi	r7,r7,4
287	bdnz	1b
288	lfd	fr2,32(r1)		/* restore fr2 before the frame is popped */
289	b	fpdisable		/* fpdisable returns to our caller */
290
291/*
292 * Vector reciprocal estimate.  We just compute 1.0/x.
293 * r3 -> destination, r4 -> source.
294 */
295_GLOBAL(vrefp)
	/*
	 * Both operands are accessed as four consecutive single-precision
	 * values: dst[i] = 1.0 / src[i], computed with a real divide.
	 * Uses r0, r6, r10, r11, r12 and CTR; fr0/fr1 are saved by fpenable
	 * and restored by fpdisable.
	 */
296	mflr	r12			/* keep return address for fpdisable */
297	bl	fpenable
298	li	r0,4
299	LDCONST(fr1, fpone)		/* fr1 = 1.0; clobbers r11 */
300	mtctr	r0			/* 4 elements */
301	li	r6,0			/* r6 = byte offset of the current element */
3021:	lfsx	fr0,r4,r6
303	fdivs	fr0,fr1,fr0		/* fr0 = 1.0 / fr0 */
304	stfsx	fr0,r3,r6
305	addi	r6,r6,4
306	bdnz	1b
307	b	fpdisable		/* fpdisable returns to our caller */
308
309/*
310 * Vector reciprocal square-root estimate, floating point.
311 * We use the frsqrte instruction for the initial estimate followed
312 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
313 * r3 -> destination, r4 -> source.
314 */
315_GLOBAL(vrsqrtefp)
	/*
	 * Both operands are accessed as four consecutive single-precision
	 * values.  Per element: r = frsqrte(s), then two rounds of
	 *   r = r + 0.5 * r * (1 - s * r * r)
	 * Uses r0, r6, r10, r11, r12 and CTR.  fr2-fr5 are saved here at
	 * 32..56(r1), filling the rest of the 64-byte frame created by
	 * fpenable; fr0/fr1 are saved by fpenable itself.
	 */
316	mflr	r12			/* keep return address for fpdisable */
317	bl	fpenable
318	stfd	fr2,32(r1)
319	stfd	fr3,40(r1)
320	stfd	fr4,48(r1)
321	stfd	fr5,56(r1)
322	li	r0,4
323	LDCONST(fr4, fpone)		/* fr4 = 1.0; clobbers r11 */
324	LDCONST(fr5, fphalf)		/* fr5 = 0.5; clobbers r11 */
325	mtctr	r0			/* 4 elements */
326	li	r6,0			/* r6 = byte offset of the current element */
3271:	lfsx	fr0,r4,r6
328	frsqrte	fr1,fr0		/* r = frsqrte(s) */
329	fmuls	fr3,fr1,fr0	/* r * s */
330	fmuls	fr2,fr1,fr5	/* r * 0.5 */
331	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
332	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
333	fmuls	fr3,fr1,fr0	/* r * s */
334	fmuls	fr2,fr1,fr5	/* r * 0.5 */
335	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
336	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
337	stfsx	fr1,r3,r6
338	addi	r6,r6,4
339	bdnz	1b
340	lfd	fr5,56(r1)		/* restore fr2-fr5 before the frame is popped */
341	lfd	fr4,48(r1)
342	lfd	fr3,40(r1)
343	lfd	fr2,32(r1)
344	b	fpdisable		/* fpdisable returns to our caller */
345