xref: /linux/arch/loongarch/kernel/fpu.S (revision 9e7c9b8eb719835638ee74d93dccc2173581324c)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Author: Lu Zeng <zenglu@loongson.cn>
4 *         Pei Huang <huangpei@loongson.cn>
5 *         Huacai Chen <chenhuacai@loongson.cn>
6 *
7 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
8 */
9#include <asm/asm.h>
10#include <asm/asmmacro.h>
11#include <asm/asm-offsets.h>
12#include <asm/errno.h>
13#include <asm/export.h>
14#include <asm/fpregdef.h>
15#include <asm/loongarch.h>
16#include <asm/regdef.h>
17
/*
 * Per-register widths, in bytes, for the register save layouts.
 *
 * FPU_REG_WIDTH is the stride between consecutive 64-bit FP register
 * slots used by sc_save_fp/sc_restore_fp.  The LSX/LASX widths are not
 * referenced in the visible part of this file.
 */
#define FPU_REG_WIDTH		8	/* one fst.d/fld.d slot */
#define LSX_REG_WIDTH		16
#define LASX_REG_WIDTH		32
21
22	.macro	EX insn, reg, src, offs
23.ex\@:	\insn	\reg, \src, \offs
24	.section __ex_table,"a"
25	PTR	.ex\@, fault
26	.previous
27	.endm
28
29	.macro sc_save_fp base
30	EX	fst.d $f0,  \base, (0 * FPU_REG_WIDTH)
31	EX	fst.d $f1,  \base, (1 * FPU_REG_WIDTH)
32	EX	fst.d $f2,  \base, (2 * FPU_REG_WIDTH)
33	EX	fst.d $f3,  \base, (3 * FPU_REG_WIDTH)
34	EX	fst.d $f4,  \base, (4 * FPU_REG_WIDTH)
35	EX	fst.d $f5,  \base, (5 * FPU_REG_WIDTH)
36	EX	fst.d $f6,  \base, (6 * FPU_REG_WIDTH)
37	EX	fst.d $f7,  \base, (7 * FPU_REG_WIDTH)
38	EX	fst.d $f8,  \base, (8 * FPU_REG_WIDTH)
39	EX	fst.d $f9,  \base, (9 * FPU_REG_WIDTH)
40	EX	fst.d $f10, \base, (10 * FPU_REG_WIDTH)
41	EX	fst.d $f11, \base, (11 * FPU_REG_WIDTH)
42	EX	fst.d $f12, \base, (12 * FPU_REG_WIDTH)
43	EX	fst.d $f13, \base, (13 * FPU_REG_WIDTH)
44	EX	fst.d $f14, \base, (14 * FPU_REG_WIDTH)
45	EX	fst.d $f15, \base, (15 * FPU_REG_WIDTH)
46	EX	fst.d $f16, \base, (16 * FPU_REG_WIDTH)
47	EX	fst.d $f17, \base, (17 * FPU_REG_WIDTH)
48	EX	fst.d $f18, \base, (18 * FPU_REG_WIDTH)
49	EX	fst.d $f19, \base, (19 * FPU_REG_WIDTH)
50	EX	fst.d $f20, \base, (20 * FPU_REG_WIDTH)
51	EX	fst.d $f21, \base, (21 * FPU_REG_WIDTH)
52	EX	fst.d $f22, \base, (22 * FPU_REG_WIDTH)
53	EX	fst.d $f23, \base, (23 * FPU_REG_WIDTH)
54	EX	fst.d $f24, \base, (24 * FPU_REG_WIDTH)
55	EX	fst.d $f25, \base, (25 * FPU_REG_WIDTH)
56	EX	fst.d $f26, \base, (26 * FPU_REG_WIDTH)
57	EX	fst.d $f27, \base, (27 * FPU_REG_WIDTH)
58	EX	fst.d $f28, \base, (28 * FPU_REG_WIDTH)
59	EX	fst.d $f29, \base, (29 * FPU_REG_WIDTH)
60	EX	fst.d $f30, \base, (30 * FPU_REG_WIDTH)
61	EX	fst.d $f31, \base, (31 * FPU_REG_WIDTH)
62	.endm
63
64	.macro sc_restore_fp base
65	EX	fld.d $f0,  \base, (0 * FPU_REG_WIDTH)
66	EX	fld.d $f1,  \base, (1 * FPU_REG_WIDTH)
67	EX	fld.d $f2,  \base, (2 * FPU_REG_WIDTH)
68	EX	fld.d $f3,  \base, (3 * FPU_REG_WIDTH)
69	EX	fld.d $f4,  \base, (4 * FPU_REG_WIDTH)
70	EX	fld.d $f5,  \base, (5 * FPU_REG_WIDTH)
71	EX	fld.d $f6,  \base, (6 * FPU_REG_WIDTH)
72	EX	fld.d $f7,  \base, (7 * FPU_REG_WIDTH)
73	EX	fld.d $f8,  \base, (8 * FPU_REG_WIDTH)
74	EX	fld.d $f9,  \base, (9 * FPU_REG_WIDTH)
75	EX	fld.d $f10, \base, (10 * FPU_REG_WIDTH)
76	EX	fld.d $f11, \base, (11 * FPU_REG_WIDTH)
77	EX	fld.d $f12, \base, (12 * FPU_REG_WIDTH)
78	EX	fld.d $f13, \base, (13 * FPU_REG_WIDTH)
79	EX	fld.d $f14, \base, (14 * FPU_REG_WIDTH)
80	EX	fld.d $f15, \base, (15 * FPU_REG_WIDTH)
81	EX	fld.d $f16, \base, (16 * FPU_REG_WIDTH)
82	EX	fld.d $f17, \base, (17 * FPU_REG_WIDTH)
83	EX	fld.d $f18, \base, (18 * FPU_REG_WIDTH)
84	EX	fld.d $f19, \base, (19 * FPU_REG_WIDTH)
85	EX	fld.d $f20, \base, (20 * FPU_REG_WIDTH)
86	EX	fld.d $f21, \base, (21 * FPU_REG_WIDTH)
87	EX	fld.d $f22, \base, (22 * FPU_REG_WIDTH)
88	EX	fld.d $f23, \base, (23 * FPU_REG_WIDTH)
89	EX	fld.d $f24, \base, (24 * FPU_REG_WIDTH)
90	EX	fld.d $f25, \base, (25 * FPU_REG_WIDTH)
91	EX	fld.d $f26, \base, (26 * FPU_REG_WIDTH)
92	EX	fld.d $f27, \base, (27 * FPU_REG_WIDTH)
93	EX	fld.d $f28, \base, (28 * FPU_REG_WIDTH)
94	EX	fld.d $f29, \base, (29 * FPU_REG_WIDTH)
95	EX	fld.d $f30, \base, (30 * FPU_REG_WIDTH)
96	EX	fld.d $f31, \base, (31 * FPU_REG_WIDTH)
97	.endm
98
99	.macro sc_save_fcc base, tmp0, tmp1
100	movcf2gr	\tmp0, $fcc0
101	move	\tmp1, \tmp0
102	movcf2gr	\tmp0, $fcc1
103	bstrins.d	\tmp1, \tmp0, 15, 8
104	movcf2gr	\tmp0, $fcc2
105	bstrins.d	\tmp1, \tmp0, 23, 16
106	movcf2gr	\tmp0, $fcc3
107	bstrins.d	\tmp1, \tmp0, 31, 24
108	movcf2gr	\tmp0, $fcc4
109	bstrins.d	\tmp1, \tmp0, 39, 32
110	movcf2gr	\tmp0, $fcc5
111	bstrins.d	\tmp1, \tmp0, 47, 40
112	movcf2gr	\tmp0, $fcc6
113	bstrins.d	\tmp1, \tmp0, 55, 48
114	movcf2gr	\tmp0, $fcc7
115	bstrins.d	\tmp1, \tmp0, 63, 56
116	EX	st.d \tmp1, \base, 0
117	.endm
118
119	.macro sc_restore_fcc base, tmp0, tmp1
120	EX	ld.d \tmp0, \base, 0
121	bstrpick.d	\tmp1, \tmp0, 7, 0
122	movgr2cf	$fcc0, \tmp1
123	bstrpick.d	\tmp1, \tmp0, 15, 8
124	movgr2cf	$fcc1, \tmp1
125	bstrpick.d	\tmp1, \tmp0, 23, 16
126	movgr2cf	$fcc2, \tmp1
127	bstrpick.d	\tmp1, \tmp0, 31, 24
128	movgr2cf	$fcc3, \tmp1
129	bstrpick.d	\tmp1, \tmp0, 39, 32
130	movgr2cf	$fcc4, \tmp1
131	bstrpick.d	\tmp1, \tmp0, 47, 40
132	movgr2cf	$fcc5, \tmp1
133	bstrpick.d	\tmp1, \tmp0, 55, 48
134	movgr2cf	$fcc6, \tmp1
135	bstrpick.d	\tmp1, \tmp0, 63, 56
136	movgr2cf	$fcc7, \tmp1
137	.endm
138
139	.macro sc_save_fcsr base, tmp0
140	movfcsr2gr	\tmp0, fcsr0
141	EX	st.w \tmp0, \base, 0
142	.endm
143
144	.macro sc_restore_fcsr base, tmp0
145	EX	ld.w \tmp0, \base, 0
146	movgr2fcsr	fcsr0, \tmp0
147	.endm
148
/*
 * _save_fp - save a thread's FP context.
 *
 * a0: base pointer handed to the fpu_save_* helper macros (the layout
 *     behind it is defined by those macros, which live outside this file).
 *
 * Saves, in this order, the FP control/status register, the double
 * registers and the condition flags.  Clobbers t1 and t2.
 */
SYM_FUNC_START(_save_fp)
	fpu_save_csr	a0 t1
	fpu_save_double	a0 t1			# clobbers t1
	fpu_save_cc	a0 t1 t2		# clobbers t1, t2
	jr	ra
SYM_FUNC_END(_save_fp)
EXPORT_SYMBOL(_save_fp)
159
/*
 * _restore_fp - restore a thread's FP context.
 *
 * a0: base pointer handed to the fpu_restore_* helper macros (the layout
 *     behind it is defined by those macros, which live outside this file).
 *
 * Restores, in this order, the double registers, the FP control/status
 * register and the condition flags.  Clobbers t1 and t2.
 */
SYM_FUNC_START(_restore_fp)
	fpu_restore_double	a0 t1		# clobbers t1
	fpu_restore_csr	a0 t1
	fpu_restore_cc	a0 t1 t2		# clobbers t1, t2
	jr	ra
SYM_FUNC_END(_restore_fp)
169
/*
 * _init_fpu - enable the FPU and put it into a known initial state.
 *
 * a0: value to initialize fcsr0 to.
 *
 * Sets the FPEN bit in CSR.EUEN, writes a0 to fcsr0, then fills all 32
 * FP registers with the 64-bit pattern obtained from "li.w t1, -1".
 * The original comment describes that pattern as a signalling NaN in
 * both single and double precision.
 * NOTE(review): an all-ones pattern has the top mantissa bit set, which
 * is a quiet NaN under IEEE 754-2008 encoding; confirm which is intended.
 *
 * Clobbers t1.
 */

SYM_FUNC_START(_init_fpu)
	li.w	t1, CSR_EUEN_FPEN
	csrxchg	t1, t1, LOONGARCH_CSR_EUEN	# value == mask: set FPEN

	movgr2fcsr	fcsr0, a0

	li.w	t1, -1				# fill pattern (SNaN per header)

	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	movgr2fr.d	$f\n, t1
	.endr

	jr	ra
SYM_FUNC_END(_init_fpu)
221
/*
 * _save_fp_context - write the live FP state out to three buffers.
 *
 * a0: fpregs - receives $f0..$f31, one FPU_REG_WIDTH-byte slot each
 * a1: fcc    - receives the packed condition flags (one 64-bit word)
 * a2: fcsr   - receives fcsr0 (one 32-bit word)
 *
 * Returns 0 in a0 on success.  Every memory access is emitted through
 * EX and therefore has an __ex_table entry naming fault, which returns
 * -EFAULT in a0 instead.  Clobbers t1 and t2.
 */
SYM_FUNC_START(_save_fp_context)
	sc_save_fcc	a1 t1 t2
	sc_save_fcsr	a2 t1
	sc_save_fp	a0
	li.w	a0, 0					# success
	jr	ra
SYM_FUNC_END(_save_fp_context)
234
/*
 * _restore_fp_context - reload the live FP state from three buffers.
 *
 * a0: fpregs - source of $f0..$f31, one FPU_REG_WIDTH-byte slot each
 * a1: fcc    - source of the packed condition flags (one 64-bit word)
 * a2: fcsr   - source of fcsr0 (one 32-bit word)
 *
 * Returns 0 in a0 on success.  Every memory access is emitted through
 * EX and therefore has an __ex_table entry naming fault, which returns
 * -EFAULT in a0 instead.  Clobbers t1 and t2.
 */
SYM_FUNC_START(_restore_fp_context)
	sc_restore_fp	a0
	sc_restore_fcc	a1 t1 t2
	sc_restore_fcsr	a2 t1
	li.w	a0, 0					# success
	jr	ra
SYM_FUNC_END(_restore_fp_context)
247
/*
 * fault - target named by every __ex_table entry that EX emits.
 *
 * Loads -EFAULT into a0 and returns through ra, so the caller of the
 * routine whose access faulted sees -EFAULT as the return value.
 * NOTE(review): assumes ra is still the original return address when
 * control arrives here; the exception fixup path is outside this file.
 */
SYM_FUNC_START(fault)
	li.w	a0, -EFAULT				# failure
	jr	ra
SYM_FUNC_END(fault)
252