xref: /linux/arch/loongarch/kernel/fpu.S (revision 803b0fc5c3f2baa6e54978cd576407896f789b08)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Author: Lu Zeng <zenglu@loongson.cn>
 *         Pei Huang <huangpei@loongson.cn>
 *         Huacai Chen <chenhuacai@loongson.cn>
 *
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
9*803b0fc5SHuacai Chen#include <asm/asm.h>
10*803b0fc5SHuacai Chen#include <asm/asmmacro.h>
11*803b0fc5SHuacai Chen#include <asm/asm-offsets.h>
12*803b0fc5SHuacai Chen#include <asm/errno.h>
13*803b0fc5SHuacai Chen#include <asm/export.h>
14*803b0fc5SHuacai Chen#include <asm/fpregdef.h>
15*803b0fc5SHuacai Chen#include <asm/loongarch.h>
16*803b0fc5SHuacai Chen#include <asm/regdef.h>
17*803b0fc5SHuacai Chen
/* Byte stride between consecutive $fN slots in sc_save_fp / sc_restore_fp. */
#define FPU_REG_WIDTH		8
/* The LSX and LASX widths are not referenced in the visible part of this file. */
#define LSX_REG_WIDTH		16
#define LASX_REG_WIDTH		32
21*803b0fc5SHuacai Chen
/*
 * EX insn, reg, src, offs
 *
 * Emit "\insn \reg, \src, \offs" behind a label that is unique to this
 * expansion (\@ is the assembler's macro-expansion counter), and add an
 * __ex_table record that pairs the label with the "fault" routine
 * defined at the end of this file.
 *
 * NOTE(review): the record is presumably consumed by the kernel's
 * exception fixup code so that a faulting access resumes at "fault";
 * that consumer is not visible here.
 */
	.macro	EX insn, reg, src, offs
.ex\@:	\insn	\reg, \src, \offs
	.pushsection __ex_table,"a"
	PTR	.ex\@, fault
	.popsection
	.endm
28*803b0fc5SHuacai Chen
/*
 * sc_save_fp base
 *
 * Store $f0..$f31 with fst.d into the array at \base; register N goes to
 * byte offset N * FPU_REG_WIDTH.  Every store is issued through EX, so
 * each one gets its own __ex_table record.
 */
	.macro sc_save_fp base
	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	EX	fst.d $f\n, \base, (\n * FPU_REG_WIDTH)
	.endr
	.endm
63*803b0fc5SHuacai Chen
/*
 * sc_restore_fp base
 *
 * Load $f0..$f31 with fld.d from the array at \base; register N comes
 * from byte offset N * FPU_REG_WIDTH.  Every load is issued through EX,
 * so each one gets its own __ex_table record.
 */
	.macro sc_restore_fp base
	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	EX	fld.d $f\n, \base, (\n * FPU_REG_WIDTH)
	.endr
	.endm
98*803b0fc5SHuacai Chen
/*
 * sc_save_fcc base, tmp0, tmp1
 *
 * Pack the eight condition flags $fcc0..$fcc7 into one 64-bit word, one
 * byte per flag ($fccN occupies bits N*8+7 .. N*8), and store that word
 * at \base with an EX-protected st.d.
 *
 * Clobbers \tmp0 and \tmp1.
 */
	.macro sc_save_fcc base, tmp0, tmp1
	/* $fcc0 seeds the accumulator, which also defines bits 63..8. */
	movcf2gr	\tmp0, $fcc0
	move	\tmp1, \tmp0
	/* Insert the remaining flags into their own byte lanes. */
	.irp	n,1,2,3,4,5,6,7
	movcf2gr	\tmp0, $fcc\n
	bstrins.d	\tmp1, \tmp0, (\n * 8 + 7), (\n * 8)
	.endr
	EX	st.d \tmp1, \base, 0
	.endm
118*803b0fc5SHuacai Chen
/*
 * sc_restore_fcc base, tmp0, tmp1
 *
 * Inverse of the packing done by sc_save_fcc: fetch one 64-bit word from
 * \base with an EX-protected ld.d, then for N = 0..7 extract bits
 * N*8+7 .. N*8 and move the result into $fccN.
 *
 * Clobbers \tmp0 and \tmp1.
 */
	.macro sc_restore_fcc base, tmp0, tmp1
	EX	ld.d \tmp0, \base, 0
	.irp	n,0,1,2,3,4,5,6,7
	bstrpick.d	\tmp1, \tmp0, (\n * 8 + 7), (\n * 8)
	movgr2cf	$fcc\n, \tmp1
	.endr
	.endm
138*803b0fc5SHuacai Chen
/*
 * sc_save_fcsr base, tmp0
 *
 * Read fcsr0 into \tmp0 and store it as a 32-bit word at \base through
 * an EX-protected st.w.  Clobbers \tmp0.
 */
	.macro sc_save_fcsr base, tmp0
	movfcsr2gr	\tmp0, fcsr0
	EX	st.w \tmp0, \base, 0
	.endm
143*803b0fc5SHuacai Chen
/*
 * sc_restore_fcsr base, tmp0
 *
 * Load a 32-bit word from \base through an EX-protected ld.w and write
 * it to fcsr0.  Clobbers \tmp0.
 */
	.macro sc_restore_fcsr base, tmp0
	EX	ld.w \tmp0, \base, 0
	movgr2fcsr	fcsr0, \tmp0
	.endm
148*803b0fc5SHuacai Chen
/*
 * sc_save_vcsr base, tmp0
 *
 * Read the register named vcsr16 into \tmp0 and store it as a 32-bit
 * word at \base through an EX-protected st.w.  Clobbers \tmp0.
 *
 * NOTE(review): vcsr16 is presumably the vector-unit control/status
 * register; this macro has no user in the visible part of this file.
 */
	.macro sc_save_vcsr base, tmp0
	movfcsr2gr	\tmp0, vcsr16
	EX	st.w \tmp0, \base, 0
	.endm
153*803b0fc5SHuacai Chen
/*
 * sc_restore_vcsr base, tmp0
 *
 * Load a 32-bit word from \base through an EX-protected ld.w and write
 * it to the register named vcsr16.  Clobbers \tmp0.
 *
 * NOTE(review): vcsr16 is presumably the vector-unit control/status
 * register; this macro has no user in the visible part of this file.
 */
	.macro sc_restore_vcsr base, tmp0
	EX	ld.w \tmp0, \base, 0
	movgr2fcsr	vcsr16, \tmp0
	.endm
158*803b0fc5SHuacai Chen
/*
 * _save_fp: save a thread's FP context.
 *
 * a0 is handed unchanged to the fpu_save_* helper macros, which are
 * defined outside this file; t1 and t2 are passed to them as scratch
 * registers.  The control/status register is saved first, then the
 * double-precision registers, then the condition flags.
 */
SYM_FUNC_START(_save_fp)
	fpu_save_csr	a0 t1
	fpu_save_double	a0 t1			/* t1 is clobbered */
	fpu_save_cc	a0 t1 t2		/* t1 and t2 are clobbered */
	jr	ra
SYM_FUNC_END(_save_fp)
EXPORT_SYMBOL(_save_fp)
169*803b0fc5SHuacai Chen
/*
 * _restore_fp: restore a thread's FP context.
 *
 * a0 is handed unchanged to the fpu_restore_* helper macros, which are
 * defined outside this file; t1 and t2 are passed to them as scratch
 * registers.  The double-precision registers are reloaded first, then
 * the control/status register, then the condition flags.
 */
SYM_FUNC_START(_restore_fp)
	fpu_restore_double	a0 t1		/* t1 is clobbered */
	fpu_restore_csr	a0 t1
	fpu_restore_cc	a0 t1 t2		/* t1 and t2 are clobbered */
	jr	ra
SYM_FUNC_END(_restore_fp)
179*803b0fc5SHuacai Chen
/*
 * _init_fpu: enable the FPU and put it into a known state.
 *
 * a0: value written to fcsr0.
 *
 * Sets CSR_EUEN_FPEN in LOONGARCH_CSR_EUEN, loads fcsr0 from a0, and
 * then fills $f0..$f31 with an all-ones 64-bit pattern.  The original
 * comment describes that pattern as a signalling NaN whether it is read
 * as single or as double precision.
 *
 * NOTE(review): with the IEEE 754-2008 NaN encoding an all-ones fraction
 * has the quiet bit set - confirm the "signalling" wording.
 *
 * Clobbers t1.
 */
SYM_FUNC_START(_init_fpu)
	/* Mask and value are the same register, so only FPEN is set. */
	li.w	t1, CSR_EUEN_FPEN
	csrxchg	t1, t1, LOONGARCH_CSR_EUEN

	movgr2fcsr	fcsr0, a0

	/* -1 gives the all-ones fill pattern for every FP register. */
	li.w	t1, -1

	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	movgr2fr.d	$f\n, t1
	.endr

	jr	ra
SYM_FUNC_END(_init_fpu)
231*803b0fc5SHuacai Chen
/*
 * _save_fp_context: write the live FP state to three memory areas.
 *
 * a0: fpregs - receives $f0..$f31, FPU_REG_WIDTH bytes apart
 * a1: fcc    - receives the packed condition flags (64-bit word)
 * a2: fcsr   - receives fcsr0 (32-bit word)
 *
 * Returns 0 in a0 when every access completes.  All stores go through
 * EX, whose __ex_table records name "fault"; that routine returns
 * -EFAULT in a0 instead.
 *
 * Clobbers t1 and t2.
 */
SYM_FUNC_START(_save_fp_context)
	sc_save_fcc	a1 t1 t2
	sc_save_fcsr	a2 t1
	sc_save_fp	a0
	li.w	a0, 0				/* success */
	jr	ra
SYM_FUNC_END(_save_fp_context)
244*803b0fc5SHuacai Chen
/*
 * _restore_fp_context: reload the FP state from three memory areas.
 *
 * a0: fpregs - source of $f0..$f31, FPU_REG_WIDTH bytes apart
 * a1: fcc    - source of the packed condition flags (64-bit word)
 * a2: fcsr   - source of fcsr0 (32-bit word)
 *
 * Returns 0 in a0 when every access completes.  All loads go through
 * EX, whose __ex_table records name "fault"; that routine returns
 * -EFAULT in a0 instead.
 *
 * Clobbers t1 and t2.
 */
SYM_FUNC_START(_restore_fp_context)
	sc_restore_fp	a0
	sc_restore_fcc	a1 t1 t2
	sc_restore_fcsr	a2 t1
	li.w	a0, 0				/* success */
	jr	ra
SYM_FUNC_END(_restore_fp_context)
257*803b0fc5SHuacai Chen
/*
 * fault: target named by every __ex_table record that EX emits.
 *
 * Loads -EFAULT into a0 and returns through ra.  The routines in this
 * file that use EX never modify ra, so this returns to their caller.
 */
SYM_FUNC_START(fault)
	li.w	a0, -EFAULT			/* failure */
	jr	ra
SYM_FUNC_END(fault)
262