/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Author: Lu Zeng <zenglu@loongson.cn>
 *         Pei Huang <huangpei@loongson.cn>
 *         Huacai Chen <chenhuacai@loongson.cn>
 *
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/asm-offsets.h>
#include <asm/errno.h>
#include <asm/export.h>
#include <asm/fpregdef.h>
#include <asm/loongarch.h>
#include <asm/regdef.h>

/* Size in bytes of one register slot in the respective save area. */
#define FPU_REG_WIDTH		8
#define LSX_REG_WIDTH		16
#define LASX_REG_WIDTH		32

/*
 * EX insn, reg, src, offs
 *
 * Emit "\insn \reg, \src, \offs" under a unique local label and add a
 * (label, fault) pair to the __ex_table section, so that the table names
 * `fault` as the fixup for this instruction.
 */
	.macro	EX insn, reg, src, offs
.ex\@:	\insn	\reg, \src, \offs
	.section __ex_table,"a"
	PTR	.ex\@, fault
	.previous
	.endm

/*
 * Store $f0..$f31 (64 bits each) into consecutive FPU_REG_WIDTH-byte
 * slots starting at \base.  Every store is wrapped in EX.
 */
	.macro	sc_save_fp base
	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	EX	fst.d	$f\num, \base, (\num * FPU_REG_WIDTH)
	.endr
	.endm

/*
 * Load $f0..$f31 (64 bits each) from consecutive FPU_REG_WIDTH-byte
 * slots starting at \base.  Every load is wrapped in EX.
 */
	.macro	sc_restore_fp base
	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	EX	fld.d	$f\num, \base, (\num * FPU_REG_WIDTH)
	.endr
	.endm

/*
 * Pack $fcc0..$fcc7 into a single 64-bit word, one condition flag per
 * byte ($fcc0 in bits 7:0 up to $fcc7 in bits 63:56), and store that
 * word at \base.  Clobbers \tmp0 and \tmp1.
 */
	.macro	sc_save_fcc base, tmp0, tmp1
	movcf2gr	\tmp0, $fcc0
	move		\tmp1, \tmp0		/* byte 0 seeds the packed word */
	movcf2gr	\tmp0, $fcc1
	bstrins.d	\tmp1, \tmp0, 15, 8
	movcf2gr	\tmp0, $fcc2
	bstrins.d	\tmp1, \tmp0, 23, 16
	movcf2gr	\tmp0, $fcc3
	bstrins.d	\tmp1, \tmp0, 31, 24
	movcf2gr	\tmp0, $fcc4
	bstrins.d	\tmp1, \tmp0, 39, 32
	movcf2gr	\tmp0, $fcc5
	bstrins.d	\tmp1, \tmp0, 47, 40
	movcf2gr	\tmp0, $fcc6
	bstrins.d	\tmp1, \tmp0, 55, 48
	movcf2gr	\tmp0, $fcc7
	bstrins.d	\tmp1, \tmp0, 63, 56
	EX	st.d	\tmp1, \base, 0
	.endm

/*
 * Inverse of sc_save_fcc: load the packed 64-bit word from \base and
 * write each byte back to its condition flag register.  Clobbers \tmp0
 * and \tmp1.
 */
	.macro	sc_restore_fcc base, tmp0, tmp1
	EX	ld.d	\tmp0, \base, 0
	bstrpick.d	\tmp1, \tmp0, 7, 0
	movgr2cf	$fcc0, \tmp1
	bstrpick.d	\tmp1, \tmp0, 15, 8
	movgr2cf	$fcc1, \tmp1
	bstrpick.d	\tmp1, \tmp0, 23, 16
	movgr2cf	$fcc2, \tmp1
	bstrpick.d	\tmp1, \tmp0, 31, 24
	movgr2cf	$fcc3, \tmp1
	bstrpick.d	\tmp1, \tmp0, 39, 32
	movgr2cf	$fcc4, \tmp1
	bstrpick.d	\tmp1, \tmp0, 47, 40
	movgr2cf	$fcc5, \tmp1
	bstrpick.d	\tmp1, \tmp0, 55, 48
	movgr2cf	$fcc6, \tmp1
	bstrpick.d	\tmp1, \tmp0, 63, 56
	movgr2cf	$fcc7, \tmp1
	.endm

/* Read fcsr0 into \tmp0 and store it as a 32-bit word at \base. */
	.macro	sc_save_fcsr base, tmp0
	movfcsr2gr	\tmp0, fcsr0
	EX	st.w	\tmp0, \base, 0
	.endm

/* Load a 32-bit word from \base into \tmp0 and write it to fcsr0. */
	.macro	sc_restore_fcsr base, tmp0
	EX	ld.w	\tmp0, \base, 0
	movgr2fcsr	fcsr0, \tmp0
	.endm

/* Read vcsr16 into \tmp0 and store it as a 32-bit word at \base. */
	.macro	sc_save_vcsr base, tmp0
	movfcsr2gr	\tmp0, vcsr16
	EX	st.w	\tmp0, \base, 0
	.endm

/* Load a 32-bit word from \base into \tmp0 and write it to vcsr16. */
	.macro	sc_restore_vcsr base, tmp0
	EX	ld.w	\tmp0, \base, 0
	movgr2fcsr	vcsr16, \tmp0
	.endm

/*
 * Save a thread's fp context.
 *
 * a0 is handed unchanged to each fpu_save_* helper macro; t1 and t2 are
 * used as scratch registers.
 */
SYM_FUNC_START(_save_fp)
	fpu_save_csr	a0 t1
	fpu_save_double	a0 t1			/* clobbers t1 */
	fpu_save_cc	a0 t1 t2		/* clobbers t1, t2 */
	jr	ra
SYM_FUNC_END(_save_fp)
EXPORT_SYMBOL(_save_fp)

/*
 * Restore a thread's fp context.
 *
 * a0 is handed unchanged to each fpu_restore_* helper macro; t1 and t2
 * are used as scratch registers.
 */
SYM_FUNC_START(_restore_fp)
	fpu_restore_double	a0 t1		/* clobbers t1 */
	fpu_restore_csr		a0 t1
	fpu_restore_cc		a0 t1 t2	/* clobbers t1, t2 */
	jr	ra
SYM_FUNC_END(_restore_fp)

/*
 * Load the FPU with signalling NaNs.  The bit pattern used here is
 * intended to represent a signalling NaN whether it is interpreted as
 * single or as double precision.
 *
 * The value to initialize fcsr0 to comes in $a0.
 *
 * NOTE(review): the pattern written is all ones; under the IEEE 754-2008
 * NaN encoding that may be a quiet NaN rather than a signalling one --
 * confirm the intent against the architecture manual.
 */
SYM_FUNC_START(_init_fpu)
	/* Set the FPEN bit in CSR.EUEN (mask and value are both t1). */
	li.w	t1, CSR_EUEN_FPEN
	csrxchg	t1, t1, LOONGARCH_CSR_EUEN

	movgr2fcsr	fcsr0, a0

	li.w	t1, -1				/* SNaN */

	/* Fill every FP register with the pattern held in t1. */
	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	movgr2fr.d	$f\num, t1
	.endr

	jr	ra
SYM_FUNC_END(_init_fpu)

/*
 * Save the FP state to three separate destinations.
 *
 * a0: fpregs
 * a1: fcc
 * a2: fcsr
 *
 * Returns 0 in a0 when every access completes.  Each memory access is
 * wrapped in EX and therefore has `fault` as its __ex_table fixup.
 * Clobbers t1, t2.
 */
SYM_FUNC_START(_save_fp_context)
	sc_save_fcc	a1 t1 t2
	sc_save_fcsr	a2 t1
	sc_save_fp	a0
	li.w	a0, 0				/* success */
	jr	ra
SYM_FUNC_END(_save_fp_context)

/*
 * Restore the FP state from three separate sources.
 *
 * a0: fpregs
 * a1: fcc
 * a2: fcsr
 *
 * Returns 0 in a0 when every access completes.  Each memory access is
 * wrapped in EX and therefore has `fault` as its __ex_table fixup.
 * Clobbers t1, t2.
 */
SYM_FUNC_START(_restore_fp_context)
	sc_restore_fp	a0
	sc_restore_fcc	a1 t1 t2
	sc_restore_fcsr	a2 t1
	li.w	a0, 0				/* success */
	jr	ra
SYM_FUNC_END(_restore_fp_context)

/*
 * Fixup target named by every __ex_table entry that EX emits: place
 * -EFAULT in a0 and return through ra.
 */
SYM_FUNC_START(fault)
	li.w	a0, -EFAULT			/* failure */
	jr	ra
SYM_FUNC_END(fault)