/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (i.e., no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	ori	r5,r5,MSR_RI
#endif
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something
	 * non-zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	addi	r5,r2,THREAD
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S_64
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
#endif
#endif
	li	r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)

/*
 * save_altivec(tsk)
 * Save the task's vector registers into its thread_struct.
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the FP and AltiVec load-up code above, but first check
 * whether those registers have already been loaded.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	li	r5,MSR_RI
	mtmsrd	r5,1
#endif

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
	b	fast_interrupt_return_srr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
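/*
 * Common structure of the helpers below (vaddfp, vsubfp, vmaddfp,
 * vnmsubfp, vrefp, vrsqrtefp): r3 points to the 16-byte destination
 * vector and r4 (plus r5/r6 where used) to the source vectors.  Each
 * routine saves lr in r12, calls fpenable (which allocates a 64-byte
 * stack frame, enables MSR_FP, saves fr0/fr1/fr31 and clears the
 * FPSCR), processes the four single-precision words of the vector in
 * a CTR loop, and exits through fpdisable, which undoes all of that.
 */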
	.data
#ifdef CONFIG_PPC32
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

fpzero:
	.quad	0
fpone:
	.quad	0x3ff0000000000000	/* 1.0 */
fphalf:
	.quad	0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)		\
	addis	r11,r2,name@toc@ha;	\
	lfd	fr,name@toc@l(r11)
#endif
	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
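/*
 * The refinement step used below is Newton's method applied to
 * f(x) = 1/x^2 - s: given an estimate r of 1/sqrt(s), one update is
 * r' = r + 0.5 * r * (1 - s * r * r), which roughly doubles the
 * number of correct bits per iteration.
 */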
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable