#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	/* Don't care if r4 overflows, this is desired behaviour */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr
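
/*
 * For orientation (a sketch, not taken from this file): the VRSTATE_*
 * offsets used above are generated by asm-offsets.c from a thread_vr_state
 * layout along the lines of
 *
 *	struct thread_vr_state {
 *		vector128	vr[32];		// v0..v31, 16-byte aligned
 *		vector128	vscr;		// VRSTATE_VSCR is this field's offset
 *	};
 *
 * so "li rX,VRSTATE_VSCR; lvx v0,rX,rBASE" loads the saved VSCR image while
 * SAVE_32VRS/REST_32VRS walk the vr[] array. See the thread_vr_state
 * definition in the headers for the authoritative layout.
 */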

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_exception_return

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr
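
/*
 * Illustrative sketch only (not kernel code): each routine below is the
 * scalar expansion of one 4 x single-precision vector operation. The first
 * one, for example, behaves roughly like
 *
 *	void vaddfp(float *dst, const float *a, const float *b)
 *	{
 *		int i;
 *
 *		for (i = 0; i < 4; i++)
 *			dst[i] = a[i] + b[i];
 *	}
 *
 * with fpenable/fpdisable supplying the MSR[FP] enable and FPSCR
 * save/restore around the loop.
 */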

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
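
/*
 * Illustrative sketch only (not kernel code): per element, the vrsqrtefp
 * loop above computes
 *
 *	float s = src[i];
 *	float r = frsqrte_estimate(s);	// hardware frsqrte estimate
 *	int j;
 *
 *	for (j = 0; j < 2; j++)		// two Newton-Raphson steps
 *		r = r + 0.5f * r * (1.0f - s * r * r);
 *	dst[i] = r;
 *
 * where frsqrte_estimate() is just a stand-in name for the result of the
 * frsqrte instruction.
 */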