#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* void do_load_up_transact_altivec(struct thread_struct *thread)
 *
 * This is similar to load_up_altivec but for the transactional version of the
 * vector regs.  It doesn't mess with the task MSR or valid flags.
 * Furthermore, VEC laziness is not supported with TM currently.
 */
_GLOBAL(do_load_up_transact_altivec)
	mfmsr	r6
	oris	r5,r6,MSR_VEC@h
	MTMSRD(r5)
	isync

	li	r4,1
	stw	r4,THREAD_USED_VR(r3)

	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
	lvx	v0,r10,r3
	mtvscr	v0
	addi	r10,r3,THREAD_TRANSACT_VRSTATE
	REST_32VRS(0,r4,r10)

	blr
#endif

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
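
/*
 * Usage note (illustrative sketch only, not assembled into any caller here):
 * load_vr_state and store_vr_state assume the caller has already set
 * MSR[VEC].  A caller would typically use the same enable sequence this file
 * uses elsewhere, e.g. (assuming r5 already holds the THREAD pointer):
 *
 *	mfmsr	r6
 *	oris	r6,r6,MSR_VEC@h
 *	MTMSRD(r6)
 *	isync
 *	addi	r3,r5,THREAD_VRSTATE
 *	bl	load_vr_state
 */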

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	/* Don't care if r4 overflows, this is desired behaviour */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_exception_return

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr
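
/*
 * Descriptive note on the frame used by fpenable/fpdisable above and the
 * routines below (this describes what the code does, not an ABI-conformant
 * stack frame):
 *
 *	 0(r1)		back chain stored by the stwu/stdu
 *	 8(r1)		saved fr31 (fr31 holds the caller's FPSCR meanwhile)
 *	16(r1)		saved fr1
 *	24(r1)		saved fr0
 *	32..56(r1)	saved fr2-fr5, only by the routines that use them
 *
 * r10 holds the saved MSR and r12 the original return address (each routine
 * does "mflr r12" before calling fpenable); fpdisable moves r12 back into
 * LR, restores FPSCR and the FPRs, then returns on the routine's behalf.
 */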

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
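
/*
 * Note on the refinement used in vrsqrtefp above (included only as a
 * reading aid): with s the source element and r an estimate of 1/sqrt(s),
 * one Newton-Raphson step on f(r) = 1/(r*r) - s gives
 *
 *	r' = r + 0.5 * r * (1 - s * r * r)
 *
 * which is the fmuls/fnmsubs/fmadds sequence performed twice per element.
 */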