/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
*/

	.file	"__vcos.S"

#include "libm.h"

! Read-only table of 8-byte entries, each written as two 32-bit words.
! Entries are addressed by byte offset from the label; the offset macros
! that follow the table name each entry, and the trailing comments on
! the entries repeat those names (with the byte offset) for reference.

	RO_DATA
	.align	64
constants:
	.word	0x3ec718e3,0xa6972785	! 0x00 p4
	.word	0x3ef9fd39,0x94293940	! 0x08 q4
	.word	0xbf2a019f,0x75ee4be1	! 0x10 p3
	.word	0xbf56c16b,0xba552569	! 0x18 q3
	.word	0x3f811111,0x1108c703	! 0x20 p2
	.word	0x3fa55555,0x554f5b35	! 0x28 q2
	.word	0xbfc55555,0x555554d0	! 0x30 p1
	.word	0xbfdfffff,0xffffff85	! 0x38 q1
	.word	0x3ff00000,0x00000000	! 0x40 one
	.word	0xbfc55555,0x5551fc28	! 0x48 pp1
	.word	0x3f811107,0x62eacc9d	! 0x50 pp2
	.word	0xbfdfffff,0xffff6328	! 0x58 qq1
	.word	0x3fa55551,0x5f7acf0c	! 0x60 qq2
	.word	0x3fe45f30,0x6dc9c883	! 0x68 invpio2
	.word	0x43380000,0x00000000	! 0x70 round
	.word	0x3ff921fb,0x54400000	! 0x78 pio2_1
	.word	0x3dd0b461,0x1a600000	! 0x80 pio2_2
	.word	0x3ba3198a,0x2e000000	! 0x88 pio2_3
	.word	0x397b839a,0x252049c1	! 0x90 pio2_3t
	.word	0x80000000,0x00004000	! 0x98 f30val
	! the next three entries are mask (0xa0), thresh (0xa8) and the
	! entry at thresh+8 (0xb0)
	.word	0xffff8000,0x00000000	! N.B.: low-order words used
	.word	0x3fc90000,0x80000000	! for sign bit hacking; see
	.word	0x3fc40000,0x00000000	! references to "thresh" below

! byte offsets of the entries in the constants table

#define p4		0x0
#define q4		0x08
#define p3		0x10
#define q3		0x18
#define p2		0x20
#define q2		0x28
#define p1		0x30
#define q1		0x38
#define one		0x40
#define pp1		0x48
#define pp2		0x50
#define qq1		0x58
#define qq2		0x60
#define invpio2		0x68
#define round		0x70
#define pio2_1		0x78
#define pio2_2		0x80
#define pio2_3		0x88
#define pio2_3t		0x90
#define f30val		0x98
#define mask		0xa0
#define thresh		0xa8

! local storage indices

#define xsave		STACK_BIAS-0x8
#define ysave		STACK_BIAS-0x10
#define nsave		STACK_BIAS-0x14
#define sxsave		STACK_BIAS-0x18
#define sysave		STACK_BIAS-0x1c
#define biguns		STACK_BIAS-0x20
#define n2		STACK_BIAS-0x24
#define n1		STACK_BIAS-0x28
#define n0		STACK_BIAS-0x2c
#define x2_1		STACK_BIAS-0x40
#define x1_1		STACK_BIAS-0x50
#define x0_1		STACK_BIAS-0x60
#define y2_0		STACK_BIAS-0x70
#define y1_0		STACK_BIAS-0x80
#define y0_0		STACK_BIAS-0x90
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x90

!--------------------------------------------------------------------
! define pipes for easier reading

#define P0_f0		%f0
#define P0_f1		%f1
#define P0_f2		%f2
#define P0_f3		%f3
#define P0_f4		%f4
#define P0_f5		%f5
#define P0_f6		%f6
#define P0_f7		%f7
#define P0_f8		%f8
#define P0_f9		%f9

#define P1_f10		%f10
#define P1_f11		%f11
#define P1_f12		%f12
#define P1_f13		%f13
#define P1_f14		%f14
#define P1_f15		%f15
#define P1_f16		%f16
#define P1_f17		%f17
#define P1_f18		%f18
#define P1_f19		%f19

#define P2_f20		%f20
#define P2_f21		%f21
#define P2_f22		%f22
#define P2_f23		%f23
#define P2_f24		%f24
#define P2_f25		%f25
#define P2_f26		%f26
#define P2_f27		%f27
#define P2_f28		%f28
#define P2_f29		%f29

! define __vlibm_TBL_sincos_hi & lo for easy reading

#define SC_HI		%l3
#define SC_LO		%l4

! define constants for easy reading

#define C_q1		%f46
#define C_q2		%f48
#define C_q3		%f50
#define C_q4		%f52

! one ( 1 ) uno eins echi un
#define C_ONE		%f54
#define C_ONE_LO	%f55

! masks
#define MSK_SIGN	%i5
#define MSK_BIT31	%f30
#define MSK_BIT13	%f31
#define MSK_BITSHI17	%f44


! constants for pp and qq
#define C_pp1		%f56
#define C_pp2		%f58
#define C_qq1		%f60
#define C_qq2		%f62

! sign mask
#define C_signM		%i5

#define LIM_l5		%l5
#define LIM_l6		%l6
! when in pri range, using value as transition from poly to table.
! for Medium range,change use of %l6 and use to keep track of biguns.
175*25c28e83SPiotr Jasiukajtis#define LIM_l7 %l7 176*25c28e83SPiotr Jasiukajtis 177*25c28e83SPiotr Jasiukajtis!-------------------------------------------------------------------- 178*25c28e83SPiotr Jasiukajtis 179*25c28e83SPiotr Jasiukajtis 180*25c28e83SPiotr Jasiukajtis ENTRY(__vcos) 181*25c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-tmps,%sp 182*25c28e83SPiotr Jasiukajtis PIC_SETUP(g5) 183*25c28e83SPiotr Jasiukajtis PIC_SET(g5,__vlibm_TBL_sincos_hi,l3) 184*25c28e83SPiotr Jasiukajtis PIC_SET(g5,__vlibm_TBL_sincos_lo,l4) 185*25c28e83SPiotr Jasiukajtis PIC_SET(g5,constants,o0) 186*25c28e83SPiotr Jasiukajtis mov %o0,%g1 187*25c28e83SPiotr Jasiukajtis wr %g0,0x82,%asi ! set %asi for non-faulting loads 188*25c28e83SPiotr Jasiukajtis 189*25c28e83SPiotr Jasiukajtis! ========== primary range ========== 190*25c28e83SPiotr Jasiukajtis 191*25c28e83SPiotr Jasiukajtis! register use 192*25c28e83SPiotr Jasiukajtis 193*25c28e83SPiotr Jasiukajtis! i0 n 194*25c28e83SPiotr Jasiukajtis! i1 x 195*25c28e83SPiotr Jasiukajtis! i2 stridex 196*25c28e83SPiotr Jasiukajtis! i3 y 197*25c28e83SPiotr Jasiukajtis! i4 stridey 198*25c28e83SPiotr Jasiukajtis! i5 0x80000000 199*25c28e83SPiotr Jasiukajtis 200*25c28e83SPiotr Jasiukajtis! l0 hx0 201*25c28e83SPiotr Jasiukajtis! l1 hx1 202*25c28e83SPiotr Jasiukajtis! l2 hx2 203*25c28e83SPiotr Jasiukajtis! l3 __vlibm_TBL_sincos_hi 204*25c28e83SPiotr Jasiukajtis! l4 __vlibm_TBL_sincos_lo 205*25c28e83SPiotr Jasiukajtis! l5 0x3fc40000 206*25c28e83SPiotr Jasiukajtis! l6 0x3e400000 207*25c28e83SPiotr Jasiukajtis! l7 0x3fe921fb 208*25c28e83SPiotr Jasiukajtis 209*25c28e83SPiotr Jasiukajtis! the following are 64-bit registers in both V8+ and V9 210*25c28e83SPiotr Jasiukajtis 211*25c28e83SPiotr Jasiukajtis! g1 scratch 212*25c28e83SPiotr Jasiukajtis! g5 213*25c28e83SPiotr Jasiukajtis 214*25c28e83SPiotr Jasiukajtis! o0 py0 215*25c28e83SPiotr Jasiukajtis! o1 py1 216*25c28e83SPiotr Jasiukajtis! o2 py2 217*25c28e83SPiotr Jasiukajtis! 
o3 oy0 218*25c28e83SPiotr Jasiukajtis! o4 oy1 219*25c28e83SPiotr Jasiukajtis! o5 oy2 220*25c28e83SPiotr Jasiukajtis! o7 scratch 221*25c28e83SPiotr Jasiukajtis 222*25c28e83SPiotr Jasiukajtis! f0 x0 223*25c28e83SPiotr Jasiukajtis! f2 224*25c28e83SPiotr Jasiukajtis! f4 225*25c28e83SPiotr Jasiukajtis! f6 226*25c28e83SPiotr Jasiukajtis! f8 scratch for table base 227*25c28e83SPiotr Jasiukajtis! f9 signbit0 228*25c28e83SPiotr Jasiukajtis! f10 x1 229*25c28e83SPiotr Jasiukajtis! f12 230*25c28e83SPiotr Jasiukajtis! f14 231*25c28e83SPiotr Jasiukajtis! f16 232*25c28e83SPiotr Jasiukajtis! f18 scratch for table base 233*25c28e83SPiotr Jasiukajtis! f19 signbit1 234*25c28e83SPiotr Jasiukajtis! f20 x2 235*25c28e83SPiotr Jasiukajtis! f22 236*25c28e83SPiotr Jasiukajtis! f24 237*25c28e83SPiotr Jasiukajtis! f26 238*25c28e83SPiotr Jasiukajtis! f28 scratch for table base 239*25c28e83SPiotr Jasiukajtis! f29 signbit2 240*25c28e83SPiotr Jasiukajtis! f30 0x80000000 241*25c28e83SPiotr Jasiukajtis! f31 0x4000 242*25c28e83SPiotr Jasiukajtis! f32 243*25c28e83SPiotr Jasiukajtis! f34 244*25c28e83SPiotr Jasiukajtis! f36 245*25c28e83SPiotr Jasiukajtis! f38 246*25c28e83SPiotr Jasiukajtis! f40 247*25c28e83SPiotr Jasiukajtis! f42 248*25c28e83SPiotr Jasiukajtis! f44 0xffff800000000000 249*25c28e83SPiotr Jasiukajtis! f46 p1 250*25c28e83SPiotr Jasiukajtis! f48 p2 251*25c28e83SPiotr Jasiukajtis! f50 p3 252*25c28e83SPiotr Jasiukajtis! f52 p4 253*25c28e83SPiotr Jasiukajtis! f54 one 254*25c28e83SPiotr Jasiukajtis! f56 pp1 255*25c28e83SPiotr Jasiukajtis! f58 pp2 256*25c28e83SPiotr Jasiukajtis! f60 qq1 257*25c28e83SPiotr Jasiukajtis! f62 qq2 258*25c28e83SPiotr Jasiukajtis 259*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9 260*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+xsave] ! save arguments 261*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+ysave] 262*25c28e83SPiotr Jasiukajtis#else 263*25c28e83SPiotr Jasiukajtis st %i1,[%fp+xsave] ! 
save arguments 264*25c28e83SPiotr Jasiukajtis st %i3,[%fp+ysave] 265*25c28e83SPiotr Jasiukajtis#endif 266*25c28e83SPiotr Jasiukajtis 267*25c28e83SPiotr Jasiukajtis st %i0,[%fp+nsave] 268*25c28e83SPiotr Jasiukajtis st %i2,[%fp+sxsave] 269*25c28e83SPiotr Jasiukajtis st %i4,[%fp+sysave] 270*25c28e83SPiotr Jasiukajtis sethi %hi(0x80000000),MSK_SIGN ! load/set up constants 271*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fc40000),LIM_l5 272*25c28e83SPiotr Jasiukajtis sethi %hi(0x3e400000),LIM_l6 273*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fe921fb),LIM_l7 274*25c28e83SPiotr Jasiukajtis or LIM_l7,%lo(0x3fe921fb),LIM_l7 275*25c28e83SPiotr Jasiukajtis ldd [%g1+f30val],MSK_BIT31 276*25c28e83SPiotr Jasiukajtis ldd [%g1+mask],MSK_BITSHI17 277*25c28e83SPiotr Jasiukajtis ldd [%g1+q1],C_q1 278*25c28e83SPiotr Jasiukajtis ldd [%g1+q2],C_q2 279*25c28e83SPiotr Jasiukajtis ldd [%g1+q3],C_q3 280*25c28e83SPiotr Jasiukajtis ldd [%g1+q4],C_q4 281*25c28e83SPiotr Jasiukajtis ldd [%g1+one],C_ONE 282*25c28e83SPiotr Jasiukajtis ldd [%g1+pp1],C_pp1 283*25c28e83SPiotr Jasiukajtis ldd [%g1+pp2],C_pp2 284*25c28e83SPiotr Jasiukajtis ldd [%g1+qq1],C_qq1 285*25c28e83SPiotr Jasiukajtis ldd [%g1+qq2],C_qq2 286*25c28e83SPiotr Jasiukajtis sll %i2,3,%i2 ! scale strides 287*25c28e83SPiotr Jasiukajtis sll %i4,3,%i4 288*25c28e83SPiotr Jasiukajtis add %fp,x0_1,%o3 ! precondition loop 289*25c28e83SPiotr Jasiukajtis add %fp,x0_1,%o4 290*25c28e83SPiotr Jasiukajtis add %fp,x0_1,%o5 291*25c28e83SPiotr Jasiukajtis ld [%i1],%l0 ! hx = *x 292*25c28e83SPiotr Jasiukajtis ld [%i1],P0_f0 293*25c28e83SPiotr Jasiukajtis ld [%i1+4],P0_f1 294*25c28e83SPiotr Jasiukajtis andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 295*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! 
x += stridex 296*25c28e83SPiotr Jasiukajtis 297*25c28e83SPiotr Jasiukajtis ba,pt %icc,.loop0 298*25c28e83SPiotr Jasiukajtis!delay slot 299*25c28e83SPiotr Jasiukajtis nop 300*25c28e83SPiotr Jasiukajtis 301*25c28e83SPiotr Jasiukajtis .align 32 302*25c28e83SPiotr Jasiukajtis.loop0: 303*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%l1 ! preload next argument 304*25c28e83SPiotr Jasiukajtis sub %l0,LIM_l6,%g1 305*25c28e83SPiotr Jasiukajtis sub LIM_l7,%l0,%o7 306*25c28e83SPiotr Jasiukajtis fands P0_f0,MSK_BIT31,P0_f9 ! save signbit 307*25c28e83SPiotr Jasiukajtis 308*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,P1_f10 309*25c28e83SPiotr Jasiukajtis orcc %o7,%g1,%g0 310*25c28e83SPiotr Jasiukajtis mov %i3,%o0 ! py0 = y 311*25c28e83SPiotr Jasiukajtis bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb 312*25c28e83SPiotr Jasiukajtis 313*25c28e83SPiotr Jasiukajtis! delay slot 314*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,P1_f11 315*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 316*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 ! y += stridey 317*25c28e83SPiotr Jasiukajtis ble,pn %icc,.endloop1 318*25c28e83SPiotr Jasiukajtis 319*25c28e83SPiotr Jasiukajtis! delay slot 320*25c28e83SPiotr Jasiukajtis andn %l1,MSK_SIGN,%l1 321*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 322*25c28e83SPiotr Jasiukajtis fabsd P0_f0,P0_f0 323*25c28e83SPiotr Jasiukajtis fmuld C_ONE,C_ONE,C_ONE ! one*one; a nop for alignment only 324*25c28e83SPiotr Jasiukajtis 325*25c28e83SPiotr Jasiukajtis.loop1: 326*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%l2 ! preload next argument 327*25c28e83SPiotr Jasiukajtis sub %l1,LIM_l6,%g1 328*25c28e83SPiotr Jasiukajtis sub LIM_l7,%l1,%o7 329*25c28e83SPiotr Jasiukajtis fands P1_f10,MSK_BIT31,P1_f19 ! save signbit 330*25c28e83SPiotr Jasiukajtis 331*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,P2_f20 332*25c28e83SPiotr Jasiukajtis orcc %o7,%g1,%g0 333*25c28e83SPiotr Jasiukajtis mov %i3,%o1 ! py1 = y 334*25c28e83SPiotr Jasiukajtis bl,pn %icc,.range1 ! 
if hx < 0x3e400000 or > 0x3fe921fb 335*25c28e83SPiotr Jasiukajtis 336*25c28e83SPiotr Jasiukajtis! delay slot 337*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,P2_f21 338*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 339*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 ! y += stridey 340*25c28e83SPiotr Jasiukajtis ble,pn %icc,.endloop2 341*25c28e83SPiotr Jasiukajtis 342*25c28e83SPiotr Jasiukajtis! delay slot 343*25c28e83SPiotr Jasiukajtis andn %l2,MSK_SIGN,%l2 344*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 345*25c28e83SPiotr Jasiukajtis fabsd P1_f10,P1_f10 346*25c28e83SPiotr Jasiukajtis fmuld C_ONE,C_ONE,C_ONE ! one*one; a nop for alignment only 347*25c28e83SPiotr Jasiukajtis 348*25c28e83SPiotr Jasiukajtis.loop2: 349*25c28e83SPiotr Jasiukajtis st P0_f6,[%o3] 350*25c28e83SPiotr Jasiukajtis sub %l2,LIM_l6,%g1 351*25c28e83SPiotr Jasiukajtis sub LIM_l7,%l2,%o7 352*25c28e83SPiotr Jasiukajtis fands P2_f20,MSK_BIT31,P2_f29 ! save signbit 353*25c28e83SPiotr Jasiukajtis 354*25c28e83SPiotr Jasiukajtis st P0_f7,[%o3+4] 355*25c28e83SPiotr Jasiukajtis orcc %g1,%o7,%g0 356*25c28e83SPiotr Jasiukajtis mov %i3,%o2 ! py2 = y 357*25c28e83SPiotr Jasiukajtis bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb 358*25c28e83SPiotr Jasiukajtis 359*25c28e83SPiotr Jasiukajtis! delay slot 360*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 ! y += stridey 361*25c28e83SPiotr Jasiukajtis cmp %l0,LIM_l5 362*25c28e83SPiotr Jasiukajtis fabsd P2_f20,P2_f20 363*25c28e83SPiotr Jasiukajtis bl,pn %icc,.case4 364*25c28e83SPiotr Jasiukajtis 365*25c28e83SPiotr Jasiukajtis! delay slot 366*25c28e83SPiotr Jasiukajtis st P1_f16,[%o4] 367*25c28e83SPiotr Jasiukajtis cmp %l1,LIM_l5 368*25c28e83SPiotr Jasiukajtis fpadd32s P0_f0,MSK_BIT13,P0_f8 369*25c28e83SPiotr Jasiukajtis bl,pn %icc,.case2 370*25c28e83SPiotr Jasiukajtis 371*25c28e83SPiotr Jasiukajtis! 
delay slot 372*25c28e83SPiotr Jasiukajtis st P1_f17,[%o4+4] 373*25c28e83SPiotr Jasiukajtis cmp %l2,LIM_l5 374*25c28e83SPiotr Jasiukajtis fpadd32s P1_f10,MSK_BIT13,P1_f18 375*25c28e83SPiotr Jasiukajtis bl,pn %icc,.case1 376*25c28e83SPiotr Jasiukajtis 377*25c28e83SPiotr Jasiukajtis! delay slot 378*25c28e83SPiotr Jasiukajtis st P2_f26,[%o5] 379*25c28e83SPiotr Jasiukajtis mov %o0,%o3 380*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fc3c000),%o7 381*25c28e83SPiotr Jasiukajtis fpadd32s P2_f20,MSK_BIT13,P2_f28 382*25c28e83SPiotr Jasiukajtis 383*25c28e83SPiotr Jasiukajtis st P2_f27,[%o5+4] 384*25c28e83SPiotr Jasiukajtis fand P0_f8,MSK_BITSHI17,P0_f2 385*25c28e83SPiotr Jasiukajtis mov %o1,%o4 386*25c28e83SPiotr Jasiukajtis 387*25c28e83SPiotr Jasiukajtis fand P1_f18,MSK_BITSHI17,P1_f12 388*25c28e83SPiotr Jasiukajtis mov %o2,%o5 389*25c28e83SPiotr Jasiukajtis sub %l0,%o7,%l0 390*25c28e83SPiotr Jasiukajtis 391*25c28e83SPiotr Jasiukajtis fand P2_f28,MSK_BITSHI17,P2_f22 392*25c28e83SPiotr Jasiukajtis sub %l1,%o7,%l1 393*25c28e83SPiotr Jasiukajtis sub %l2,%o7,%l2 394*25c28e83SPiotr Jasiukajtis 395*25c28e83SPiotr Jasiukajtis fsubd P0_f0,P0_f2,P0_f0 396*25c28e83SPiotr Jasiukajtis srl %l0,10,%l0 397*25c28e83SPiotr Jasiukajtis add SC_HI,8,%g1;add SC_LO,8,%o7 398*25c28e83SPiotr Jasiukajtis 399*25c28e83SPiotr Jasiukajtis fsubd P1_f10,P1_f12,P1_f10 400*25c28e83SPiotr Jasiukajtis srl %l1,10,%l1 401*25c28e83SPiotr Jasiukajtis 402*25c28e83SPiotr Jasiukajtis fsubd P2_f20,P2_f22,P2_f20 403*25c28e83SPiotr Jasiukajtis srl %l2,10,%l2 404*25c28e83SPiotr Jasiukajtis 405*25c28e83SPiotr Jasiukajtis fmuld P0_f0,P0_f0,P0_f2 406*25c28e83SPiotr Jasiukajtis andn %l0,0x1f,%l0 407*25c28e83SPiotr Jasiukajtis 408*25c28e83SPiotr Jasiukajtis fmuld P1_f10,P1_f10,P1_f12 409*25c28e83SPiotr Jasiukajtis andn %l1,0x1f,%l1 410*25c28e83SPiotr Jasiukajtis 411*25c28e83SPiotr Jasiukajtis fmuld P2_f20,P2_f20,P2_f22 412*25c28e83SPiotr Jasiukajtis andn %l2,0x1f,%l2 413*25c28e83SPiotr Jasiukajtis 414*25c28e83SPiotr Jasiukajtis 
fmuld P0_f2,C_pp2,P0_f6 415*25c28e83SPiotr Jasiukajtis ldd [%g1+%l0],%f32 416*25c28e83SPiotr Jasiukajtis 417*25c28e83SPiotr Jasiukajtis fmuld P1_f12,C_pp2,P1_f16 418*25c28e83SPiotr Jasiukajtis ldd [%g1+%l1],%f36 419*25c28e83SPiotr Jasiukajtis 420*25c28e83SPiotr Jasiukajtis fmuld P2_f22,C_pp2,P2_f26 421*25c28e83SPiotr Jasiukajtis ldd [%g1+%l2],%f40 422*25c28e83SPiotr Jasiukajtis 423*25c28e83SPiotr Jasiukajtis faddd P0_f6,C_pp1,P0_f6 424*25c28e83SPiotr Jasiukajtis fmuld P0_f2,C_qq2,P0_f4 425*25c28e83SPiotr Jasiukajtis ldd [SC_HI+%l0],%f34 426*25c28e83SPiotr Jasiukajtis 427*25c28e83SPiotr Jasiukajtis faddd P1_f16,C_pp1,P1_f16 428*25c28e83SPiotr Jasiukajtis fmuld P1_f12,C_qq2,P1_f14 429*25c28e83SPiotr Jasiukajtis ldd [SC_HI+%l1],%f38 430*25c28e83SPiotr Jasiukajtis 431*25c28e83SPiotr Jasiukajtis faddd P2_f26,C_pp1,P2_f26 432*25c28e83SPiotr Jasiukajtis fmuld P2_f22,C_qq2,P2_f24 433*25c28e83SPiotr Jasiukajtis ldd [SC_HI+%l2],%f42 434*25c28e83SPiotr Jasiukajtis 435*25c28e83SPiotr Jasiukajtis fmuld P0_f2,P0_f6,P0_f6 436*25c28e83SPiotr Jasiukajtis faddd P0_f4,C_qq1,P0_f4 437*25c28e83SPiotr Jasiukajtis 438*25c28e83SPiotr Jasiukajtis fmuld P1_f12,P1_f16,P1_f16 439*25c28e83SPiotr Jasiukajtis faddd P1_f14,C_qq1,P1_f14 440*25c28e83SPiotr Jasiukajtis 441*25c28e83SPiotr Jasiukajtis fmuld P2_f22,P2_f26,P2_f26 442*25c28e83SPiotr Jasiukajtis faddd P2_f24,C_qq1,P2_f24 443*25c28e83SPiotr Jasiukajtis 444*25c28e83SPiotr Jasiukajtis faddd P0_f6,C_ONE,P0_f6 445*25c28e83SPiotr Jasiukajtis fmuld P0_f2,P0_f4,P0_f4 446*25c28e83SPiotr Jasiukajtis 447*25c28e83SPiotr Jasiukajtis faddd P1_f16,C_ONE,P1_f16 448*25c28e83SPiotr Jasiukajtis fmuld P1_f12,P1_f14,P1_f14 449*25c28e83SPiotr Jasiukajtis 450*25c28e83SPiotr Jasiukajtis faddd P2_f26,C_ONE,P2_f26 451*25c28e83SPiotr Jasiukajtis fmuld P2_f22,P2_f24,P2_f24 452*25c28e83SPiotr Jasiukajtis 453*25c28e83SPiotr Jasiukajtis fmuld P0_f0,P0_f6,P0_f6 454*25c28e83SPiotr Jasiukajtis ldd [%o7+%l0],P0_f2 455*25c28e83SPiotr Jasiukajtis 456*25c28e83SPiotr 
Jasiukajtis fmuld P1_f10,P1_f16,P1_f16 457*25c28e83SPiotr Jasiukajtis ldd [%o7+%l1],P1_f12 458*25c28e83SPiotr Jasiukajtis 459*25c28e83SPiotr Jasiukajtis fmuld P2_f20,P2_f26,P2_f26 460*25c28e83SPiotr Jasiukajtis ldd [%o7+%l2],P2_f22 461*25c28e83SPiotr Jasiukajtis 462*25c28e83SPiotr Jasiukajtis fmuld P0_f4,%f32,P0_f4 463*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%l0 ! preload next argument 464*25c28e83SPiotr Jasiukajtis 465*25c28e83SPiotr Jasiukajtis fmuld P1_f14,%f36,P1_f14 466*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,P0_f0 467*25c28e83SPiotr Jasiukajtis 468*25c28e83SPiotr Jasiukajtis fmuld P2_f24,%f40,P2_f24 469*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,P0_f1 470*25c28e83SPiotr Jasiukajtis 471*25c28e83SPiotr Jasiukajtis fmuld P0_f6,%f34,P0_f6 472*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 473*25c28e83SPiotr Jasiukajtis 474*25c28e83SPiotr Jasiukajtis fmuld P1_f16,%f38,P1_f16 475*25c28e83SPiotr Jasiukajtis 476*25c28e83SPiotr Jasiukajtis fmuld P2_f26,%f42,P2_f26 477*25c28e83SPiotr Jasiukajtis 478*25c28e83SPiotr Jasiukajtis fsubd P0_f6,P0_f4,P0_f6 479*25c28e83SPiotr Jasiukajtis 480*25c28e83SPiotr Jasiukajtis fsubd P1_f16,P1_f14,P1_f16 481*25c28e83SPiotr Jasiukajtis 482*25c28e83SPiotr Jasiukajtis fsubd P2_f26,P2_f24,P2_f26 483*25c28e83SPiotr Jasiukajtis 484*25c28e83SPiotr Jasiukajtis fsubd P0_f2,P0_f6,P0_f6 485*25c28e83SPiotr Jasiukajtis 486*25c28e83SPiotr Jasiukajtis fsubd P1_f12,P1_f16,P1_f16 487*25c28e83SPiotr Jasiukajtis 488*25c28e83SPiotr Jasiukajtis fsubd P2_f22,P2_f26,P2_f26 489*25c28e83SPiotr Jasiukajtis 490*25c28e83SPiotr Jasiukajtis faddd P0_f6,%f32,P0_f6 491*25c28e83SPiotr Jasiukajtis 492*25c28e83SPiotr Jasiukajtis faddd P1_f16,%f36,P1_f16 493*25c28e83SPiotr Jasiukajtis 494*25c28e83SPiotr Jasiukajtis faddd P2_f26,%f40,P2_f26 495*25c28e83SPiotr Jasiukajtis andn %l0,MSK_SIGN,%l0 ! 
hx &= ~0x80000000 496*25c28e83SPiotr Jasiukajtis 497*25c28e83SPiotr Jasiukajtis nop !!(vsin) fors P0_f6,P0_f9,P0_f6 498*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 499*25c28e83SPiotr Jasiukajtis 500*25c28e83SPiotr Jasiukajtis nop !!(vsin) fors P1_f16,P1_f19,P1_f16 501*25c28e83SPiotr Jasiukajtis bg,pt %icc,.loop0 502*25c28e83SPiotr Jasiukajtis 503*25c28e83SPiotr Jasiukajtis! delay slot 504*25c28e83SPiotr Jasiukajtis nop !!(vsin) fors P2_f26,P2_f29,P2_f26 505*25c28e83SPiotr Jasiukajtis 506*25c28e83SPiotr Jasiukajtis ba,pt %icc,.endloop0 507*25c28e83SPiotr Jasiukajtis! delay slot 508*25c28e83SPiotr Jasiukajtis nop 509*25c28e83SPiotr Jasiukajtis 510*25c28e83SPiotr Jasiukajtis .align 32 511*25c28e83SPiotr Jasiukajtis.case1: 512*25c28e83SPiotr Jasiukajtis st P2_f27,[%o5+4] 513*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fc3c000),%o7 514*25c28e83SPiotr Jasiukajtis fand P0_f8,MSK_BITSHI17,P0_f2 515*25c28e83SPiotr Jasiukajtis 516*25c28e83SPiotr Jasiukajtis sub %l0,%o7,%l0 517*25c28e83SPiotr Jasiukajtis sub %l1,%o7,%l1 518*25c28e83SPiotr Jasiukajtis add SC_HI,8,%g1;add SC_LO,8,%o7 519*25c28e83SPiotr Jasiukajtis fand P1_f18,MSK_BITSHI17,P1_f12 520*25c28e83SPiotr Jasiukajtis fmuld P2_f20,P2_f20,P2_f22 521*25c28e83SPiotr Jasiukajtis 522*25c28e83SPiotr Jasiukajtis fsubd P0_f0,P0_f2,P0_f0 523*25c28e83SPiotr Jasiukajtis srl %l0,10,%l0 524*25c28e83SPiotr Jasiukajtis mov %o0,%o3 525*25c28e83SPiotr Jasiukajtis 526*25c28e83SPiotr Jasiukajtis fsubd P1_f10,P1_f12,P1_f10 527*25c28e83SPiotr Jasiukajtis srl %l1,10,%l1 528*25c28e83SPiotr Jasiukajtis mov %o1,%o4 529*25c28e83SPiotr Jasiukajtis 530*25c28e83SPiotr Jasiukajtis fmuld P2_f22,C_q4,P2_f24 531*25c28e83SPiotr Jasiukajtis mov %o2,%o5 532*25c28e83SPiotr Jasiukajtis 533*25c28e83SPiotr Jasiukajtis fmuld P0_f0,P0_f0,P0_f2 534*25c28e83SPiotr Jasiukajtis andn %l0,0x1f,%l0 535*25c28e83SPiotr Jasiukajtis 536*25c28e83SPiotr Jasiukajtis fmuld P1_f10,P1_f10,P1_f12 537*25c28e83SPiotr Jasiukajtis andn %l1,0x1f,%l1 538*25c28e83SPiotr Jasiukajtis 
539*25c28e83SPiotr Jasiukajtis faddd P2_f24,C_q3,P2_f24 540*25c28e83SPiotr Jasiukajtis 541*25c28e83SPiotr Jasiukajtis fmuld P0_f2,C_pp2,P0_f6 542*25c28e83SPiotr Jasiukajtis ldd [%g1+%l0],%f32 543*25c28e83SPiotr Jasiukajtis 544*25c28e83SPiotr Jasiukajtis fmuld P1_f12,C_pp2,P1_f16 545*25c28e83SPiotr Jasiukajtis ldd [%g1+%l1],%f36 546*25c28e83SPiotr Jasiukajtis 547*25c28e83SPiotr Jasiukajtis fmuld P2_f22,P2_f24,P2_f24 548*25c28e83SPiotr Jasiukajtis 549*25c28e83SPiotr Jasiukajtis faddd P0_f6,C_pp1,P0_f6 550*25c28e83SPiotr Jasiukajtis fmuld P0_f2,C_qq2,P0_f4 551*25c28e83SPiotr Jasiukajtis ldd [SC_HI+%l0],%f34 552*25c28e83SPiotr Jasiukajtis 553*25c28e83SPiotr Jasiukajtis faddd P1_f16,C_pp1,P1_f16 554*25c28e83SPiotr Jasiukajtis fmuld P1_f12,C_qq2,P1_f14 555*25c28e83SPiotr Jasiukajtis ldd [SC_HI+%l1],%f38 556*25c28e83SPiotr Jasiukajtis 557*25c28e83SPiotr Jasiukajtis faddd P2_f24,C_q2,P2_f24 558*25c28e83SPiotr Jasiukajtis 559*25c28e83SPiotr Jasiukajtis fmuld P0_f2,P0_f6,P0_f6 560*25c28e83SPiotr Jasiukajtis faddd P0_f4,C_qq1,P0_f4 561*25c28e83SPiotr Jasiukajtis 562*25c28e83SPiotr Jasiukajtis fmuld P1_f12,P1_f16,P1_f16 563*25c28e83SPiotr Jasiukajtis faddd P1_f14,C_qq1,P1_f14 564*25c28e83SPiotr Jasiukajtis 565*25c28e83SPiotr Jasiukajtis fmuld P2_f22,P2_f24,P2_f24 566*25c28e83SPiotr Jasiukajtis 567*25c28e83SPiotr Jasiukajtis faddd P0_f6,C_ONE,P0_f6 568*25c28e83SPiotr Jasiukajtis fmuld P0_f2,P0_f4,P0_f4 569*25c28e83SPiotr Jasiukajtis 570*25c28e83SPiotr Jasiukajtis faddd P1_f16,C_ONE,P1_f16 571*25c28e83SPiotr Jasiukajtis fmuld P1_f12,P1_f14,P1_f14 572*25c28e83SPiotr Jasiukajtis 573*25c28e83SPiotr Jasiukajtis faddd P2_f24,C_q1,P2_f24 574*25c28e83SPiotr Jasiukajtis 575*25c28e83SPiotr Jasiukajtis fmuld P0_f0,P0_f6,P0_f6 576*25c28e83SPiotr Jasiukajtis ldd [%o7+%l0],P0_f2 577*25c28e83SPiotr Jasiukajtis 578*25c28e83SPiotr Jasiukajtis fmuld P1_f10,P1_f16,P1_f16 579*25c28e83SPiotr Jasiukajtis ldd [%o7+%l1],P1_f12 580*25c28e83SPiotr Jasiukajtis 581*25c28e83SPiotr Jasiukajtis fmuld 
P0_f4,%f32,P0_f4 582*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%l0 ! preload next argument 583*25c28e83SPiotr Jasiukajtis 584*25c28e83SPiotr Jasiukajtis fmuld P1_f14,%f36,P1_f14 585*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,P0_f0 586*25c28e83SPiotr Jasiukajtis 587*25c28e83SPiotr Jasiukajtis fmuld P0_f6,%f34,P0_f6 588*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,P0_f1 589*25c28e83SPiotr Jasiukajtis 590*25c28e83SPiotr Jasiukajtis fmuld P1_f16,%f38,P1_f16 591*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 592*25c28e83SPiotr Jasiukajtis 593*25c28e83SPiotr Jasiukajtis fmuld P2_f22,P2_f24,P2_f24 594*25c28e83SPiotr Jasiukajtis 595*25c28e83SPiotr Jasiukajtis fsubd P0_f6,P0_f4,P0_f6 596*25c28e83SPiotr Jasiukajtis 597*25c28e83SPiotr Jasiukajtis fsubd P1_f16,P1_f14,P1_f16 598*25c28e83SPiotr Jasiukajtis 599*25c28e83SPiotr Jasiukajtis !!(vsin)fmuld P2_f20,P2_f24,P2_f24 600*25c28e83SPiotr Jasiukajtis 601*25c28e83SPiotr Jasiukajtis fsubd P0_f2,P0_f6,P0_f6 602*25c28e83SPiotr Jasiukajtis 603*25c28e83SPiotr Jasiukajtis fsubd P1_f12,P1_f16,P1_f16 604*25c28e83SPiotr Jasiukajtis 605*25c28e83SPiotr Jasiukajtis faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26 606*25c28e83SPiotr Jasiukajtis 607*25c28e83SPiotr Jasiukajtis faddd P0_f6,%f32,P0_f6 608*25c28e83SPiotr Jasiukajtis 609*25c28e83SPiotr Jasiukajtis faddd P1_f16,%f36,P1_f16 610*25c28e83SPiotr Jasiukajtis andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000 611*25c28e83SPiotr Jasiukajtis 612*25c28e83SPiotr Jasiukajtis nop !!(vsin) fors P2_f26,P2_f29,P2_f26 613*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 614*25c28e83SPiotr Jasiukajtis 615*25c28e83SPiotr Jasiukajtis nop !!(vsin) fors P0_f6,P0_f9,P0_f6 616*25c28e83SPiotr Jasiukajtis bg,pt %icc,.loop0 617*25c28e83SPiotr Jasiukajtis 618*25c28e83SPiotr Jasiukajtis! delay slot 619*25c28e83SPiotr Jasiukajtis nop !!(vsin) fors P1_f16,P1_f19,P1_f16 620*25c28e83SPiotr Jasiukajtis 621*25c28e83SPiotr Jasiukajtis ba,pt %icc,.endloop0 622*25c28e83SPiotr Jasiukajtis! 
! delay slot
	nop

! ---------------------------------------------------------------------
! .case2
!   Elements 0 and 2 (P0_*, P2_*) are evaluated with the table scheme,
!   element 1 (P1_*) with the polynomial scheme.  If %l2 < LIM_l5
!   (signed compare) control leaves for .case3 instead; the store in
!   the delay slot is executed either way.
!
!   Table scheme, per element (hx = high word held in %l0 / %l2):
!     xb  = (hi(x) + MSK_BIT13) & MSK_BITSHI17	(fpadd32s + fand; for
!						 element 0 the sum is
!						 already in P0_f8)
!     d   = x - xb,  z = d*d
!     i   = ((hx - 0x3fc3c000) >> 10) & ~0x1f
!     pp  = d*(C_ONE + z*(C_pp1 + z*C_pp2))
!     qq  = z*(C_qq1 + z*C_qq2)
!     res = [SC_HI+8+i] + ([SC_LO+8+i] - (pp*[SC_HI+i] - qq*[SC_HI+8+i]))
!   Polynomial scheme:
!     z   = x*x
!     res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
!
!   Results are left in P0_f6, P1_f16 and P2_f26.
!
!   NOTE(review): the table contents and the values behind LIM_l5,
!   MSK_* and C_* are defined outside this excerpt; the [SC_HI+i] /
!   [SC_HI+8+i] pair looks like sin/cos of the breakpoint -- confirm.
! ---------------------------------------------------------------------
	.align	32
.case2:
	st	P2_f26,[%o5]		! presumably the previous pass's third result; %o5 is reloaded below
	cmp	%l2,LIM_l5
	fpadd32s P2_f20,MSK_BIT13,P2_f28
	bl,pn	%icc,.case3		! %l2 < LIM_l5: element 2 goes polynomial

! delay slot
	st	P2_f27,[%o5+4]
	sethi	%hi(0x3fc3c000),%o7
	fand	P0_f8,MSK_BITSHI17,P0_f2

	sub	%l0,%o7,%l0
	sub	%l2,%o7,%l2
	add	SC_HI,8,%g1;add	SC_LO,8,%o7	! %g1 = SC_HI+8, %o7 = SC_LO+8
	fand	P2_f28,MSK_BITSHI17,P2_f22
	fmuld	P1_f10,P1_f10,P1_f12

	fsubd	P0_f0,P0_f2,P0_f0
	srl	%l0,10,%l0
	mov	%o0,%o3

	fsubd	P2_f20,P2_f22,P2_f20
	srl	%l2,10,%l2
	mov	%o2,%o5

	fmuld	P1_f12,C_q4,P1_f14
	mov	%o1,%o4

	fmuld	P0_f0,P0_f0,P0_f2
	andn	%l0,0x1f,%l0		! table offset for element 0

	fmuld	P2_f20,P2_f20,P2_f22
	andn	%l2,0x1f,%l2		! table offset for element 2

	faddd	P1_f14,C_q3,P1_f14

	fmuld	P0_f2,C_pp2,P0_f6
	ldd	[%g1+%l0],%f32

	fmuld	P2_f22,C_pp2,P2_f26
	ldd	[%g1+%l2],%f40

	fmuld	P1_f12,P1_f14,P1_f14

	faddd	P0_f6,C_pp1,P0_f6
	fmuld	P0_f2,C_qq2,P0_f4
	ldd	[SC_HI+%l0],%f34

	faddd	P2_f26,C_pp1,P2_f26
	fmuld	P2_f22,C_qq2,P2_f24
	ldd	[SC_HI+%l2],%f42

	faddd	P1_f14,C_q2,P1_f14

	fmuld	P0_f2,P0_f6,P0_f6
	faddd	P0_f4,C_qq1,P0_f4

	fmuld	P2_f22,P2_f26,P2_f26
	faddd	P2_f24,C_qq1,P2_f24

	fmuld	P1_f12,P1_f14,P1_f14

	faddd	P0_f6,C_ONE,P0_f6
	fmuld	P0_f2,P0_f4,P0_f4

	faddd	P2_f26,C_ONE,P2_f26
	fmuld	P2_f22,P2_f24,P2_f24

	faddd	P1_f14,C_q1,P1_f14

	fmuld	P0_f0,P0_f6,P0_f6
	ldd	[%o7+%l0],P0_f2

	fmuld	P2_f20,P2_f26,P2_f26
	ldd	[%o7+%l2],P2_f22

	fmuld	P0_f4,%f32,P0_f4
	lda	[%i1]%asi,%l0		! preload next argument

	fmuld	P2_f24,%f40,P2_f24
	lda	[%i1]%asi,P0_f0

	fmuld	P0_f6,%f34,P0_f6
	lda	[%i1+4]%asi,P0_f1

	fmuld	P2_f26,%f42,P2_f26
	add	%i1,%i2,%i1		! x += stridex

	fmuld	P1_f12,P1_f14,P1_f14

	fsubd	P0_f6,P0_f4,P0_f6

	fsubd	P2_f26,P2_f24,P2_f26

	!!(vsin)fmuld	P1_f10,P1_f14,P1_f14

	fsubd	P0_f2,P0_f6,P0_f6

	fsubd	P2_f22,P2_f26,P2_f26

	faddd	C_ONE,P1_f14,P1_f16	!!(vsin)faddd	P1_f10,P1_f14,P1_f16

	faddd	P0_f6,%f32,P0_f6

	faddd	P2_f26,%f40,P2_f26
	andn	%l0,MSK_SIGN,%l0	! hx &= ~0x80000000

	nop	!!(vsin) fors	P1_f16,P1_f19,P1_f16
	addcc	%i0,-1,%i0		! decrement %i0; sets icc for the bg below

	nop	!!(vsin) fors	P0_f6,P0_f9,P0_f6
	bg,pt	%icc,.loop0		! %i0 still > 0: another pass

! delay slot
	nop	!!(vsin) fors	P2_f26,P2_f29,P2_f26

	ba,pt	%icc,.endloop0
!
! delay slot
	nop

! ---------------------------------------------------------------------
! .case3
!   Element 0 (P0_*) is evaluated with the table scheme; elements 1 and
!   2 (P1_*, P2_*) with the polynomial scheme.
!
!   Table scheme (hx = high word held in %l0; the fpadd32s sum is
!   already in P0_f8 on entry):
!     xb  = P0_f8 & MSK_BITSHI17
!     d   = x - xb,  z = d*d
!     i   = ((hx - 0x3fc3c000) >> 10) & ~0x1f
!     pp  = d*(C_ONE + z*(C_pp1 + z*C_pp2))
!     qq  = z*(C_qq1 + z*C_qq2)
!     res = [SC_HI+8+i] + ([SC_LO+8+i] - (pp*[SC_HI+i] - qq*[SC_HI+8+i]))
!   Polynomial scheme:
!     z   = x*x
!     res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
!
!   Results are left in P0_f6, P1_f16 and P2_f26.
!
!   NOTE(review): table contents and the MSK_* / C_* values are defined
!   outside this excerpt -- confirm before relying on the formulas'
!   numerical meaning.
! ---------------------------------------------------------------------
	.align	32
.case3:
	sethi	%hi(0x3fc3c000),%o7
	fand	P0_f8,MSK_BITSHI17,P0_f2
	fmuld	P1_f10,P1_f10,P1_f12

	sub	%l0,%o7,%l0
	add	SC_HI,8,%g1;add	SC_LO,8,%o7	! %g1 = SC_HI+8, %o7 = SC_LO+8
	fmuld	P2_f20,P2_f20,P2_f22

	fsubd	P0_f0,P0_f2,P0_f0
	srl	%l0,10,%l0
	mov	%o0,%o3

	fmuld	P1_f12,C_q4,P1_f14
	mov	%o1,%o4

	fmuld	P2_f22,C_q4,P2_f24
	mov	%o2,%o5

	fmuld	P0_f0,P0_f0,P0_f2
	andn	%l0,0x1f,%l0		! table offset for element 0

	faddd	P1_f14,C_q3,P1_f14

	faddd	P2_f24,C_q3,P2_f24

	fmuld	P0_f2,C_pp2,P0_f6
	ldd	[%g1+%l0],%f32

	fmuld	P1_f12,P1_f14,P1_f14

	fmuld	P2_f22,P2_f24,P2_f24

	faddd	P0_f6,C_pp1,P0_f6
	fmuld	P0_f2,C_qq2,P0_f4
	ldd	[SC_HI+%l0],%f34

	faddd	P1_f14,C_q2,P1_f14

	faddd	P2_f24,C_q2,P2_f24

	fmuld	P0_f2,P0_f6,P0_f6
	faddd	P0_f4,C_qq1,P0_f4

	fmuld	P1_f12,P1_f14,P1_f14

	fmuld	P2_f22,P2_f24,P2_f24

	faddd	P0_f6,C_ONE,P0_f6
	fmuld	P0_f2,P0_f4,P0_f4

	faddd	P1_f14,C_q1,P1_f14

	faddd	P2_f24,C_q1,P2_f24

	fmuld	P0_f0,P0_f6,P0_f6
	ldd	[%o7+%l0],P0_f2

	fmuld	P0_f4,%f32,P0_f4
	lda	[%i1]%asi,%l0		! preload next argument

	fmuld	P1_f12,P1_f14,P1_f14
	lda	[%i1]%asi,P0_f0

	fmuld	P0_f6,%f34,P0_f6
	lda	[%i1+4]%asi,P0_f1

	fmuld	P2_f22,P2_f24,P2_f24
	add	%i1,%i2,%i1		!
! x += stridex

	!!(vsin)fmuld	P1_f10,P1_f14,P1_f14

	fsubd	P0_f6,P0_f4,P0_f6

	!!(vsin)fmuld	P2_f20,P2_f24,P2_f24

	faddd	C_ONE,P1_f14,P1_f16	!!(vsin)faddd	P1_f10,P1_f14,P1_f16

	fsubd	P0_f2,P0_f6,P0_f6

	faddd	C_ONE,P2_f24,P2_f26	!!(vsin)faddd	P2_f20,P2_f24,P2_f26

	nop	!!(vsin) fors	P1_f16,P1_f19,P1_f16
	andn	%l0,MSK_SIGN,%l0	! hx &= ~0x80000000

	faddd	P0_f6,%f32,P0_f6
	addcc	%i0,-1,%i0		! decrement %i0; sets icc for the bg below

	nop	!!(vsin) fors	P2_f26,P2_f29,P2_f26
	bg,pt	%icc,.loop0		! %i0 still > 0: another pass

! delay slot
	nop	!!(vsin) fors	P0_f6,P0_f9,P0_f6

	ba,pt	%icc,.endloop0
! delay slot
	nop

! ---------------------------------------------------------------------
! .case4
!   Element 0 (P0_*) is evaluated with the polynomial scheme; elements
!   1 and 2 (P1_*, P2_*) with the table scheme.  Two tests are made on
!   entry (signed compares), each with a store in its delay slot that
!   is executed whether or not the branch is taken:
!     %l1 < LIM_l5  ->  .case6
!     %l2 < LIM_l5  ->  .case5
!
!   Table scheme, per element (hx = high word held in %l1 / %l2):
!     xb  = (hi(x) + MSK_BIT13) & MSK_BITSHI17	(fpadd32s + fand)
!     d   = x - xb,  z = d*d
!     i   = ((hx - 0x3fc3c000) >> 10) & ~0x1f
!     pp  = d*(C_ONE + z*(C_pp1 + z*C_pp2))
!     qq  = z*(C_qq1 + z*C_qq2)
!     res = [SC_HI+8+i] + ([SC_LO+8+i] - (pp*[SC_HI+i] - qq*[SC_HI+8+i]))
!   Polynomial scheme:
!     z   = x*x
!     res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
!
!   Results are left in P0_f6, P1_f16 and P2_f26.
!
!   NOTE(review): table contents and the LIM_l5 / MSK_* / C_* values are
!   defined outside this excerpt -- confirm.
! ---------------------------------------------------------------------
	.align	32
.case4:
	st	P1_f17,[%o4+4]
	cmp	%l1,LIM_l5
	fpadd32s P1_f10,MSK_BIT13,P1_f18
	bl,pn	%icc,.case6		! %l1 < LIM_l5: element 1 goes polynomial

! delay slot
	st	P2_f26,[%o5]
	cmp	%l2,LIM_l5
	fpadd32s P2_f20,MSK_BIT13,P2_f28
	bl,pn	%icc,.case5		! %l2 < LIM_l5: element 2 goes polynomial

! delay slot
	st	P2_f27,[%o5+4]
	sethi	%hi(0x3fc3c000),%o7
	fand	P1_f18,MSK_BITSHI17,P1_f12

	sub	%l1,%o7,%l1
	sub	%l2,%o7,%l2
	add	SC_HI,8,%g1;add	SC_LO,8,%o7	! %g1 = SC_HI+8, %o7 = SC_LO+8
	fand	P2_f28,MSK_BITSHI17,P2_f22
	fmuld	P0_f0,P0_f0,P0_f2

	fsubd	P1_f10,P1_f12,P1_f10
	srl	%l1,10,%l1
	mov	%o1,%o4

	fsubd	P2_f20,P2_f22,P2_f20
	srl	%l2,10,%l2
	mov	%o2,%o5

	fmovd	P0_f0,P0_f6		!ID for processing
	fmuld	P0_f2,C_q4,P0_f4
	mov	%o0,%o3

	fmuld	P1_f10,P1_f10,P1_f12
	andn	%l1,0x1f,%l1		! table offset for element 1

	fmuld	P2_f20,P2_f20,P2_f22
	andn	%l2,0x1f,%l2		! table offset for element 2

	faddd	P0_f4,C_q3,P0_f4

	fmuld	P1_f12,C_pp2,P1_f16
	ldd	[%g1+%l1],%f36

	fmuld	P2_f22,C_pp2,P2_f26
	ldd	[%g1+%l2],%f40

	fmuld	P0_f2,P0_f4,P0_f4

	faddd	P1_f16,C_pp1,P1_f16
	fmuld	P1_f12,C_qq2,P1_f14
	ldd	[SC_HI+%l1],%f38

	faddd	P2_f26,C_pp1,P2_f26
	fmuld	P2_f22,C_qq2,P2_f24
	ldd	[SC_HI+%l2],%f42

	faddd	P0_f4,C_q2,P0_f4

	fmuld	P1_f12,P1_f16,P1_f16
	faddd	P1_f14,C_qq1,P1_f14

	fmuld	P2_f22,P2_f26,P2_f26
	faddd	P2_f24,C_qq1,P2_f24

	fmuld	P0_f2,P0_f4,P0_f4

	faddd	P1_f16,C_ONE,P1_f16
	fmuld	P1_f12,P1_f14,P1_f14

	faddd	P2_f26,C_ONE,P2_f26
	fmuld	P2_f22,P2_f24,P2_f24

	faddd	P0_f4,C_q1,P0_f4

	fmuld	P1_f10,P1_f16,P1_f16
	ldd	[%o7+%l1],P1_f12

	fmuld	P2_f20,P2_f26,P2_f26
	ldd	[%o7+%l2],P2_f22

	fmuld	P1_f14,%f36,P1_f14
	lda	[%i1]%asi,%l0		! preload next argument

	fmuld	P2_f24,%f40,P2_f24
	lda	[%i1]%asi,P0_f0

	fmuld	P1_f16,%f38,P1_f16
	lda	[%i1+4]%asi,P0_f1

	fmuld	P2_f26,%f42,P2_f26
	add	%i1,%i2,%i1		! x += stridex

	fmuld	P0_f2,P0_f4,P0_f4

	fsubd	P1_f16,P1_f14,P1_f16

	fsubd	P2_f26,P2_f24,P2_f26

	!!(vsin)fmuld	P0_f6,P0_f4,P0_f4

	fsubd	P1_f12,P1_f16,P1_f16

	fsubd	P2_f22,P2_f26,P2_f26

	faddd	C_ONE,P0_f4,P0_f6	!!(vsin)faddd	P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing

	faddd	P1_f16,%f36,P1_f16

	faddd	P2_f26,%f40,P2_f26
	andn	%l0,MSK_SIGN,%l0	! hx &= ~0x80000000

	nop	!!(vsin) fors	P0_f6,P0_f9,P0_f6
	addcc	%i0,-1,%i0		! decrement %i0; sets icc for the bg below

	nop	!!(vsin) fors	P1_f16,P1_f19,P1_f16
	bg,pt	%icc,.loop0		! %i0 still > 0: another pass

! delay slot
	nop	!!(vsin) fors	P2_f26,P2_f29,P2_f26

	ba,pt	%icc,.endloop0
!
! delay slot
	nop

! ---------------------------------------------------------------------
! .case5
!   Element 1 (P1_*) is evaluated with the table scheme; elements 0 and
!   2 (P0_*, P2_*) with the polynomial scheme.
!
!   Table scheme (hx = high word held in %l1; the fpadd32s sum is
!   already in P1_f18 on entry):
!     xb  = P1_f18 & MSK_BITSHI17
!     d   = x - xb,  z = d*d
!     i   = ((hx - 0x3fc3c000) >> 10) & ~0x1f
!     pp  = d*(C_ONE + z*(C_pp1 + z*C_pp2))
!     qq  = z*(C_qq1 + z*C_qq2)
!     res = [SC_HI+8+i] + ([SC_LO+8+i] - (pp*[SC_HI+i] - qq*[SC_HI+8+i]))
!   Polynomial scheme:
!     z   = x*x
!     res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
!
!   Results are left in P0_f6, P1_f16 and P2_f26.
!
!   NOTE(review): table contents and the MSK_* / C_* values are defined
!   outside this excerpt -- confirm.
! ---------------------------------------------------------------------
	.align	32
.case5:
	sethi	%hi(0x3fc3c000),%o7
	fand	P1_f18,MSK_BITSHI17,P1_f12
	fmuld	P0_f0,P0_f0,P0_f2

	sub	%l1,%o7,%l1
	add	SC_HI,8,%g1;add	SC_LO,8,%o7	! %g1 = SC_HI+8, %o7 = SC_LO+8
	fmuld	P2_f20,P2_f20,P2_f22

	fsubd	P1_f10,P1_f12,P1_f10
	srl	%l1,10,%l1
	mov	%o1,%o4

	fmovd	P0_f0,P0_f6		!ID for processing
	fmuld	P0_f2,C_q4,P0_f4
	mov	%o0,%o3

	fmuld	P2_f22,C_q4,P2_f24
	mov	%o2,%o5

	fmuld	P1_f10,P1_f10,P1_f12
	andn	%l1,0x1f,%l1		! table offset for element 1

	faddd	P0_f4,C_q3,P0_f4

	faddd	P2_f24,C_q3,P2_f24

	fmuld	P1_f12,C_pp2,P1_f16
	ldd	[%g1+%l1],%f36

	fmuld	P0_f2,P0_f4,P0_f4

	fmuld	P2_f22,P2_f24,P2_f24

	faddd	P1_f16,C_pp1,P1_f16
	fmuld	P1_f12,C_qq2,P1_f14
	ldd	[SC_HI+%l1],%f38

	faddd	P0_f4,C_q2,P0_f4

	faddd	P2_f24,C_q2,P2_f24

	fmuld	P1_f12,P1_f16,P1_f16
	faddd	P1_f14,C_qq1,P1_f14

	fmuld	P0_f2,P0_f4,P0_f4

	fmuld	P2_f22,P2_f24,P2_f24

	faddd	P1_f16,C_ONE,P1_f16
	fmuld	P1_f12,P1_f14,P1_f14

	faddd	P0_f4,C_q1,P0_f4

	faddd	P2_f24,C_q1,P2_f24

	fmuld	P1_f10,P1_f16,P1_f16
	ldd	[%o7+%l1],P1_f12

	fmuld	P1_f14,%f36,P1_f14
	lda	[%i1]%asi,%l0		! preload next argument

	fmuld	P0_f2,P0_f4,P0_f4
	lda	[%i1]%asi,P0_f0

	fmuld	P1_f16,%f38,P1_f16
	lda	[%i1+4]%asi,P0_f1

	fmuld	P2_f22,P2_f24,P2_f24
	add	%i1,%i2,%i1		! x += stridex

	!!(vsin)fmuld	P0_f6,P0_f4,P0_f4

	fsubd	P1_f16,P1_f14,P1_f16

	!!(vsin)fmuld	P2_f20,P2_f24,P2_f24

	faddd	C_ONE,P0_f4,P0_f6	!!(vsin)faddd	P0_f6,P0_f4,P0_f6 !
! faddd then spaces for processing

	fsubd	P1_f12,P1_f16,P1_f16

	faddd	C_ONE,P2_f24,P2_f26	!!(vsin)faddd	P2_f20,P2_f24,P2_f26

	nop	!!(vsin) fors	P0_f6,P0_f9,P0_f6
	andn	%l0,MSK_SIGN,%l0	! hx &= ~0x80000000

	faddd	P1_f16,%f36,P1_f16
	addcc	%i0,-1,%i0		! decrement %i0; sets icc for the bg below

	nop	!!(vsin) fors	P2_f26,P2_f29,P2_f26
	bg,pt	%icc,.loop0		! %i0 still > 0: another pass

! delay slot
	nop	!!(vsin) fors	P1_f16,P1_f19,P1_f16

	ba,pt	%icc,.endloop0
! delay slot
	nop

! ---------------------------------------------------------------------
! .case6
!   Element 2 (P2_*) is evaluated with the table scheme; elements 0 and
!   1 (P0_*, P1_*) with the polynomial scheme.  If %l2 < LIM_l5 (signed
!   compare) control leaves for .case7 instead; the sethi in the delay
!   slot is executed either way.
!
!   Table scheme (hx = high word held in %l2):
!     xb  = (hi(x) + MSK_BIT13) & MSK_BITSHI17	(fpadd32s + fand)
!     d   = x - xb,  z = d*d
!     i   = ((hx - 0x3fc3c000) >> 10) & ~0x1f
!     pp  = d*(C_ONE + z*(C_pp1 + z*C_pp2))
!     qq  = z*(C_qq1 + z*C_qq2)
!     res = [SC_HI+8+i] + ([SC_LO+8+i] - (pp*[SC_HI+i] - qq*[SC_HI+8+i]))
!   Polynomial scheme:
!     z   = x*x
!     res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
!
!   Results are left in P0_f6, P1_f16 and P2_f26.
!
!   NOTE(review): table contents and the LIM_l5 / MSK_* / C_* values are
!   defined outside this excerpt -- confirm.
! ---------------------------------------------------------------------
	.align	32
.case6:
	st	P2_f27,[%o5+4]
	cmp	%l2,LIM_l5
	fpadd32s P2_f20,MSK_BIT13,P2_f28
	bl,pn	%icc,.case7		! %l2 < LIM_l5: element 2 goes polynomial

! delay slot
	sethi	%hi(0x3fc3c000),%o7
	fand	P2_f28,MSK_BITSHI17,P2_f22
	fmuld	P0_f0,P0_f0,P0_f2

	sub	%l2,%o7,%l2
	add	SC_HI,8,%g1;add	SC_LO,8,%o7	! %g1 = SC_HI+8, %o7 = SC_LO+8
	fmuld	P1_f10,P1_f10,P1_f12

	fsubd	P2_f20,P2_f22,P2_f20
	srl	%l2,10,%l2
	mov	%o2,%o5

	fmovd	P0_f0,P0_f6		!ID for processing
	fmuld	P0_f2,C_q4,P0_f4
	mov	%o0,%o3

	fmuld	P1_f12,C_q4,P1_f14
	mov	%o1,%o4

	fmuld	P2_f20,P2_f20,P2_f22
	andn	%l2,0x1f,%l2		! table offset for element 2

	faddd	P0_f4,C_q3,P0_f4

	faddd	P1_f14,C_q3,P1_f14

	fmuld	P2_f22,C_pp2,P2_f26
	ldd	[%g1+%l2],%f40

	fmuld	P0_f2,P0_f4,P0_f4

	fmuld	P1_f12,P1_f14,P1_f14

	faddd	P2_f26,C_pp1,P2_f26
	fmuld	P2_f22,C_qq2,P2_f24
	ldd	[SC_HI+%l2],%f42

	faddd	P0_f4,C_q2,P0_f4

	faddd	P1_f14,C_q2,P1_f14

	fmuld	P2_f22,P2_f26,P2_f26
	faddd	P2_f24,C_qq1,P2_f24

	fmuld	P0_f2,P0_f4,P0_f4

	fmuld	P1_f12,P1_f14,P1_f14

	faddd	P2_f26,C_ONE,P2_f26
	fmuld	P2_f22,P2_f24,P2_f24

	faddd	P0_f4,C_q1,P0_f4

	faddd	P1_f14,C_q1,P1_f14

	fmuld	P2_f20,P2_f26,P2_f26
	ldd	[%o7+%l2],P2_f22

	fmuld	P2_f24,%f40,P2_f24
	lda	[%i1]%asi,%l0		! preload next argument

	fmuld	P0_f2,P0_f4,P0_f4
	lda	[%i1]%asi,P0_f0

	fmuld	P2_f26,%f42,P2_f26
	lda	[%i1+4]%asi,P0_f1

	fmuld	P1_f12,P1_f14,P1_f14
	add	%i1,%i2,%i1		! x += stridex

	!!(vsin)fmuld	P0_f6,P0_f4,P0_f4

	fsubd	P2_f26,P2_f24,P2_f26

	!!(vsin)fmuld	P1_f10,P1_f14,P1_f14

	faddd	C_ONE,P0_f4,P0_f6	!!(vsin)faddd	P0_f6,P0_f4,P0_f6 !
! faddd then spaces for processing

	fsubd	P2_f22,P2_f26,P2_f26

	faddd	C_ONE,P1_f14,P1_f16	!!(vsin)faddd	P1_f10,P1_f14,P1_f16

	nop	!!(vsin) fors	P0_f6,P0_f9,P0_f6
	andn	%l0,MSK_SIGN,%l0	! hx &= ~0x80000000

	faddd	P2_f26,%f40,P2_f26
	addcc	%i0,-1,%i0		! decrement %i0; sets icc for the bg below

	nop	!!(vsin) fors	P1_f16,P1_f19,P1_f16
	bg,pt	%icc,.loop0		! %i0 still > 0: another pass

! delay slot
	nop	!!(vsin) fors	P2_f26,P2_f29,P2_f26

	ba,pt	%icc,.endloop0
! delay slot
	nop

! ---------------------------------------------------------------------
! .case7
!   All three elements (P0_*, P1_*, P2_*) are evaluated with the
!   polynomial scheme; no table lookup is made:
!     z   = x*x
!     res = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4)))
!   The three evaluations are interleaved step by step, and the next
!   argument is preloaded into %l0 / P0_f0:P0_f1 while they run.
!   %o0..%o2 are copied to %o3..%o5 (presumably the destinations for the
!   deferred result stores -- confirm against .loop0).
!
!   NOTE(review): the C_q* / C_ONE values are defined outside this
!   excerpt.
! ---------------------------------------------------------------------
	.align	32
.case7:
	fmuld	P0_f0,P0_f0,P0_f2
	fmovd	P0_f0,P0_f6		!ID for processing
	mov	%o0,%o3

	fmuld	P1_f10,P1_f10,P1_f12
	mov	%o1,%o4

	fmuld	P2_f20,P2_f20,P2_f22
	mov	%o2,%o5

	fmuld	P0_f2,C_q4,P0_f4
	lda	[%i1]%asi,%l0		! preload next argument

	fmuld	P1_f12,C_q4,P1_f14
	lda	[%i1]%asi,P0_f0

	fmuld	P2_f22,C_q4,P2_f24
	lda	[%i1+4]%asi,P0_f1

	faddd	P0_f4,C_q3,P0_f4
	add	%i1,%i2,%i1		! x += stridex

	faddd	P1_f14,C_q3,P1_f14

	faddd	P2_f24,C_q3,P2_f24

	fmuld	P0_f2,P0_f4,P0_f4

	fmuld	P1_f12,P1_f14,P1_f14

	fmuld	P2_f22,P2_f24,P2_f24

	faddd	P0_f4,C_q2,P0_f4

	faddd	P1_f14,C_q2,P1_f14

	faddd	P2_f24,C_q2,P2_f24

	fmuld	P0_f2,P0_f4,P0_f4

	fmuld	P1_f12,P1_f14,P1_f14

	fmuld	P2_f22,P2_f24,P2_f24

	faddd	P0_f4,C_q1,P0_f4

	faddd	P1_f14,C_q1,P1_f14

	faddd	P2_f24,C_q1,P2_f24

	fmuld	P0_f2,P0_f4,P0_f4

	fmuld	P1_f12,P1_f14,P1_f14

	fmuld	P2_f22,P2_f24,P2_f24

	!!(vsin)fmuld	P0_f6,P0_f4,P0_f4

	!!(vsin)fmuld	P1_f10,P1_f14,P1_f14

	!!(vsin)fmuld	P2_f20,P2_f24,P2_f24

	faddd	C_ONE,P0_f4,P0_f6	!!(vsin)faddd	P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing

	faddd	C_ONE,P1_f14,P1_f16	!!(vsin)faddd	P1_f10,P1_f14,P1_f16

	faddd	C_ONE,P2_f24,P2_f26	!!(vsin)faddd	P2_f20,P2_f24,P2_f26
	andn	%l0,MSK_SIGN,%l0	! hx &= ~0x80000000

	nop	!!(vsin) fors	P0_f6,P0_f9,P0_f6
	addcc	%i0,-1,%i0		! decrement %i0; sets icc for the bg below

	nop	!!(vsin) fors	P1_f16,P1_f19,P1_f16
	bg,pt	%icc,.loop0		! %i0 still > 0: another pass

! delay slot
	nop	!!(vsin) fors	P2_f26,P2_f29,P2_f26

	ba,pt	%icc,.endloop0
! delay slot
	nop


! ---------------------------------------------------------------------
! .endloop2
!   Evaluates a single element whose argument is in P1_f10 (high word in
!   %l1) and leaves the result in P2_f20.  fabsd in the delay slot runs
!   on both paths.
!     %l1 <  LIM_l5 (signed): branch to local label 1 (polynomial form)
!     otherwise             : table form, computed inline here:
!       xb  = (hi(x) + MSK_BIT13) & MSK_BITSHI17
!       d   = x - xb,  z = d*d
!       i   = ((hx - 0x3fc3c000) >> 10) & ~0x1f
!       pp  = d*(C_ONE + z*(C_pp1 + z*C_pp2))	(accumulated in P2_f20)
!       qq  = z*(C_qq1 + z*C_qq2)
!       res = [SC_HI+8+i] + ([SC_LO+8+i] - (pp*[SC_HI+i] - qq*[SC_HI+8+i]))
!   Unlike the .caseN bodies this code is not interleaved with other
!   elements.
!
!   NOTE(review): the branch that reaches .endloop2 is outside this
!   excerpt; P2_f20 is presumably used as the accumulator so that
!   P1_f16 is left intact -- confirm.
! ---------------------------------------------------------------------
	.align	32
.endloop2:
	cmp	%l1,LIM_l5
	bl,pn	%icc,1f
! delay slot
	fabsd	P1_f10,P1_f10
	sethi	%hi(0x3fc3c000),%o7
	fpadd32s P1_f10,MSK_BIT13,P1_f18
	fand	P1_f18,MSK_BITSHI17,P1_f12
	sub	%l1,%o7,%l1
	add	SC_HI,8,%g1;add	SC_LO,8,%o7	! %g1 = SC_HI+8, %o7 = SC_LO+8
	fsubd	P1_f10,P1_f12,P1_f10
	srl	%l1,10,%l1
	fmuld	P1_f10,P1_f10,P1_f12
	andn	%l1,0x1f,%l1		! table offset
	fmuld	P1_f12,C_pp2,P2_f20
	ldd	[%g1+%l1],%f36
	faddd	P2_f20,C_pp1,P2_f20
	fmuld	P1_f12,C_qq2,P1_f14
	ldd	[SC_HI+%l1],%f38
	fmuld	P1_f12,P2_f20,P2_f20
	faddd	P1_f14,C_qq1,P1_f14
	faddd	P2_f20,C_ONE,P2_f20
	fmuld	P1_f12,P1_f14,P1_f14
	fmuld	P1_f10,P2_f20,P2_f20
	ldd	[%o7+%l1],P1_f12
	fmuld	P1_f14,%f36,P1_f14
	fmuld	P2_f20,%f38,P2_f20
	fsubd	P2_f20,P1_f14,P2_f20
	fsubd	P1_f12,P2_f20,P2_f20
	ba,pt	%icc,2f
!
! delay slot
	faddd	P2_f20,%f36,P2_f20
! Local label 1: polynomial form for the element in P1_f10,
!   P2_f20 = C_ONE + z*(C_q1 + z*(C_q2 + z*(C_q3 + z*C_q4))),  z = x*x
1:
	fmuld	P1_f10,P1_f10,P1_f12
	fmuld	P1_f12,C_q4,P1_f14
	faddd	P1_f14,C_q3,P1_f14
	fmuld	P1_f12,P1_f14,P1_f14
	faddd	P1_f14,C_q2,P1_f14
	fmuld	P1_f12,P1_f14,P1_f14
	faddd	P1_f14,C_q1,P1_f14
	fmuld	P1_f12,P1_f14,P1_f14
	!!(vsin)fmuld	P1_f10,P1_f14,P1_f14
	faddd	C_ONE,P1_f14,P2_f20	!!(vsin)faddd	P1_f10,P1_f14,P2_f20
! Local label 2: both forms meet here; the double in P2_f20:P2_f21 is
! written through %o1 as two 32-bit stores.
2:
	nop	!!(vsin) fors	P2_f20,P1_f19,P2_f20
	st	P2_f20,[%o1]
	st	P2_f21,[%o1+4]

! ---------------------------------------------------------------------
! .endloop1
!   Same single-element evaluation, for the argument in P0_f0 (high
!   word in %l0); the result is again built in P2_f20.  Reached by
!   falling through from the stores above or by a direct branch.
!     %l0 <  LIM_l5 (signed): branch to local label 1 (polynomial form)
!     otherwise             : table form, computed inline here:
!       xb  = (hi(x) + MSK_BIT13) & MSK_BITSHI17
!       d   = x - xb,  z = d*d
!       i   = ((hx - 0x3fc3c000) >> 10) & ~0x1f
!       pp  = d*(C_ONE + z*(C_pp1 + z*C_pp2))	(accumulated in P2_f20)
!       qq  = z*(C_qq1 + z*C_qq2)
!       res = [SC_HI+8+i] + ([SC_LO+8+i] - (pp*[SC_HI+i] - qq*[SC_HI+8+i]))
!   fabsd in the delay slot runs on both paths.
!
!   NOTE(review): table contents and the LIM_l5 / MSK_* / C_* values are
!   defined outside this excerpt -- confirm.
! ---------------------------------------------------------------------
.endloop1:
	cmp	%l0,LIM_l5
	bl,pn	%icc,1f
! delay slot
	fabsd	P0_f0,P0_f0
	sethi	%hi(0x3fc3c000),%o7
	fpadd32s P0_f0,MSK_BIT13,P0_f8
	fand	P0_f8,MSK_BITSHI17,P0_f2
	sub	%l0,%o7,%l0
	add	SC_HI,8,%g1;add	SC_LO,8,%o7	! %g1 = SC_HI+8, %o7 = SC_LO+8
	fsubd	P0_f0,P0_f2,P0_f0
	srl	%l0,10,%l0
	fmuld	P0_f0,P0_f0,P0_f2
	andn	%l0,0x1f,%l0		! table offset
	fmuld	P0_f2,C_pp2,P2_f20
	ldd	[%g1+%l0],%f32
	faddd	P2_f20,C_pp1,P2_f20
	fmuld	P0_f2,C_qq2,P0_f4
	ldd	[SC_HI+%l0],%f34
	fmuld	P0_f2,P2_f20,P2_f20
	faddd	P0_f4,C_qq1,P0_f4
	faddd	P2_f20,C_ONE,P2_f20
	fmuld	P0_f2,P0_f4,P0_f4
	fmuld	P0_f0,P2_f20,P2_f20
	ldd	[%o7+%l0],P0_f2
	fmuld	P0_f4,%f32,P0_f4
	fmuld	P2_f20,%f34,P2_f20
	fsubd	P2_f20,P0_f4,P2_f20
	fsubd	P0_f2,P2_f20,P2_f20
	ba,pt	%icc,2f
!
delay slot 1344*25c28e83SPiotr Jasiukajtis faddd P2_f20,%f32,P2_f20 1345*25c28e83SPiotr Jasiukajtis1: 1346*25c28e83SPiotr Jasiukajtis fmuld P0_f0,P0_f0,P0_f2 1347*25c28e83SPiotr Jasiukajtis fmuld P0_f2,C_q4,P0_f4 1348*25c28e83SPiotr Jasiukajtis faddd P0_f4,C_q3,P0_f4 1349*25c28e83SPiotr Jasiukajtis fmuld P0_f2,P0_f4,P0_f4 1350*25c28e83SPiotr Jasiukajtis faddd P0_f4,C_q2,P0_f4 1351*25c28e83SPiotr Jasiukajtis fmuld P0_f2,P0_f4,P0_f4 1352*25c28e83SPiotr Jasiukajtis faddd P0_f4,C_q1,P0_f4 1353*25c28e83SPiotr Jasiukajtis fmuld P0_f2,P0_f4,P0_f4 1354*25c28e83SPiotr Jasiukajtis !!(vsin)fmuld P0_f0,P0_f4,P0_f4 1355*25c28e83SPiotr Jasiukajtis faddd C_ONE,P0_f4,P2_f20 !!(vsin)faddd P0_f0,P0_f4,P2_f20 1356*25c28e83SPiotr Jasiukajtis2: 1357*25c28e83SPiotr Jasiukajtis nop !!(vsin) fors P2_f20,P0_f9,P2_f20 1358*25c28e83SPiotr Jasiukajtis st P2_f20,[%o0] 1359*25c28e83SPiotr Jasiukajtis st P2_f21,[%o0+4] 1360*25c28e83SPiotr Jasiukajtis 1361*25c28e83SPiotr Jasiukajtis.endloop0: 1362*25c28e83SPiotr Jasiukajtis st P0_f6,[%o3] 1363*25c28e83SPiotr Jasiukajtis st P0_f7,[%o3+4] 1364*25c28e83SPiotr Jasiukajtis st P1_f16,[%o4] 1365*25c28e83SPiotr Jasiukajtis st P1_f17,[%o4+4] 1366*25c28e83SPiotr Jasiukajtis st P2_f26,[%o5] 1367*25c28e83SPiotr Jasiukajtis st P2_f27,[%o5+4] 1368*25c28e83SPiotr Jasiukajtis 1369*25c28e83SPiotr Jasiukajtis! return. finished off with only primary range arguments 1370*25c28e83SPiotr Jasiukajtis 1371*25c28e83SPiotr Jasiukajtis ret 1372*25c28e83SPiotr Jasiukajtis restore 1373*25c28e83SPiotr Jasiukajtis 1374*25c28e83SPiotr Jasiukajtis 1375*25c28e83SPiotr Jasiukajtis .align 32 1376*25c28e83SPiotr Jasiukajtis.range0: 1377*25c28e83SPiotr Jasiukajtis cmp %l0,LIM_l6 1378*25c28e83SPiotr Jasiukajtis bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg. 1379*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken 1380*25c28e83SPiotr Jasiukajtis mov 0x1,LIM_l6 ! 
set biguns flag or 1381*25c28e83SPiotr Jasiukajtis fdtoi P0_f0,P0_f2; fmovd C_ONE,P0_f0 ; st P0_f0,[%o0] ! *y = 1.0 with inexact if x nonzero 1382*25c28e83SPiotr Jasiukajtis st P0_f1,[%o0+4] 1383*25c28e83SPiotr Jasiukajtis !nop ! (vsin) fdtoi P0_f0,P0_f2 1384*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 1385*25c28e83SPiotr Jasiukajtis ble,pn %icc,.endloop0 1386*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken 1387*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 ! y += stridey 1388*25c28e83SPiotr Jasiukajtis andn %l1,MSK_SIGN,%l0 ! hx &= ~0x80000000 1389*25c28e83SPiotr Jasiukajtis fmovd P1_f10,P0_f0 1390*25c28e83SPiotr Jasiukajtis ba,pt %icc,.loop0 1391*25c28e83SPiotr Jasiukajtis! delay slot 1392*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 1393*25c28e83SPiotr Jasiukajtis 1394*25c28e83SPiotr Jasiukajtis 1395*25c28e83SPiotr Jasiukajtis .align 32 1396*25c28e83SPiotr Jasiukajtis.range1: 1397*25c28e83SPiotr Jasiukajtis cmp %l1,LIM_l6 1398*25c28e83SPiotr Jasiukajtis bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg. 1399*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken 1400*25c28e83SPiotr Jasiukajtis mov 0x2,LIM_l6 ! set biguns flag or 1401*25c28e83SPiotr Jasiukajtis fdtoi P1_f10,P1_f12; fmovd C_ONE,P1_f10 ; st P1_f10,[%o1] ! *y = 1.0 with inexact if x nonzero 1402*25c28e83SPiotr Jasiukajtis st P1_f11,[%o1+4] 1403*25c28e83SPiotr Jasiukajtis !nop ! (vsin) fdtoi P1_f10,P1_f12 1404*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 1405*25c28e83SPiotr Jasiukajtis ble,pn %icc,.endloop1 1406*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken 1407*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 ! y += stridey 1408*25c28e83SPiotr Jasiukajtis andn %l2,MSK_SIGN,%l1 ! hx &= ~0x80000000 1409*25c28e83SPiotr Jasiukajtis fmovd P2_f20,P1_f10 1410*25c28e83SPiotr Jasiukajtis ba,pt %icc,.loop1 1411*25c28e83SPiotr Jasiukajtis! delay slot 1412*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 !
x += stridex 1413*25c28e83SPiotr Jasiukajtis 1414*25c28e83SPiotr Jasiukajtis 1415*25c28e83SPiotr Jasiukajtis .align 32 1416*25c28e83SPiotr Jasiukajtis.range2: 1417*25c28e83SPiotr Jasiukajtis cmp %l2,LIM_l6 1418*25c28e83SPiotr Jasiukajtis bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg. 1419*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken 1420*25c28e83SPiotr Jasiukajtis mov 0x3,LIM_l6 ! set biguns flag or 1421*25c28e83SPiotr Jasiukajtis fdtoi P2_f20,P2_f22; fmovd C_ONE,P2_f20 ; st P2_f20,[%o2] ! *y = 1.0 with inexact if x nonzero 1422*25c28e83SPiotr Jasiukajtis st P2_f21,[%o2+4] 1423*25c28e83SPiotr Jasiukajtis nop ! (vsin) fdtoi P2_f20,P2_f22 1424*25c28e83SPiotr Jasiukajtis1: 1425*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 1426*25c28e83SPiotr Jasiukajtis ble,pn %icc,.endloop2 1427*25c28e83SPiotr Jasiukajtis! delay slot 1428*25c28e83SPiotr Jasiukajtis nop 1429*25c28e83SPiotr Jasiukajtis ld [%i1],%l2 1430*25c28e83SPiotr Jasiukajtis ld [%i1],P2_f20 1431*25c28e83SPiotr Jasiukajtis ld [%i1+4],P2_f21 1432*25c28e83SPiotr Jasiukajtis andn %l2,MSK_SIGN,%l2 ! hx &= ~0x80000000 1433*25c28e83SPiotr Jasiukajtis ba,pt %icc,.loop2 1434*25c28e83SPiotr Jasiukajtis! delay slot 1435*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 1436*25c28e83SPiotr Jasiukajtis 1437*25c28e83SPiotr Jasiukajtis 1438*25c28e83SPiotr Jasiukajtis .align 32 1439*25c28e83SPiotr Jasiukajtis.MEDIUM: 1440*25c28e83SPiotr Jasiukajtis 1441*25c28e83SPiotr Jasiukajtis! ========== medium range ========== 1442*25c28e83SPiotr Jasiukajtis 1443*25c28e83SPiotr Jasiukajtis! register use 1444*25c28e83SPiotr Jasiukajtis 1445*25c28e83SPiotr Jasiukajtis! i0 n 1446*25c28e83SPiotr Jasiukajtis! i1 x 1447*25c28e83SPiotr Jasiukajtis! i2 stridex 1448*25c28e83SPiotr Jasiukajtis! i3 y 1449*25c28e83SPiotr Jasiukajtis! i4 stridey 1450*25c28e83SPiotr Jasiukajtis! i5 0x80000000 1451*25c28e83SPiotr Jasiukajtis 1452*25c28e83SPiotr Jasiukajtis! l0 hx0 1453*25c28e83SPiotr Jasiukajtis!
l1 hx1 1454*25c28e83SPiotr Jasiukajtis! l2 hx2 1455*25c28e83SPiotr Jasiukajtis! l3 __vlibm_TBL_sincos_hi 1456*25c28e83SPiotr Jasiukajtis! l4 __vlibm_TBL_sincos_lo 1457*25c28e83SPiotr Jasiukajtis! l5 constants 1458*25c28e83SPiotr Jasiukajtis! l6 biguns stored here : still called LIM_l6 1459*25c28e83SPiotr Jasiukajtis! l7 0x413921fb 1460*25c28e83SPiotr Jasiukajtis 1461*25c28e83SPiotr Jasiukajtis! the following are 64-bit registers in both V8+ and V9 1462*25c28e83SPiotr Jasiukajtis 1463*25c28e83SPiotr Jasiukajtis! g1 scratch 1464*25c28e83SPiotr Jasiukajtis! g5 1465*25c28e83SPiotr Jasiukajtis 1466*25c28e83SPiotr Jasiukajtis! o0 py0 1467*25c28e83SPiotr Jasiukajtis! o1 py1 1468*25c28e83SPiotr Jasiukajtis! o2 py2 1469*25c28e83SPiotr Jasiukajtis! o3 n0 1470*25c28e83SPiotr Jasiukajtis! o4 n1 1471*25c28e83SPiotr Jasiukajtis! o5 n2 1472*25c28e83SPiotr Jasiukajtis! o7 scratch 1473*25c28e83SPiotr Jasiukajtis 1474*25c28e83SPiotr Jasiukajtis! f0 x0 1475*25c28e83SPiotr Jasiukajtis! f2 n0,y0 1476*25c28e83SPiotr Jasiukajtis! f4 1477*25c28e83SPiotr Jasiukajtis! f6 1478*25c28e83SPiotr Jasiukajtis! f8 scratch for table base 1479*25c28e83SPiotr Jasiukajtis! f9 signbit0 1480*25c28e83SPiotr Jasiukajtis! f10 x1 1481*25c28e83SPiotr Jasiukajtis! f12 n1,y1 1482*25c28e83SPiotr Jasiukajtis! f14 1483*25c28e83SPiotr Jasiukajtis! f16 1484*25c28e83SPiotr Jasiukajtis! f18 scratch for table base 1485*25c28e83SPiotr Jasiukajtis! f19 signbit1 1486*25c28e83SPiotr Jasiukajtis! f20 x2 1487*25c28e83SPiotr Jasiukajtis! f22 n2,y2 1488*25c28e83SPiotr Jasiukajtis! f24 1489*25c28e83SPiotr Jasiukajtis! f26 1490*25c28e83SPiotr Jasiukajtis! f28 scratch for table base 1491*25c28e83SPiotr Jasiukajtis! f29 signbit2 1492*25c28e83SPiotr Jasiukajtis! f30 0x80000000 1493*25c28e83SPiotr Jasiukajtis! f31 0x4000 1494*25c28e83SPiotr Jasiukajtis! f32 1495*25c28e83SPiotr Jasiukajtis! f34 1496*25c28e83SPiotr Jasiukajtis! f36 1497*25c28e83SPiotr Jasiukajtis! f38 1498*25c28e83SPiotr Jasiukajtis! 
f40 invpio2 1499*25c28e83SPiotr Jasiukajtis! f42 round 1500*25c28e83SPiotr Jasiukajtis! f44 0xffff800000000000 1501*25c28e83SPiotr Jasiukajtis! f46 pio2_1 1502*25c28e83SPiotr Jasiukajtis! f48 pio2_2 1503*25c28e83SPiotr Jasiukajtis! f50 pio2_3 1504*25c28e83SPiotr Jasiukajtis! f52 pio2_3t 1505*25c28e83SPiotr Jasiukajtis! f54 one 1506*25c28e83SPiotr Jasiukajtis! f56 pp1 1507*25c28e83SPiotr Jasiukajtis! f58 pp2 1508*25c28e83SPiotr Jasiukajtis! f60 qq1 1509*25c28e83SPiotr Jasiukajtis! f62 qq2 1510*25c28e83SPiotr Jasiukajtis 1511*25c28e83SPiotr Jasiukajtis 1512*25c28e83SPiotr Jasiukajtis PIC_SET(g5,constants,l5) 1513*25c28e83SPiotr Jasiukajtis 1514*25c28e83SPiotr Jasiukajtis ! %o3,%o4,%o5 need to be stored 1515*25c28e83SPiotr Jasiukajtis st P0_f6,[%o3] 1516*25c28e83SPiotr Jasiukajtis sethi %hi(0x413921fb),%l7 1517*25c28e83SPiotr Jasiukajtis st P0_f7,[%o3+4] 1518*25c28e83SPiotr Jasiukajtis or %l7,%lo(0x413921fb),%l7 1519*25c28e83SPiotr Jasiukajtis st P1_f16,[%o4] 1520*25c28e83SPiotr Jasiukajtis st P1_f17,[%o4+4] 1521*25c28e83SPiotr Jasiukajtis st P2_f26,[%o5] 1522*25c28e83SPiotr Jasiukajtis st P2_f27,[%o5+4] 1523*25c28e83SPiotr Jasiukajtis ldd [%l5+invpio2],%f40 1524*25c28e83SPiotr Jasiukajtis ldd [%l5+round],%f42 1525*25c28e83SPiotr Jasiukajtis ldd [%l5+pio2_1],%f46 1526*25c28e83SPiotr Jasiukajtis ldd [%l5+pio2_2],%f48 1527*25c28e83SPiotr Jasiukajtis ldd [%l5+pio2_3],%f50 1528*25c28e83SPiotr Jasiukajtis ldd [%l5+pio2_3t],%f52 1529*25c28e83SPiotr Jasiukajtis std %f54,[%fp+x0_1+8] ! set up stack data 1530*25c28e83SPiotr Jasiukajtis std %f54,[%fp+x1_1+8] 1531*25c28e83SPiotr Jasiukajtis std %f54,[%fp+x2_1+8] 1532*25c28e83SPiotr Jasiukajtis stx %g0,[%fp+y0_0+8] 1533*25c28e83SPiotr Jasiukajtis stx %g0,[%fp+y1_0+8] 1534*25c28e83SPiotr Jasiukajtis stx %g0,[%fp+y2_0+8] 1535*25c28e83SPiotr Jasiukajtis 1536*25c28e83SPiotr Jasiukajtis! branched here in the middle of the array. Need to adjust 1537*25c28e83SPiotr Jasiukajtis! 
for the members of the triple that were selected in the primary 1538*25c28e83SPiotr Jasiukajtis! loop. 1539*25c28e83SPiotr Jasiukajtis 1540*25c28e83SPiotr Jasiukajtis! no adjustment since all three selected here 1541*25c28e83SPiotr Jasiukajtis subcc LIM_l6,0x1,%g0 ! continue in LOOP0? 1542*25c28e83SPiotr Jasiukajtis bz,a %icc,.LOOP0 1543*25c28e83SPiotr Jasiukajtis mov 0x0,LIM_l6 ! delay slot set biguns=0 1544*25c28e83SPiotr Jasiukajtis 1545*25c28e83SPiotr Jasiukajtis! adjust 1st triple since 2nd and 3rd done here 1546*25c28e83SPiotr Jasiukajtis subcc LIM_l6,0x2,%g0 ! continue in LOOP1? 1547*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f2 ! adj LOOP0 1548*25c28e83SPiotr Jasiukajtis bz,a %icc,.LOOP1 1549*25c28e83SPiotr Jasiukajtis mov 0x0,LIM_l6 ! delay slot set biguns=0 1550*25c28e83SPiotr Jasiukajtis 1551*25c28e83SPiotr Jasiukajtis! adjust 1st and 2nd triple since 3rd done here 1552*25c28e83SPiotr Jasiukajtis subcc LIM_l6,0x3,%g0 ! continue in LOOP2? 1553*25c28e83SPiotr Jasiukajtis !done fmuld %f0,%f40,%f2 ! adj LOOP0 1554*25c28e83SPiotr Jasiukajtis sub %i3,%i4,%i3 ! adjust to not double increment 1555*25c28e83SPiotr Jasiukajtis fmuld %f10,%f40,%f12 ! adj LOOP1 1556*25c28e83SPiotr Jasiukajtis faddd %f2,%f42,%f2 ! adj LOOP1 1557*25c28e83SPiotr Jasiukajtis bz,a %icc,.LOOP2 1558*25c28e83SPiotr Jasiukajtis mov 0x0,LIM_l6 ! delay slot set biguns=0 1559*25c28e83SPiotr Jasiukajtis 1560*25c28e83SPiotr Jasiukajtis ba .LOOP0 1561*25c28e83SPiotr Jasiukajtis nop 1562*25c28e83SPiotr Jasiukajtis 1563*25c28e83SPiotr Jasiukajtis! -- 16 byte aligned 1564*25c28e83SPiotr Jasiukajtis 1565*25c28e83SPiotr Jasiukajtis .align 32 1566*25c28e83SPiotr Jasiukajtis.LOOP0: 1567*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%l1 ! preload next argument 1568*25c28e83SPiotr Jasiukajtis mov %i3,%o0 ! py0 = y 1569*25c28e83SPiotr Jasiukajtis 1570*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%f10 1571*25c28e83SPiotr Jasiukajtis cmp %l0,%l7 1572*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 !
y += stridey 1573*25c28e83SPiotr Jasiukajtis bg,pn %icc,.BIG0 ! if hx > 0x413921fb 1574*25c28e83SPiotr Jasiukajtis 1575*25c28e83SPiotr Jasiukajtis! delay slot 1576*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,%f11 1577*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 1578*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 1579*25c28e83SPiotr Jasiukajtis ble,pn %icc,.ENDLOOP1 1580*25c28e83SPiotr Jasiukajtis 1581*25c28e83SPiotr Jasiukajtis! delay slot 1582*25c28e83SPiotr Jasiukajtis andn %l1,%i5,%l1 1583*25c28e83SPiotr Jasiukajtis nop 1584*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f2 1585*25c28e83SPiotr Jasiukajtis fabsd %f54,%f54 ! a nop for alignment only 1586*25c28e83SPiotr Jasiukajtis 1587*25c28e83SPiotr Jasiukajtis.LOOP1: 1588*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%l2 ! preload next argument 1589*25c28e83SPiotr Jasiukajtis mov %i3,%o1 ! py1 = y 1590*25c28e83SPiotr Jasiukajtis 1591*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%f20 1592*25c28e83SPiotr Jasiukajtis cmp %l1,%l7 1593*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 ! y += stridey 1594*25c28e83SPiotr Jasiukajtis bg,pn %icc,.BIG1 ! if hx > 0x413921fb 1595*25c28e83SPiotr Jasiukajtis 1596*25c28e83SPiotr Jasiukajtis! delay slot 1597*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,%f21 1598*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 1599*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 1600*25c28e83SPiotr Jasiukajtis ble,pn %icc,.ENDLOOP2 1601*25c28e83SPiotr Jasiukajtis 1602*25c28e83SPiotr Jasiukajtis! delay slot 1603*25c28e83SPiotr Jasiukajtis andn %l2,%i5,%l2 1604*25c28e83SPiotr Jasiukajtis nop 1605*25c28e83SPiotr Jasiukajtis fmuld %f10,%f40,%f12 1606*25c28e83SPiotr Jasiukajtis faddd %f2,%f42,%f2 1607*25c28e83SPiotr Jasiukajtis 1608*25c28e83SPiotr Jasiukajtis.LOOP2: 1609*25c28e83SPiotr Jasiukajtis st %f3,[%fp+n0] 1610*25c28e83SPiotr Jasiukajtis mov %i3,%o2 ! py2 = y 1611*25c28e83SPiotr Jasiukajtis 1612*25c28e83SPiotr Jasiukajtis cmp %l2,%l7 1613*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 ! 
y += stridey 1614*25c28e83SPiotr Jasiukajtis fmuld %f20,%f40,%f22 1615*25c28e83SPiotr Jasiukajtis bg,pn %icc,.BIG2 ! if hx > 0x413921fb 1616*25c28e83SPiotr Jasiukajtis 1617*25c28e83SPiotr Jasiukajtis! delay slot 1618*25c28e83SPiotr Jasiukajtis add %l5,thresh+4,%o7 1619*25c28e83SPiotr Jasiukajtis faddd %f12,%f42,%f12 1620*25c28e83SPiotr Jasiukajtis st %f13,[%fp+n1] 1621*25c28e83SPiotr Jasiukajtis 1622*25c28e83SPiotr Jasiukajtis! - 1623*25c28e83SPiotr Jasiukajtis 1624*25c28e83SPiotr Jasiukajtis add %l5,thresh,%g1 1625*25c28e83SPiotr Jasiukajtis faddd %f22,%f42,%f22 1626*25c28e83SPiotr Jasiukajtis st %f23,[%fp+n2] 1627*25c28e83SPiotr Jasiukajtis 1628*25c28e83SPiotr Jasiukajtis fsubd %f2,%f42,%f2 ! n 1629*25c28e83SPiotr Jasiukajtis 1630*25c28e83SPiotr Jasiukajtis fsubd %f12,%f42,%f12 ! n 1631*25c28e83SPiotr Jasiukajtis 1632*25c28e83SPiotr Jasiukajtis fsubd %f22,%f42,%f22 ! n 1633*25c28e83SPiotr Jasiukajtis 1634*25c28e83SPiotr Jasiukajtis fmuld %f2,%f46,%f4 1635*25c28e83SPiotr Jasiukajtis 1636*25c28e83SPiotr Jasiukajtis fmuld %f12,%f46,%f14 1637*25c28e83SPiotr Jasiukajtis 1638*25c28e83SPiotr Jasiukajtis fmuld %f22,%f46,%f24 1639*25c28e83SPiotr Jasiukajtis 1640*25c28e83SPiotr Jasiukajtis fsubd %f0,%f4,%f4 1641*25c28e83SPiotr Jasiukajtis fmuld %f2,%f48,%f6 1642*25c28e83SPiotr Jasiukajtis 1643*25c28e83SPiotr Jasiukajtis fsubd %f10,%f14,%f14 1644*25c28e83SPiotr Jasiukajtis fmuld %f12,%f48,%f16 1645*25c28e83SPiotr Jasiukajtis 1646*25c28e83SPiotr Jasiukajtis fsubd %f20,%f24,%f24 1647*25c28e83SPiotr Jasiukajtis fmuld %f22,%f48,%f26 1648*25c28e83SPiotr Jasiukajtis 1649*25c28e83SPiotr Jasiukajtis fsubd %f4,%f6,%f0 1650*25c28e83SPiotr Jasiukajtis ld [%fp+n0],%o3 ; add %o3,1,%o3 1651*25c28e83SPiotr Jasiukajtis 1652*25c28e83SPiotr Jasiukajtis fsubd %f14,%f16,%f10 1653*25c28e83SPiotr Jasiukajtis ld [%fp+n1],%o4 ; add %o4,1,%o4 1654*25c28e83SPiotr Jasiukajtis 1655*25c28e83SPiotr Jasiukajtis fsubd %f24,%f26,%f20 1656*25c28e83SPiotr Jasiukajtis ld [%fp+n2],%o5 ; add %o5,1,%o5 
1657*25c28e83SPiotr Jasiukajtis 1658*25c28e83SPiotr Jasiukajtis fsubd %f4,%f0,%f32 1659*25c28e83SPiotr Jasiukajtis and %o3,1,%o3 1660*25c28e83SPiotr Jasiukajtis 1661*25c28e83SPiotr Jasiukajtis fsubd %f14,%f10,%f34 1662*25c28e83SPiotr Jasiukajtis and %o4,1,%o4 1663*25c28e83SPiotr Jasiukajtis 1664*25c28e83SPiotr Jasiukajtis fsubd %f24,%f20,%f36 1665*25c28e83SPiotr Jasiukajtis and %o5,1,%o5 1666*25c28e83SPiotr Jasiukajtis 1667*25c28e83SPiotr Jasiukajtis fsubd %f32,%f6,%f32 1668*25c28e83SPiotr Jasiukajtis fmuld %f2,%f50,%f8 1669*25c28e83SPiotr Jasiukajtis sll %o3,3,%o3 1670*25c28e83SPiotr Jasiukajtis 1671*25c28e83SPiotr Jasiukajtis fsubd %f34,%f16,%f34 1672*25c28e83SPiotr Jasiukajtis fmuld %f12,%f50,%f18 1673*25c28e83SPiotr Jasiukajtis sll %o4,3,%o4 1674*25c28e83SPiotr Jasiukajtis 1675*25c28e83SPiotr Jasiukajtis fsubd %f36,%f26,%f36 1676*25c28e83SPiotr Jasiukajtis fmuld %f22,%f50,%f28 1677*25c28e83SPiotr Jasiukajtis sll %o5,3,%o5 1678*25c28e83SPiotr Jasiukajtis 1679*25c28e83SPiotr Jasiukajtis fsubd %f8,%f32,%f8 1680*25c28e83SPiotr Jasiukajtis ld [%g1+%o3],%f6 1681*25c28e83SPiotr Jasiukajtis 1682*25c28e83SPiotr Jasiukajtis fsubd %f18,%f34,%f18 1683*25c28e83SPiotr Jasiukajtis ld [%g1+%o4],%f16 1684*25c28e83SPiotr Jasiukajtis 1685*25c28e83SPiotr Jasiukajtis fsubd %f28,%f36,%f28 1686*25c28e83SPiotr Jasiukajtis ld [%g1+%o5],%f26 1687*25c28e83SPiotr Jasiukajtis 1688*25c28e83SPiotr Jasiukajtis fsubd %f0,%f8,%f4 1689*25c28e83SPiotr Jasiukajtis 1690*25c28e83SPiotr Jasiukajtis fsubd %f10,%f18,%f14 1691*25c28e83SPiotr Jasiukajtis 1692*25c28e83SPiotr Jasiukajtis fsubd %f20,%f28,%f24 1693*25c28e83SPiotr Jasiukajtis 1694*25c28e83SPiotr Jasiukajtis fsubd %f0,%f4,%f32 1695*25c28e83SPiotr Jasiukajtis 1696*25c28e83SPiotr Jasiukajtis fsubd %f10,%f14,%f34 1697*25c28e83SPiotr Jasiukajtis 1698*25c28e83SPiotr Jasiukajtis fsubd %f20,%f24,%f36 1699*25c28e83SPiotr Jasiukajtis 1700*25c28e83SPiotr Jasiukajtis fsubd %f32,%f8,%f32 1701*25c28e83SPiotr Jasiukajtis fmuld %f2,%f52,%f2 
1702*25c28e83SPiotr Jasiukajtis 1703*25c28e83SPiotr Jasiukajtis fsubd %f34,%f18,%f34 1704*25c28e83SPiotr Jasiukajtis fmuld %f12,%f52,%f12 1705*25c28e83SPiotr Jasiukajtis 1706*25c28e83SPiotr Jasiukajtis fsubd %f36,%f28,%f36 1707*25c28e83SPiotr Jasiukajtis fmuld %f22,%f52,%f22 1708*25c28e83SPiotr Jasiukajtis 1709*25c28e83SPiotr Jasiukajtis fsubd %f2,%f32,%f2 1710*25c28e83SPiotr Jasiukajtis ld [%o7+%o3],%f8 1711*25c28e83SPiotr Jasiukajtis 1712*25c28e83SPiotr Jasiukajtis fsubd %f12,%f34,%f12 1713*25c28e83SPiotr Jasiukajtis ld [%o7+%o4],%f18 1714*25c28e83SPiotr Jasiukajtis 1715*25c28e83SPiotr Jasiukajtis fsubd %f22,%f36,%f22 1716*25c28e83SPiotr Jasiukajtis ld [%o7+%o5],%f28 1717*25c28e83SPiotr Jasiukajtis 1718*25c28e83SPiotr Jasiukajtis fsubd %f4,%f2,%f0 ! x 1719*25c28e83SPiotr Jasiukajtis 1720*25c28e83SPiotr Jasiukajtis fsubd %f14,%f12,%f10 ! x 1721*25c28e83SPiotr Jasiukajtis 1722*25c28e83SPiotr Jasiukajtis fsubd %f24,%f22,%f20 ! x 1723*25c28e83SPiotr Jasiukajtis 1724*25c28e83SPiotr Jasiukajtis fsubd %f4,%f0,%f4 1725*25c28e83SPiotr Jasiukajtis 1726*25c28e83SPiotr Jasiukajtis fsubd %f14,%f10,%f14 1727*25c28e83SPiotr Jasiukajtis 1728*25c28e83SPiotr Jasiukajtis fsubd %f24,%f20,%f24 1729*25c28e83SPiotr Jasiukajtis 1730*25c28e83SPiotr Jasiukajtis fands %f0,%f30,%f9 ! save signbit 1731*25c28e83SPiotr Jasiukajtis 1732*25c28e83SPiotr Jasiukajtis fands %f10,%f30,%f19 ! save signbit 1733*25c28e83SPiotr Jasiukajtis 1734*25c28e83SPiotr Jasiukajtis fands %f20,%f30,%f29 ! save signbit 1735*25c28e83SPiotr Jasiukajtis 1736*25c28e83SPiotr Jasiukajtis fabsd %f0,%f0 1737*25c28e83SPiotr Jasiukajtis std %f0,[%fp+x0_1] 1738*25c28e83SPiotr Jasiukajtis 1739*25c28e83SPiotr Jasiukajtis fabsd %f10,%f10 1740*25c28e83SPiotr Jasiukajtis std %f10,[%fp+x1_1] 1741*25c28e83SPiotr Jasiukajtis 1742*25c28e83SPiotr Jasiukajtis fabsd %f20,%f20 1743*25c28e83SPiotr Jasiukajtis std %f20,[%fp+x2_1] 1744*25c28e83SPiotr Jasiukajtis 1745*25c28e83SPiotr Jasiukajtis fsubd %f4,%f2,%f2 ! 
y 1746*25c28e83SPiotr Jasiukajtis 1747*25c28e83SPiotr Jasiukajtis fsubd %f14,%f12,%f12 ! y 1748*25c28e83SPiotr Jasiukajtis 1749*25c28e83SPiotr Jasiukajtis fsubd %f24,%f22,%f22 ! y 1750*25c28e83SPiotr Jasiukajtis 1751*25c28e83SPiotr Jasiukajtis fcmpgt32 %f6,%f0,%l0 1752*25c28e83SPiotr Jasiukajtis 1753*25c28e83SPiotr Jasiukajtis fcmpgt32 %f16,%f10,%l1 1754*25c28e83SPiotr Jasiukajtis 1755*25c28e83SPiotr Jasiukajtis fcmpgt32 %f26,%f20,%l2 1756*25c28e83SPiotr Jasiukajtis 1757*25c28e83SPiotr Jasiukajtis! -- 16 byte aligned 1758*25c28e83SPiotr Jasiukajtis fxors %f2,%f9,%f2 1759*25c28e83SPiotr Jasiukajtis 1760*25c28e83SPiotr Jasiukajtis fxors %f12,%f19,%f12 1761*25c28e83SPiotr Jasiukajtis 1762*25c28e83SPiotr Jasiukajtis fxors %f22,%f29,%f22 1763*25c28e83SPiotr Jasiukajtis 1764*25c28e83SPiotr Jasiukajtis fands %f9,%f8,%f9 ! if (n & 1) clear sign bit 1765*25c28e83SPiotr Jasiukajtis andcc %l0,2,%g0 1766*25c28e83SPiotr Jasiukajtis bne,pn %icc,.CASE4 1767*25c28e83SPiotr Jasiukajtis 1768*25c28e83SPiotr Jasiukajtis! delay slot 1769*25c28e83SPiotr Jasiukajtis fands %f19,%f18,%f19 ! if (n & 1) clear sign bit 1770*25c28e83SPiotr Jasiukajtis andcc %l1,2,%g0 1771*25c28e83SPiotr Jasiukajtis bne,pn %icc,.CASE2 1772*25c28e83SPiotr Jasiukajtis 1773*25c28e83SPiotr Jasiukajtis! delay slot 1774*25c28e83SPiotr Jasiukajtis fands %f29,%f28,%f29 ! if (n & 1) clear sign bit 1775*25c28e83SPiotr Jasiukajtis andcc %l2,2,%g0 1776*25c28e83SPiotr Jasiukajtis bne,pn %icc,.CASE1 1777*25c28e83SPiotr Jasiukajtis 1778*25c28e83SPiotr Jasiukajtis! 
delay slot 1779*25c28e83SPiotr Jasiukajtis fpadd32s %f0,%f31,%f8 1780*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fc3c000),%o7 1781*25c28e83SPiotr Jasiukajtis ld [%fp+x0_1],%l0 1782*25c28e83SPiotr Jasiukajtis 1783*25c28e83SPiotr Jasiukajtis fpadd32s %f10,%f31,%f18 1784*25c28e83SPiotr Jasiukajtis add %l3,8,%g1 1785*25c28e83SPiotr Jasiukajtis ld [%fp+x1_1],%l1 1786*25c28e83SPiotr Jasiukajtis 1787*25c28e83SPiotr Jasiukajtis fpadd32s %f20,%f31,%f28 1788*25c28e83SPiotr Jasiukajtis ld [%fp+x2_1],%l2 1789*25c28e83SPiotr Jasiukajtis 1790*25c28e83SPiotr Jasiukajtis fand %f8,%f44,%f4 1791*25c28e83SPiotr Jasiukajtis sub %l0,%o7,%l0 1792*25c28e83SPiotr Jasiukajtis 1793*25c28e83SPiotr Jasiukajtis fand %f18,%f44,%f14 1794*25c28e83SPiotr Jasiukajtis sub %l1,%o7,%l1 1795*25c28e83SPiotr Jasiukajtis 1796*25c28e83SPiotr Jasiukajtis fand %f28,%f44,%f24 1797*25c28e83SPiotr Jasiukajtis sub %l2,%o7,%l2 1798*25c28e83SPiotr Jasiukajtis 1799*25c28e83SPiotr Jasiukajtis fsubd %f0,%f4,%f0 1800*25c28e83SPiotr Jasiukajtis srl %l0,10,%l0 1801*25c28e83SPiotr Jasiukajtis 1802*25c28e83SPiotr Jasiukajtis fsubd %f10,%f14,%f10 1803*25c28e83SPiotr Jasiukajtis srl %l1,10,%l1 1804*25c28e83SPiotr Jasiukajtis 1805*25c28e83SPiotr Jasiukajtis fsubd %f20,%f24,%f20 1806*25c28e83SPiotr Jasiukajtis srl %l2,10,%l2 1807*25c28e83SPiotr Jasiukajtis 1808*25c28e83SPiotr Jasiukajtis faddd %f0,%f2,%f0 1809*25c28e83SPiotr Jasiukajtis andn %l0,0x1f,%l0 1810*25c28e83SPiotr Jasiukajtis 1811*25c28e83SPiotr Jasiukajtis faddd %f10,%f12,%f10 1812*25c28e83SPiotr Jasiukajtis andn %l1,0x1f,%l1 1813*25c28e83SPiotr Jasiukajtis 1814*25c28e83SPiotr Jasiukajtis faddd %f20,%f22,%f20 1815*25c28e83SPiotr Jasiukajtis andn %l2,0x1f,%l2 1816*25c28e83SPiotr Jasiukajtis 1817*25c28e83SPiotr Jasiukajtis fmuld %f0,%f0,%f2 1818*25c28e83SPiotr Jasiukajtis add %l0,%o3,%l0 1819*25c28e83SPiotr Jasiukajtis 1820*25c28e83SPiotr Jasiukajtis fmuld %f10,%f10,%f12 1821*25c28e83SPiotr Jasiukajtis add %l1,%o4,%l1 1822*25c28e83SPiotr Jasiukajtis 1823*25c28e83SPiotr 
Jasiukajtis fmuld %f20,%f20,%f22 1824*25c28e83SPiotr Jasiukajtis add %l2,%o5,%l2 1825*25c28e83SPiotr Jasiukajtis 1826*25c28e83SPiotr Jasiukajtis fmuld %f2,%f58,%f6 1827*25c28e83SPiotr Jasiukajtis ldd [%l3+%l0],%f32 1828*25c28e83SPiotr Jasiukajtis 1829*25c28e83SPiotr Jasiukajtis fmuld %f12,%f58,%f16 1830*25c28e83SPiotr Jasiukajtis ldd [%l3+%l1],%f34 1831*25c28e83SPiotr Jasiukajtis 1832*25c28e83SPiotr Jasiukajtis fmuld %f22,%f58,%f26 1833*25c28e83SPiotr Jasiukajtis ldd [%l3+%l2],%f36 1834*25c28e83SPiotr Jasiukajtis 1835*25c28e83SPiotr Jasiukajtis faddd %f6,%f56,%f6 1836*25c28e83SPiotr Jasiukajtis fmuld %f2,%f62,%f4 1837*25c28e83SPiotr Jasiukajtis 1838*25c28e83SPiotr Jasiukajtis faddd %f16,%f56,%f16 1839*25c28e83SPiotr Jasiukajtis fmuld %f12,%f62,%f14 1840*25c28e83SPiotr Jasiukajtis 1841*25c28e83SPiotr Jasiukajtis faddd %f26,%f56,%f26 1842*25c28e83SPiotr Jasiukajtis fmuld %f22,%f62,%f24 1843*25c28e83SPiotr Jasiukajtis 1844*25c28e83SPiotr Jasiukajtis fmuld %f2,%f6,%f6 1845*25c28e83SPiotr Jasiukajtis faddd %f4,%f60,%f4 1846*25c28e83SPiotr Jasiukajtis 1847*25c28e83SPiotr Jasiukajtis fmuld %f12,%f16,%f16 1848*25c28e83SPiotr Jasiukajtis faddd %f14,%f60,%f14 1849*25c28e83SPiotr Jasiukajtis 1850*25c28e83SPiotr Jasiukajtis fmuld %f22,%f26,%f26 1851*25c28e83SPiotr Jasiukajtis faddd %f24,%f60,%f24 1852*25c28e83SPiotr Jasiukajtis 1853*25c28e83SPiotr Jasiukajtis faddd %f6,%f54,%f6 1854*25c28e83SPiotr Jasiukajtis fmuld %f2,%f4,%f4 1855*25c28e83SPiotr Jasiukajtis 1856*25c28e83SPiotr Jasiukajtis faddd %f16,%f54,%f16 1857*25c28e83SPiotr Jasiukajtis fmuld %f12,%f14,%f14 1858*25c28e83SPiotr Jasiukajtis 1859*25c28e83SPiotr Jasiukajtis faddd %f26,%f54,%f26 1860*25c28e83SPiotr Jasiukajtis fmuld %f22,%f24,%f24 1861*25c28e83SPiotr Jasiukajtis 1862*25c28e83SPiotr Jasiukajtis fmuld %f0,%f6,%f6 1863*25c28e83SPiotr Jasiukajtis ldd [%g1+%l0],%f2 1864*25c28e83SPiotr Jasiukajtis 1865*25c28e83SPiotr Jasiukajtis fmuld %f10,%f16,%f16 1866*25c28e83SPiotr Jasiukajtis ldd [%g1+%l1],%f12 
1867*25c28e83SPiotr Jasiukajtis 1868*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f26 1869*25c28e83SPiotr Jasiukajtis ldd [%g1+%l2],%f22 1870*25c28e83SPiotr Jasiukajtis 1871*25c28e83SPiotr Jasiukajtis fmuld %f4,%f32,%f4 1872*25c28e83SPiotr Jasiukajtis ldd [%l4+%l0],%f0 1873*25c28e83SPiotr Jasiukajtis 1874*25c28e83SPiotr Jasiukajtis fmuld %f14,%f34,%f14 1875*25c28e83SPiotr Jasiukajtis ldd [%l4+%l1],%f10 1876*25c28e83SPiotr Jasiukajtis 1877*25c28e83SPiotr Jasiukajtis fmuld %f24,%f36,%f24 1878*25c28e83SPiotr Jasiukajtis ldd [%l4+%l2],%f20 1879*25c28e83SPiotr Jasiukajtis 1880*25c28e83SPiotr Jasiukajtis fmuld %f6,%f2,%f6 1881*25c28e83SPiotr Jasiukajtis 1882*25c28e83SPiotr Jasiukajtis fmuld %f16,%f12,%f16 1883*25c28e83SPiotr Jasiukajtis 1884*25c28e83SPiotr Jasiukajtis fmuld %f26,%f22,%f26 1885*25c28e83SPiotr Jasiukajtis 1886*25c28e83SPiotr Jasiukajtis faddd %f6,%f4,%f6 1887*25c28e83SPiotr Jasiukajtis 1888*25c28e83SPiotr Jasiukajtis faddd %f16,%f14,%f16 1889*25c28e83SPiotr Jasiukajtis 1890*25c28e83SPiotr Jasiukajtis faddd %f26,%f24,%f26 1891*25c28e83SPiotr Jasiukajtis 1892*25c28e83SPiotr Jasiukajtis faddd %f6,%f0,%f6 1893*25c28e83SPiotr Jasiukajtis 1894*25c28e83SPiotr Jasiukajtis faddd %f16,%f10,%f16 1895*25c28e83SPiotr Jasiukajtis 1896*25c28e83SPiotr Jasiukajtis faddd %f26,%f20,%f26 1897*25c28e83SPiotr Jasiukajtis 1898*25c28e83SPiotr Jasiukajtis faddd %f6,%f32,%f6 1899*25c28e83SPiotr Jasiukajtis 1900*25c28e83SPiotr Jasiukajtis faddd %f16,%f34,%f16 1901*25c28e83SPiotr Jasiukajtis 1902*25c28e83SPiotr Jasiukajtis faddd %f26,%f36,%f26 1903*25c28e83SPiotr Jasiukajtis 1904*25c28e83SPiotr Jasiukajtis.FIXSIGN: 1905*25c28e83SPiotr Jasiukajtis ld [%fp+n0],%o3 ; add %o3,1,%o3 1906*25c28e83SPiotr Jasiukajtis add %l5,thresh-4,%g1 1907*25c28e83SPiotr Jasiukajtis 1908*25c28e83SPiotr Jasiukajtis ld [%fp+n1],%o4 ; add %o4,1,%o4 1909*25c28e83SPiotr Jasiukajtis 1910*25c28e83SPiotr Jasiukajtis ld [%fp+n2],%o5 ; add %o5,1,%o5 1911*25c28e83SPiotr Jasiukajtis and %o3,2,%o3 1912*25c28e83SPiotr 
Jasiukajtis 1913*25c28e83SPiotr Jasiukajtis sll %o3,2,%o3 1914*25c28e83SPiotr Jasiukajtis and %o4,2,%o4 1915*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%l0 ! preload next argument 1916*25c28e83SPiotr Jasiukajtis 1917*25c28e83SPiotr Jasiukajtis sll %o4,2,%o4 1918*25c28e83SPiotr Jasiukajtis and %o5,2,%o5 1919*25c28e83SPiotr Jasiukajtis ld [%g1+%o3],%f8 1920*25c28e83SPiotr Jasiukajtis 1921*25c28e83SPiotr Jasiukajtis sll %o5,2,%o5 1922*25c28e83SPiotr Jasiukajtis ld [%g1+%o4],%f18 1923*25c28e83SPiotr Jasiukajtis 1924*25c28e83SPiotr Jasiukajtis ld [%g1+%o5],%f28 1925*25c28e83SPiotr Jasiukajtis fxors %f9,%f8,%f9 1926*25c28e83SPiotr Jasiukajtis 1927*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%f0 1928*25c28e83SPiotr Jasiukajtis fxors %f29,%f28,%f29 1929*25c28e83SPiotr Jasiukajtis 1930*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,%f1 1931*25c28e83SPiotr Jasiukajtis fxors %f19,%f18,%f19 1932*25c28e83SPiotr Jasiukajtis 1933*25c28e83SPiotr Jasiukajtis fors %f6,%f9,%f6 ! tack on sign 1934*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 1935*25c28e83SPiotr Jasiukajtis st %f6,[%o0] 1936*25c28e83SPiotr Jasiukajtis 1937*25c28e83SPiotr Jasiukajtis fors %f26,%f29,%f26 ! tack on sign 1938*25c28e83SPiotr Jasiukajtis st %f7,[%o0+4] 1939*25c28e83SPiotr Jasiukajtis 1940*25c28e83SPiotr Jasiukajtis fors %f16,%f19,%f16 ! tack on sign 1941*25c28e83SPiotr Jasiukajtis st %f26,[%o2] 1942*25c28e83SPiotr Jasiukajtis 1943*25c28e83SPiotr Jasiukajtis st %f27,[%o2+4] 1944*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 1945*25c28e83SPiotr Jasiukajtis 1946*25c28e83SPiotr Jasiukajtis st %f16,[%o1] 1947*25c28e83SPiotr Jasiukajtis andn %l0,%i5,%l0 ! hx &= ~0x80000000 1948*25c28e83SPiotr Jasiukajtis bg,pt %icc,.LOOP0 1949*25c28e83SPiotr Jasiukajtis 1950*25c28e83SPiotr Jasiukajtis! delay slot 1951*25c28e83SPiotr Jasiukajtis st %f17,[%o1+4] 1952*25c28e83SPiotr Jasiukajtis 1953*25c28e83SPiotr Jasiukajtis ba,pt %icc,.ENDLOOP0 1954*25c28e83SPiotr Jasiukajtis! 
! delay slot
	nop

!
! .CASE1 through .CASE7 each finish three elements at once.  On entry
! element 0/1/2 has its argument in %f0/%f10/%f20 and a companion term
! in %f2/%f12/%f22 (the "x" and "y" produced by the reduction shown in
! .ENDLOOP2 below).  Every element is evaluated one of two ways:
!
!   table path:	the high word of x (reloaded from x?_1 on the stack) is
!		rebased by 0x3fc3c000, shifted and masked into a multiple
!		of 32, offset by %oN, and used to fetch three doubles via
!		%l3, %l3+8 (%g1) and %l4.  x is reduced by the value
!		built with %f31/%f44, y is added back, and two short
!		polynomials with coefficients in %f54-%f62 are combined
!		with the table data.
!
!   direct path:	x is squared and run through a four-coefficient
!		polynomial read from [%l5+%oN]; the result is multiplied
!		by the double at [%fp+%oN+x?_1], the double at
!		[%fp+%oN+y?_0] is added, and the sum is added to the
!		double at [%fp+%oN+x?_1] again.
!
! %o3/%o4/%o5 are the per-element offsets for elements 0/1/2; they are
! set up before this point (NOTE(review): presumably 0 or 8, as computed
! for %o4 in .ENDLOOP2 -- confirm against the main loop).  Every case
! leaves the three results in %f6, %f16 and %f26 and branches to
! .FIXSIGN.
!
!	label	element 0	element 1	element 2
!	.CASE1	table		table		direct
!	.CASE2	table		direct		table
!	.CASE3	table		direct		direct
!	.CASE4	direct		table		table
!	.CASE5	direct		table		direct
!	.CASE6	direct		direct		table
!	.CASE7	direct		direct		direct
!
	.align	32
.CASE1:
	fpadd32s %f10,%f31,%f18
	sethi	%hi(0x3fc3c000),%o7
	ld	[%fp+x0_1],%l0

	fand	%f8,%f44,%f4
	add	%l3,8,%g1
	ld	[%fp+x1_1],%l1

	fand	%f18,%f44,%f14
	sub	%l0,%o7,%l0

	fsubd	%f0,%f4,%f0
	srl	%l0,10,%l0
	sub	%l1,%o7,%l1

	fsubd	%f10,%f14,%f10
	srl	%l1,10,%l1

	fmuld	%f20,%f20,%f20
	ldd	[%l5+%o5],%f36
	add	%l5,%o5,%l2

	faddd	%f0,%f2,%f0
	andn	%l0,0x1f,%l0

	faddd	%f10,%f12,%f10
	andn	%l1,0x1f,%l1

	fmuld	%f20,%f36,%f24
	ldd	[%l2+0x10],%f26
	add	%fp,%o5,%o5

	fmuld	%f0,%f0,%f2
	add	%l0,%o3,%l0

	fmuld	%f10,%f10,%f12
	add	%l1,%o4,%l1

	faddd	%f24,%f26,%f24
	ldd	[%l2+0x20],%f36

	fmuld	%f2,%f58,%f6
	ldd	[%l3+%l0],%f32

	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34

	fmuld	%f20,%f24,%f24
	ldd	[%l2+0x30],%f26

	faddd	%f6,%f56,%f6
	fmuld	%f2,%f62,%f4

	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14

	faddd	%f24,%f36,%f24
	ldd	[%o5+x2_1],%f36

	fmuld	%f2,%f6,%f6
	faddd	%f4,%f60,%f4

	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14

	fmuld	%f20,%f24,%f24

	faddd	%f6,%f54,%f6
	fmuld	%f2,%f4,%f4
	ldd	[%g1+%l0],%f2

	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12

	faddd	%f24,%f26,%f24

	fmuld	%f0,%f6,%f6
	ldd	[%l4+%l0],%f0

	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10

	fmuld	%f4,%f32,%f4
	std	%f22,[%fp+y2_0]

	fmuld	%f14,%f34,%f14

	fmuld	%f6,%f2,%f6

	fmuld	%f16,%f12,%f16

	fmuld	%f20,%f24,%f24

	faddd	%f6,%f4,%f6

	faddd	%f16,%f14,%f16

	fmuld	%f36,%f24,%f24
	ldd	[%o5+y2_0],%f22

	faddd	%f6,%f0,%f6

	faddd	%f16,%f10,%f16

	faddd	%f24,%f22,%f24

	faddd	%f6,%f32,%f6

	faddd	%f16,%f34,%f16
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f36,%f24,%f26

! .CASE2: element 0 is already known to take the table path; bit 1 of
! %l2 decides whether element 2 takes the direct path (-> .CASE3).
	.align	32
.CASE2:
	fpadd32s %f0,%f31,%f8
	ld	[%fp+x0_1],%l0
	andcc	%l2,2,%g0
	bne,pn	%icc,.CASE3

! delay slot
	sethi	%hi(0x3fc3c000),%o7
	fpadd32s %f20,%f31,%f28
	ld	[%fp+x2_1],%l2

	fand	%f8,%f44,%f4
	sub	%l0,%o7,%l0
	add	%l3,8,%g1

	fand	%f28,%f44,%f24
	sub	%l2,%o7,%l2

	fsubd	%f0,%f4,%f0
	srl	%l0,10,%l0

	fsubd	%f20,%f24,%f20
	srl	%l2,10,%l2

	fmuld	%f10,%f10,%f10
	ldd	[%l5+%o4],%f34
	add	%l5,%o4,%l1

	faddd	%f0,%f2,%f0
	andn	%l0,0x1f,%l0

	faddd	%f20,%f22,%f20
	andn	%l2,0x1f,%l2

	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16
	add	%fp,%o4,%o4

	fmuld	%f0,%f0,%f2
	add	%l0,%o3,%l0

	fmuld	%f20,%f20,%f22
	add	%l2,%o5,%l2

	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34

	fmuld	%f2,%f58,%f6
	ldd	[%l3+%l0],%f32

	fmuld	%f22,%f58,%f26
	ldd	[%l3+%l2],%f36

	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16

	faddd	%f6,%f56,%f6
	fmuld	%f2,%f62,%f4

	faddd	%f26,%f56,%f26
	fmuld	%f22,%f62,%f24

	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34

	fmuld	%f2,%f6,%f6
	faddd	%f4,%f60,%f4

	fmuld	%f22,%f26,%f26
	faddd	%f24,%f60,%f24

	fmuld	%f10,%f14,%f14

	faddd	%f6,%f54,%f6
	fmuld	%f2,%f4,%f4
	ldd	[%g1+%l0],%f2

	faddd	%f26,%f54,%f26
	fmuld	%f22,%f24,%f24
	ldd	[%g1+%l2],%f22

	faddd	%f14,%f16,%f14

	fmuld	%f0,%f6,%f6
	ldd	[%l4+%l0],%f0

	fmuld	%f20,%f26,%f26
	ldd	[%l4+%l2],%f20

	fmuld	%f4,%f32,%f4
	std	%f12,[%fp+y1_0]

	fmuld	%f24,%f36,%f24

	fmuld	%f6,%f2,%f6

	fmuld	%f26,%f22,%f26

	fmuld	%f10,%f14,%f14

	faddd	%f6,%f4,%f6

	faddd	%f26,%f24,%f26

	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12

	faddd	%f6,%f0,%f6

	faddd	%f26,%f20,%f26

	faddd	%f14,%f12,%f14

	faddd	%f6,%f32,%f6

	faddd	%f26,%f36,%f26
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f34,%f14,%f16

! .CASE3: entered from .CASE2 with %f8, %l0 and %o7 already set up there
! (the sethi sits in the delay slot of the branch that lands here).
	.align	32
.CASE3:
	fand	%f8,%f44,%f4
	add	%l3,8,%g1
	sub	%l0,%o7,%l0

	fmuld	%f10,%f10,%f10
	ldd	[%l5+%o4],%f34
	add	%l5,%o4,%l1

	fsubd	%f0,%f4,%f0
	srl	%l0,10,%l0

	fmuld	%f20,%f20,%f20
	ldd	[%l5+%o5],%f36
	add	%l5,%o5,%l2

	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16
	add	%fp,%o4,%o4

	faddd	%f0,%f2,%f0
	andn	%l0,0x1f,%l0

	fmuld	%f20,%f36,%f24
	ldd	[%l2+0x10],%f26
	add	%fp,%o5,%o5

	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34

	fmuld	%f0,%f0,%f2
	add	%l0,%o3,%l0

	faddd	%f24,%f26,%f24
	ldd	[%l2+0x20],%f36

	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16

	fmuld	%f2,%f58,%f6
	ldd	[%l3+%l0],%f32

	fmuld	%f20,%f24,%f24
	ldd	[%l2+0x30],%f26

	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34

	faddd	%f6,%f56,%f6
	fmuld	%f2,%f62,%f4

	faddd	%f24,%f36,%f24
	ldd	[%o5+x2_1],%f36

	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]

	fmuld	%f2,%f6,%f6
	faddd	%f4,%f60,%f4

	fmuld	%f20,%f24,%f24
	std	%f22,[%fp+y2_0]

	faddd	%f14,%f16,%f14

	faddd	%f6,%f54,%f6
	fmuld	%f2,%f4,%f4
	ldd	[%g1+%l0],%f2

	faddd	%f24,%f26,%f24

	fmuld	%f10,%f14,%f14

	fmuld	%f0,%f6,%f6
	ldd	[%l4+%l0],%f0

	fmuld	%f4,%f32,%f4

	fmuld	%f20,%f24,%f24

	fmuld	%f6,%f2,%f6

	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12

	fmuld	%f36,%f24,%f24
	ldd	[%o5+y2_0],%f22

	faddd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	faddd	%f24,%f22,%f24

	faddd	%f6,%f0,%f6

	faddd	%f34,%f14,%f16

	faddd	%f36,%f24,%f26
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f6,%f32,%f6

! .CASE4: element 0 takes the direct path.  Bit 1 of %l1 sends element 1
! to the direct path as well (-> .CASE6); the andcc on %l2 in that
! branch's delay slot sets the condition codes tested both by the
! following branch to .CASE5 and by the branch at the top of .CASE6.
	.align	32
.CASE4:
	fands	%f29,%f28,%f29		! if (n & 1) clear sign bit
	sethi	%hi(0x3fc3c000),%o7
	andcc	%l1,2,%g0
	bne,pn	%icc,.CASE6

! delay slot
	andcc	%l2,2,%g0
	fpadd32s %f10,%f31,%f18
	ld	[%fp+x1_1],%l1
	bne,pn	%icc,.CASE5

! delay slot
	add	%l3,8,%g1
	ld	[%fp+x2_1],%l2
	fpadd32s %f20,%f31,%f28

	fand	%f18,%f44,%f14
	sub	%l1,%o7,%l1

	fand	%f28,%f44,%f24
	sub	%l2,%o7,%l2

	fsubd	%f10,%f14,%f10
	srl	%l1,10,%l1

	fsubd	%f20,%f24,%f20
	srl	%l2,10,%l2

	fmuld	%f0,%f0,%f0
	ldd	[%l5+%o3],%f32
	add	%l5,%o3,%l0

	faddd	%f10,%f12,%f10
	andn	%l1,0x1f,%l1

	faddd	%f20,%f22,%f20
	andn	%l2,0x1f,%l2

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6
	add	%fp,%o3,%o3

	fmuld	%f10,%f10,%f12
	add	%l1,%o4,%l1

	fmuld	%f20,%f20,%f22
	add	%l2,%o5,%l2

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32

	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34

	fmuld	%f22,%f58,%f26
	ldd	[%l3+%l2],%f36

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6

	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14

	faddd	%f26,%f56,%f26
	fmuld	%f22,%f62,%f24

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32

	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14

	fmuld	%f22,%f26,%f26
	faddd	%f24,%f60,%f24

	fmuld	%f0,%f4,%f4

	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12

	faddd	%f26,%f54,%f26
	fmuld	%f22,%f24,%f24
	ldd	[%g1+%l2],%f22

	faddd	%f4,%f6,%f4

	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10

	fmuld	%f20,%f26,%f26
	ldd	[%l4+%l2],%f20

	fmuld	%f14,%f34,%f14
	std	%f2,[%fp+y0_0]

	fmuld	%f24,%f36,%f24

	fmuld	%f0,%f4,%f4

	fmuld	%f16,%f12,%f16

	fmuld	%f26,%f22,%f26

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2

	faddd	%f16,%f14,%f16

	faddd	%f26,%f24,%f26

	faddd	%f4,%f2,%f4

	faddd	%f16,%f10,%f16

	faddd	%f26,%f20,%f26

	faddd	%f32,%f4,%f6

	faddd	%f16,%f34,%f16
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f26,%f36,%f26

! .CASE5: entered from .CASE4 with %f18, %l1, %o7 and %g1 already set up.
	.align	32
.CASE5:
	fand	%f18,%f44,%f14
	sub	%l1,%o7,%l1

	fmuld	%f0,%f0,%f0
	ldd	[%l5+%o3],%f32
	add	%l5,%o3,%l0

	fsubd	%f10,%f14,%f10
	srl	%l1,10,%l1

	fmuld	%f20,%f20,%f20
	ldd	[%l5+%o5],%f36
	add	%l5,%o5,%l2

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6
	add	%fp,%o3,%o3

	faddd	%f10,%f12,%f10
	andn	%l1,0x1f,%l1

	fmuld	%f20,%f36,%f24
	ldd	[%l2+0x10],%f26
	add	%fp,%o5,%o5

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32

	fmuld	%f10,%f10,%f12
	add	%l1,%o4,%l1

	faddd	%f24,%f26,%f24
	ldd	[%l2+0x20],%f36

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6

	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34

	fmuld	%f20,%f24,%f24
	ldd	[%l2+0x30],%f26

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32

	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14

	faddd	%f24,%f36,%f24
	ldd	[%o5+x2_1],%f36

	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]

	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14

	fmuld	%f20,%f24,%f24
	std	%f22,[%fp+y2_0]

	faddd	%f4,%f6,%f4

	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12

	faddd	%f24,%f26,%f24

	fmuld	%f0,%f4,%f4

	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10

	fmuld	%f14,%f34,%f14

	fmuld	%f20,%f24,%f24

	fmuld	%f16,%f12,%f16

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2

	fmuld	%f36,%f24,%f24
	ldd	[%o5+y2_0],%f22

	faddd	%f16,%f14,%f16

	faddd	%f4,%f2,%f4

	faddd	%f24,%f22,%f24

	faddd	%f16,%f10,%f16

	faddd	%f32,%f4,%f6

	faddd	%f36,%f24,%f26
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f16,%f34,%f16

! .CASE6: entered from .CASE4 with the condition codes still reflecting
! "andcc %l2,2"; nonzero means element 2 is direct as well (-> .CASE7).
	.align	32
.CASE6:
	ld	[%fp+x2_1],%l2
	add	%l3,8,%g1
	bne,pn	%icc,.CASE7
! delay slot
	fpadd32s %f20,%f31,%f28

	fand	%f28,%f44,%f24
	ldd	[%l5+%o3],%f32
	add	%l5,%o3,%l0

	fmuld	%f0,%f0,%f0
	sub	%l2,%o7,%l2

	fsubd	%f20,%f24,%f20
	srl	%l2,10,%l2

	fmuld	%f10,%f10,%f10
	ldd	[%l5+%o4],%f34
	add	%l5,%o4,%l1

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6
	add	%fp,%o3,%o3

	faddd	%f20,%f22,%f20
	andn	%l2,0x1f,%l2

	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16
	add	%fp,%o4,%o4

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32

	fmuld	%f20,%f20,%f22
	add	%l2,%o5,%l2

	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6

	fmuld	%f22,%f58,%f26
	ldd	[%l3+%l2],%f36

	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32

	faddd	%f26,%f56,%f26
	fmuld	%f22,%f62,%f24

	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34

	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]

	fmuld	%f22,%f26,%f26
	faddd	%f24,%f60,%f24

	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]

	faddd	%f4,%f6,%f4

	faddd	%f26,%f54,%f26
	fmuld	%f22,%f24,%f24
	ldd	[%g1+%l2],%f22

	faddd	%f14,%f16,%f14

	fmuld	%f0,%f4,%f4

	fmuld	%f20,%f26,%f26
	ldd	[%l4+%l2],%f20

	fmuld	%f24,%f36,%f24

	fmuld	%f10,%f14,%f14

	fmuld	%f26,%f22,%f26

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2

	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12

	faddd	%f26,%f24,%f26

	faddd	%f4,%f2,%f4

	faddd	%f14,%f12,%f14

	faddd	%f26,%f20,%f26

	faddd	%f32,%f4,%f6

	faddd	%f34,%f14,%f16
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f26,%f36,%f26

! .CASE7: all three elements take the direct path; no table lookups.
	.align	32
.CASE7:
	fmuld	%f0,%f0,%f0
	ldd	[%l5+%o3],%f32
	add	%l5,%o3,%l0

	fmuld	%f10,%f10,%f10
	ldd	[%l5+%o4],%f34
	add	%l5,%o4,%l1

	fmuld	%f20,%f20,%f20
	ldd	[%l5+%o5],%f36
	add	%l5,%o5,%l2

	fmuld	%f0,%f32,%f4
	ldd	[%l0+0x10],%f6
	add	%fp,%o3,%o3

	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16
	add	%fp,%o4,%o4

	fmuld	%f20,%f36,%f24
	ldd	[%l2+0x10],%f26
	add	%fp,%o5,%o5

	faddd	%f4,%f6,%f4
	ldd	[%l0+0x20],%f32

	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34

	faddd	%f24,%f26,%f24
	ldd	[%l2+0x20],%f36

	fmuld	%f0,%f4,%f4
	ldd	[%l0+0x30],%f6

	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16

	fmuld	%f20,%f24,%f24
	ldd	[%l2+0x30],%f26

	faddd	%f4,%f32,%f4
	ldd	[%o3+x0_1],%f32

	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34

	faddd	%f24,%f36,%f24
	ldd	[%o5+x2_1],%f36

	fmuld	%f0,%f4,%f4
	std	%f2,[%fp+y0_0]

	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]

	fmuld	%f20,%f24,%f24
	std	%f22,[%fp+y2_0]

	faddd	%f4,%f6,%f4

	faddd	%f14,%f16,%f14

	faddd	%f24,%f26,%f24

	fmuld	%f0,%f4,%f4

	fmuld	%f10,%f14,%f14

	fmuld	%f20,%f24,%f24

	fmuld	%f32,%f4,%f4
	ldd	[%o3+y0_0],%f2

	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12

	fmuld	%f36,%f24,%f24
	ldd	[%o5+y2_0],%f22

	faddd	%f4,%f2,%f4

	faddd	%f14,%f12,%f14

	faddd	%f24,%f22,%f24

	faddd	%f32,%f4,%f6

	faddd	%f34,%f14,%f16
	ba,pt	%icc,.FIXSIGN

! delay slot
	faddd	%f36,%f24,%f26


!
! .ENDLOOP2: finish one element whose argument is in %f10 and store the
! result through %o1, then fall through into .ENDLOOP1.
!
!   1. n is formed from x * %f40 by adding and subtracting %f42; the low
!      word of the intermediate sum is saved in n1 as the integer n.
!   2. x is reduced by n times each of %f46, %f48, %f50 and %f52 in turn,
!      carrying the rounding error of every subtraction forward, which
!      yields a head "x" (%f10) and a tail "y" (%f12).
!      NOTE(review): %f46-%f52 are presumably the pieces of pi/2 from
!      the constants table -- confirm where they are loaded.
!   3. The sign of x is saved in %f19, x is replaced by its absolute
!      value and y has the same sign flip applied.
!   4. fcmpgt32 against the word read from thresh selects the direct
!      path (local label 1) or the table path (fall-through); both leave
!      the result in %f16 and meet at local label 2.
!   5. The sign is recomputed from (n+1) & 2 and OR-ed into the result.
!
! n+1 is used wherever n selects a variant (NOTE(review): presumably the
! quadrant shift that turns the shared sin/cos kernel into cos).
!
	.align	32
.ENDLOOP2:
	fmuld	%f10,%f40,%f12
	add	%l5,thresh,%g1
	faddd	%f12,%f42,%f12
	st	%f13,[%fp+n1]
	fsubd	%f12,%f42,%f12		! n
	fmuld	%f12,%f46,%f14
	fsubd	%f10,%f14,%f14
	fmuld	%f12,%f48,%f16
	fsubd	%f14,%f16,%f10
	ld	[%fp+n1],%o4
	add	%o4,1,%o4		! n + 1
	fsubd	%f14,%f10,%f34
	and	%o4,1,%o4
	fsubd	%f34,%f16,%f34
	fmuld	%f12,%f50,%f18
	sll	%o4,3,%o4		! %o4 = ((n+1) & 1) * 8
	fsubd	%f18,%f34,%f18
	ld	[%g1+%o4],%f16
	fsubd	%f10,%f18,%f14
	fsubd	%f10,%f14,%f34
	add	%l5,thresh+4,%o7
	fsubd	%f34,%f18,%f34
	fmuld	%f12,%f52,%f12
	fsubd	%f12,%f34,%f12
	ld	[%o7+%o4],%f18
	fsubd	%f14,%f12,%f10		! x
	fsubd	%f14,%f10,%f14
	fands	%f10,%f30,%f19		! save signbit
	fabsd	%f10,%f10
	std	%f10,[%fp+x1_1]
	fsubd	%f14,%f12,%f12		! y
	fcmpgt32 %f16,%f10,%l1
	fxors	%f12,%f19,%f12
	fands	%f19,%f18,%f19		! if (n & 1) clear sign bit
	andcc	%l1,2,%g0
	bne,pn	%icc,1f
! delay slot
	nop
	! table path
	fpadd32s %f10,%f31,%f18
	ld	[%fp+x1_1],%l1
	fand	%f18,%f44,%f14
	sethi	%hi(0x3fc3c000),%o7
	add	%l3,8,%g1
	fsubd	%f10,%f14,%f10
	sub	%l1,%o7,%l1
	srl	%l1,10,%l1
	faddd	%f10,%f12,%f10
	andn	%l1,0x1f,%l1
	fmuld	%f10,%f10,%f12
	add	%l1,%o4,%l1
	fmuld	%f12,%f58,%f16
	ldd	[%l3+%l1],%f34
	faddd	%f16,%f56,%f16
	fmuld	%f12,%f62,%f14
	fmuld	%f12,%f16,%f16
	faddd	%f14,%f60,%f14
	faddd	%f16,%f54,%f16
	fmuld	%f12,%f14,%f14
	ldd	[%g1+%l1],%f12
	fmuld	%f10,%f16,%f16
	ldd	[%l4+%l1],%f10
	fmuld	%f14,%f34,%f14
	fmuld	%f16,%f12,%f16
	faddd	%f16,%f14,%f16
	faddd	%f16,%f10,%f16
	ba,pt	%icc,2f
	faddd	%f16,%f34,%f16		! (delay slot)
1:
	! direct path
	fmuld	%f10,%f10,%f10
	ldd	[%l5+%o4],%f34
	add	%l5,%o4,%l1
	fmuld	%f10,%f34,%f14
	ldd	[%l1+0x10],%f16
	add	%fp,%o4,%o4
	faddd	%f14,%f16,%f14
	ldd	[%l1+0x20],%f34
	fmuld	%f10,%f14,%f14
	ldd	[%l1+0x30],%f16
	faddd	%f14,%f34,%f14
	ldd	[%o4+x1_1],%f34
	fmuld	%f10,%f14,%f14
	std	%f12,[%fp+y1_0]
	faddd	%f14,%f16,%f14
	fmuld	%f10,%f14,%f14
	fmuld	%f34,%f14,%f14
	ldd	[%o4+y1_0],%f12
	faddd	%f14,%f12,%f14
	faddd	%f34,%f14,%f16
2:
	add	%l5,thresh-4,%g1
	ld	[%fp+n1],%o4
	add	%o4,1,%o4		! n + 1
	and	%o4,2,%o4
	sll	%o4,2,%o4		! %o4 = ((n+1) & 2) * 4
	ld	[%g1+%o4],%f18
	fxors	%f19,%f18,%f19
	fors	%f16,%f19,%f16		! tack on sign
	st	%f16,[%o1]
	st	%f17,[%o1+4]

.ENDLOOP1:
	fmuld	%f0,%f40,%f2
	add	%l5,thresh,%g1
	faddd	%f2,%f42,%f2
	st	%f3,[%fp+n0]
	fsubd	%f2,%f42,%f2		! 
n 2846*25c28e83SPiotr Jasiukajtis fmuld %f2,%f46,%f4 2847*25c28e83SPiotr Jasiukajtis fsubd %f0,%f4,%f4 2848*25c28e83SPiotr Jasiukajtis fmuld %f2,%f48,%f6 2849*25c28e83SPiotr Jasiukajtis fsubd %f4,%f6,%f0 2850*25c28e83SPiotr Jasiukajtis ld [%fp+n0],%o3 ; add %o3,1,%o3 2851*25c28e83SPiotr Jasiukajtis fsubd %f4,%f0,%f32 2852*25c28e83SPiotr Jasiukajtis and %o3,1,%o3 2853*25c28e83SPiotr Jasiukajtis fsubd %f32,%f6,%f32 2854*25c28e83SPiotr Jasiukajtis fmuld %f2,%f50,%f8 2855*25c28e83SPiotr Jasiukajtis sll %o3,3,%o3 2856*25c28e83SPiotr Jasiukajtis fsubd %f8,%f32,%f8 2857*25c28e83SPiotr Jasiukajtis ld [%g1+%o3],%f6 2858*25c28e83SPiotr Jasiukajtis fsubd %f0,%f8,%f4 2859*25c28e83SPiotr Jasiukajtis fsubd %f0,%f4,%f32 2860*25c28e83SPiotr Jasiukajtis add %l5,thresh+4,%o7 2861*25c28e83SPiotr Jasiukajtis fsubd %f32,%f8,%f32 2862*25c28e83SPiotr Jasiukajtis fmuld %f2,%f52,%f2 2863*25c28e83SPiotr Jasiukajtis fsubd %f2,%f32,%f2 2864*25c28e83SPiotr Jasiukajtis ld [%o7+%o3],%f8 2865*25c28e83SPiotr Jasiukajtis fsubd %f4,%f2,%f0 ! x 2866*25c28e83SPiotr Jasiukajtis fsubd %f4,%f0,%f4 2867*25c28e83SPiotr Jasiukajtis fands %f0,%f30,%f9 ! save signbit 2868*25c28e83SPiotr Jasiukajtis fabsd %f0,%f0 2869*25c28e83SPiotr Jasiukajtis std %f0,[%fp+x0_1] 2870*25c28e83SPiotr Jasiukajtis fsubd %f4,%f2,%f2 ! y 2871*25c28e83SPiotr Jasiukajtis fcmpgt32 %f6,%f0,%l0 2872*25c28e83SPiotr Jasiukajtis fxors %f2,%f9,%f2 2873*25c28e83SPiotr Jasiukajtis fands %f9,%f8,%f9 ! if (n & 1) clear sign bit 2874*25c28e83SPiotr Jasiukajtis andcc %l0,2,%g0 2875*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2876*25c28e83SPiotr Jasiukajtis! 
delay slot 2877*25c28e83SPiotr Jasiukajtis nop 2878*25c28e83SPiotr Jasiukajtis fpadd32s %f0,%f31,%f8 2879*25c28e83SPiotr Jasiukajtis ld [%fp+x0_1],%l0 2880*25c28e83SPiotr Jasiukajtis fand %f8,%f44,%f4 2881*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fc3c000),%o7 2882*25c28e83SPiotr Jasiukajtis add %l3,8,%g1 2883*25c28e83SPiotr Jasiukajtis fsubd %f0,%f4,%f0 2884*25c28e83SPiotr Jasiukajtis sub %l0,%o7,%l0 2885*25c28e83SPiotr Jasiukajtis srl %l0,10,%l0 2886*25c28e83SPiotr Jasiukajtis faddd %f0,%f2,%f0 2887*25c28e83SPiotr Jasiukajtis andn %l0,0x1f,%l0 2888*25c28e83SPiotr Jasiukajtis fmuld %f0,%f0,%f2 2889*25c28e83SPiotr Jasiukajtis add %l0,%o3,%l0 2890*25c28e83SPiotr Jasiukajtis fmuld %f2,%f58,%f6 2891*25c28e83SPiotr Jasiukajtis ldd [%l3+%l0],%f32 2892*25c28e83SPiotr Jasiukajtis faddd %f6,%f56,%f6 2893*25c28e83SPiotr Jasiukajtis fmuld %f2,%f62,%f4 2894*25c28e83SPiotr Jasiukajtis fmuld %f2,%f6,%f6 2895*25c28e83SPiotr Jasiukajtis faddd %f4,%f60,%f4 2896*25c28e83SPiotr Jasiukajtis faddd %f6,%f54,%f6 2897*25c28e83SPiotr Jasiukajtis fmuld %f2,%f4,%f4 2898*25c28e83SPiotr Jasiukajtis ldd [%g1+%l0],%f2 2899*25c28e83SPiotr Jasiukajtis fmuld %f0,%f6,%f6 2900*25c28e83SPiotr Jasiukajtis ldd [%l4+%l0],%f0 2901*25c28e83SPiotr Jasiukajtis fmuld %f4,%f32,%f4 2902*25c28e83SPiotr Jasiukajtis fmuld %f6,%f2,%f6 2903*25c28e83SPiotr Jasiukajtis faddd %f6,%f4,%f6 2904*25c28e83SPiotr Jasiukajtis faddd %f6,%f0,%f6 2905*25c28e83SPiotr Jasiukajtis ba,pt %icc,2f 2906*25c28e83SPiotr Jasiukajtis faddd %f6,%f32,%f6 2907*25c28e83SPiotr Jasiukajtis1: 2908*25c28e83SPiotr Jasiukajtis fmuld %f0,%f0,%f0 2909*25c28e83SPiotr Jasiukajtis ldd [%l5+%o3],%f32 2910*25c28e83SPiotr Jasiukajtis add %l5,%o3,%l0 2911*25c28e83SPiotr Jasiukajtis fmuld %f0,%f32,%f4 2912*25c28e83SPiotr Jasiukajtis ldd [%l0+0x10],%f6 2913*25c28e83SPiotr Jasiukajtis add %fp,%o3,%o3 2914*25c28e83SPiotr Jasiukajtis faddd %f4,%f6,%f4 2915*25c28e83SPiotr Jasiukajtis ldd [%l0+0x20],%f32 2916*25c28e83SPiotr Jasiukajtis fmuld %f0,%f4,%f4 
2917*25c28e83SPiotr Jasiukajtis ldd [%l0+0x30],%f6 2918*25c28e83SPiotr Jasiukajtis faddd %f4,%f32,%f4 2919*25c28e83SPiotr Jasiukajtis ldd [%o3+x0_1],%f32 2920*25c28e83SPiotr Jasiukajtis fmuld %f0,%f4,%f4 2921*25c28e83SPiotr Jasiukajtis std %f2,[%fp+y0_0] 2922*25c28e83SPiotr Jasiukajtis faddd %f4,%f6,%f4 2923*25c28e83SPiotr Jasiukajtis fmuld %f0,%f4,%f4 2924*25c28e83SPiotr Jasiukajtis fmuld %f32,%f4,%f4 2925*25c28e83SPiotr Jasiukajtis ldd [%o3+y0_0],%f2 2926*25c28e83SPiotr Jasiukajtis faddd %f4,%f2,%f4 2927*25c28e83SPiotr Jasiukajtis faddd %f32,%f4,%f6 2928*25c28e83SPiotr Jasiukajtis2: 2929*25c28e83SPiotr Jasiukajtis add %l5,thresh-4,%g1 2930*25c28e83SPiotr Jasiukajtis ld [%fp+n0],%o3 ; add %o3,1,%o3 2931*25c28e83SPiotr Jasiukajtis and %o3,2,%o3 2932*25c28e83SPiotr Jasiukajtis sll %o3,2,%o3 2933*25c28e83SPiotr Jasiukajtis ld [%g1+%o3],%f8 2934*25c28e83SPiotr Jasiukajtis fxors %f9,%f8,%f9 2935*25c28e83SPiotr Jasiukajtis fors %f6,%f9,%f6 ! tack on sign 2936*25c28e83SPiotr Jasiukajtis st %f6,[%o0] 2937*25c28e83SPiotr Jasiukajtis st %f7,[%o0+4] 2938*25c28e83SPiotr Jasiukajtis 2939*25c28e83SPiotr Jasiukajtis.ENDLOOP0: 2940*25c28e83SPiotr Jasiukajtis 2941*25c28e83SPiotr Jasiukajtis! check for huge arguments remaining 2942*25c28e83SPiotr Jasiukajtis 2943*25c28e83SPiotr Jasiukajtis tst LIM_l6 2944*25c28e83SPiotr Jasiukajtis be,pt %icc,.exit 2945*25c28e83SPiotr Jasiukajtis! delay slot 2946*25c28e83SPiotr Jasiukajtis nop 2947*25c28e83SPiotr Jasiukajtis 2948*25c28e83SPiotr Jasiukajtis! 
========== huge range (use C code) ========== 2949*25c28e83SPiotr Jasiukajtis 2950*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9 2951*25c28e83SPiotr Jasiukajtis ldx [%fp+xsave],%o1 2952*25c28e83SPiotr Jasiukajtis ldx [%fp+ysave],%o3 2953*25c28e83SPiotr Jasiukajtis#else 2954*25c28e83SPiotr Jasiukajtis ld [%fp+xsave],%o1 2955*25c28e83SPiotr Jasiukajtis ld [%fp+ysave],%o3 2956*25c28e83SPiotr Jasiukajtis#endif 2957*25c28e83SPiotr Jasiukajtis ld [%fp+nsave],%o0 2958*25c28e83SPiotr Jasiukajtis ld [%fp+sxsave],%o2 2959*25c28e83SPiotr Jasiukajtis ld [%fp+sysave],%o4 2960*25c28e83SPiotr Jasiukajtis sra %o2,0,%o2 ! sign-extend for V9 2961*25c28e83SPiotr Jasiukajtis sra %o4,0,%o4 2962*25c28e83SPiotr Jasiukajtis call __vlibm_vcos_big 2963*25c28e83SPiotr Jasiukajtis mov %l7,%o5 ! delay slot 2964*25c28e83SPiotr Jasiukajtis 2965*25c28e83SPiotr Jasiukajtis.exit: 2966*25c28e83SPiotr Jasiukajtis ret 2967*25c28e83SPiotr Jasiukajtis restore 2968*25c28e83SPiotr Jasiukajtis 2969*25c28e83SPiotr Jasiukajtis 2970*25c28e83SPiotr Jasiukajtis .align 32 2971*25c28e83SPiotr Jasiukajtis.SKIP0: 2972*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 2973*25c28e83SPiotr Jasiukajtis ble,pn %icc,.ENDLOOP0 2974*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken 2975*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 ! y += stridey 2976*25c28e83SPiotr Jasiukajtis andn %l1,%i5,%l0 ! hx &= ~0x80000000 2977*25c28e83SPiotr Jasiukajtis fmovs %f10,%f0 2978*25c28e83SPiotr Jasiukajtis ld [%i1+4],%f1 2979*25c28e83SPiotr Jasiukajtis ba,pt %icc,.LOOP0 2980*25c28e83SPiotr Jasiukajtis! delay slot 2981*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 2982*25c28e83SPiotr Jasiukajtis 2983*25c28e83SPiotr Jasiukajtis 2984*25c28e83SPiotr Jasiukajtis .align 32 2985*25c28e83SPiotr Jasiukajtis.SKIP1: 2986*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 2987*25c28e83SPiotr Jasiukajtis ble,pn %icc,.ENDLOOP1 2988*25c28e83SPiotr Jasiukajtis! 
delay slot, harmless if branch taken 2989*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 ! y += stridey 2990*25c28e83SPiotr Jasiukajtis andn %l2,%i5,%l1 ! hx &= ~0x80000000 2991*25c28e83SPiotr Jasiukajtis fmovs %f20,%f10 2992*25c28e83SPiotr Jasiukajtis ld [%i1+4],%f11 2993*25c28e83SPiotr Jasiukajtis ba,pt %icc,.LOOP1 2994*25c28e83SPiotr Jasiukajtis! delay slot 2995*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 2996*25c28e83SPiotr Jasiukajtis 2997*25c28e83SPiotr Jasiukajtis 2998*25c28e83SPiotr Jasiukajtis .align 32 2999*25c28e83SPiotr Jasiukajtis.SKIP2: 3000*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 3001*25c28e83SPiotr Jasiukajtis ble,pn %icc,.ENDLOOP2 3002*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken 3003*25c28e83SPiotr Jasiukajtis add %i3,%i4,%i3 ! y += stridey 3004*25c28e83SPiotr Jasiukajtis ld [%i1],%l2 3005*25c28e83SPiotr Jasiukajtis ld [%i1],%f20 3006*25c28e83SPiotr Jasiukajtis ld [%i1+4],%f21 3007*25c28e83SPiotr Jasiukajtis andn %l2,%i5,%l2 ! hx &= ~0x80000000 3008*25c28e83SPiotr Jasiukajtis ba,pt %icc,.LOOP2 3009*25c28e83SPiotr Jasiukajtis! delay slot 3010*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 3011*25c28e83SPiotr Jasiukajtis 3012*25c28e83SPiotr Jasiukajtis 3013*25c28e83SPiotr Jasiukajtis .align 32 3014*25c28e83SPiotr Jasiukajtis.BIG0: 3015*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ff00000),%o7 3016*25c28e83SPiotr Jasiukajtis cmp %l0,%o7 3017*25c28e83SPiotr Jasiukajtis bl,a,pt %icc,1f ! if hx < 0x7ff00000 3018*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken 3019*25c28e83SPiotr Jasiukajtis mov %l7,LIM_l6 ! set biguns flag or 3020*25c28e83SPiotr Jasiukajtis fsubd %f0,%f0,%f0 ! y = x - x 3021*25c28e83SPiotr Jasiukajtis st %f0,[%o0] 3022*25c28e83SPiotr Jasiukajtis st %f1,[%o0+4] 3023*25c28e83SPiotr Jasiukajtis1: 3024*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 3025*25c28e83SPiotr Jasiukajtis ble,pn %icc,.ENDLOOP0 3026*25c28e83SPiotr Jasiukajtis! 
delay slot, harmless if branch taken 3027*25c28e83SPiotr Jasiukajtis andn %l1,%i5,%l0 ! hx &= ~0x80000000 3028*25c28e83SPiotr Jasiukajtis fmovd %f10,%f0 3029*25c28e83SPiotr Jasiukajtis ba,pt %icc,.LOOP0 3030*25c28e83SPiotr Jasiukajtis! delay slot 3031*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 3032*25c28e83SPiotr Jasiukajtis 3033*25c28e83SPiotr Jasiukajtis 3034*25c28e83SPiotr Jasiukajtis .align 32 3035*25c28e83SPiotr Jasiukajtis.BIG1: 3036*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ff00000),%o7 3037*25c28e83SPiotr Jasiukajtis cmp %l1,%o7 3038*25c28e83SPiotr Jasiukajtis bl,a,pt %icc,1f ! if hx < 0x7ff00000 3039*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken 3040*25c28e83SPiotr Jasiukajtis mov %l7,LIM_l6 ! set biguns flag or 3041*25c28e83SPiotr Jasiukajtis fsubd %f10,%f10,%f10 ! y = x - x 3042*25c28e83SPiotr Jasiukajtis st %f10,[%o1] 3043*25c28e83SPiotr Jasiukajtis st %f11,[%o1+4] 3044*25c28e83SPiotr Jasiukajtis1: 3045*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 3046*25c28e83SPiotr Jasiukajtis ble,pn %icc,.ENDLOOP1 3047*25c28e83SPiotr Jasiukajtis! delay slot, harmless if branch taken 3048*25c28e83SPiotr Jasiukajtis andn %l2,%i5,%l1 ! hx &= ~0x80000000 3049*25c28e83SPiotr Jasiukajtis fmovd %f20,%f10 3050*25c28e83SPiotr Jasiukajtis ba,pt %icc,.LOOP1 3051*25c28e83SPiotr Jasiukajtis! delay slot 3052*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 3053*25c28e83SPiotr Jasiukajtis 3054*25c28e83SPiotr Jasiukajtis 3055*25c28e83SPiotr Jasiukajtis .align 32 3056*25c28e83SPiotr Jasiukajtis.BIG2: 3057*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ff00000),%o7 3058*25c28e83SPiotr Jasiukajtis cmp %l2,%o7 3059*25c28e83SPiotr Jasiukajtis bl,a,pt %icc,1f ! if hx < 0x7ff00000 3060*25c28e83SPiotr Jasiukajtis! delay slot, annulled if branch not taken 3061*25c28e83SPiotr Jasiukajtis mov %l7,LIM_l6 ! set biguns flag or 3062*25c28e83SPiotr Jasiukajtis fsubd %f20,%f20,%f20 ! 
y = x - x 3063*25c28e83SPiotr Jasiukajtis st %f20,[%o2] 3064*25c28e83SPiotr Jasiukajtis st %f21,[%o2+4] 3065*25c28e83SPiotr Jasiukajtis1: 3066*25c28e83SPiotr Jasiukajtis addcc %i0,-1,%i0 3067*25c28e83SPiotr Jasiukajtis ble,pn %icc,.ENDLOOP2 3068*25c28e83SPiotr Jasiukajtis! delay slot 3069*25c28e83SPiotr Jasiukajtis nop 3070*25c28e83SPiotr Jasiukajtis ld [%i1],%l2 3071*25c28e83SPiotr Jasiukajtis ld [%i1],%f20 3072*25c28e83SPiotr Jasiukajtis ld [%i1+4],%f21 3073*25c28e83SPiotr Jasiukajtis andn %l2,%i5,%l2 ! hx &= ~0x80000000 3074*25c28e83SPiotr Jasiukajtis ba,pt %icc,.LOOP2 3075*25c28e83SPiotr Jasiukajtis! delay slot 3076*25c28e83SPiotr Jasiukajtis add %i1,%i2,%i1 ! x += stridex 3077*25c28e83SPiotr Jasiukajtis 3078*25c28e83SPiotr Jasiukajtis SET_SIZE(__vcos) 3079*25c28e83SPiotr Jasiukajtis 3080