1*25c28e83SPiotr Jasiukajtis/* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis/* 22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23*25c28e83SPiotr Jasiukajtis */ 24*25c28e83SPiotr Jasiukajtis/* 25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 
27*25c28e83SPiotr Jasiukajtis */ 28*25c28e83SPiotr Jasiukajtis 29*25c28e83SPiotr Jasiukajtis .file "__vhypotf.S" 30*25c28e83SPiotr Jasiukajtis 31*25c28e83SPiotr Jasiukajtis#include "libm.h" 32*25c28e83SPiotr Jasiukajtis 33*25c28e83SPiotr Jasiukajtis RO_DATA 34*25c28e83SPiotr Jasiukajtis .align 64 35*25c28e83SPiotr Jasiukajtis /* Read-only constants. The entry code of __vhypotf loads them from byte offsets 0..64 of this table into the FP registers named by the defines that follow; FMAX and SCALE share one 8-byte entry and are loaded as two separate 32-bit words. */ 36*25c28e83SPiotr Jasiukajtis.CONST_TBL: 37*25c28e83SPiotr Jasiukajtis .word 0x3fe00001, 0x80007e00 ! K1 = 5.00000715259318464227e-01 38*25c28e83SPiotr Jasiukajtis .word 0xbfc00003, 0xc0017a01 ! K2 = -1.25000447037521686593e-01 39*25c28e83SPiotr Jasiukajtis .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff 40*25c28e83SPiotr Jasiukajtis .word 0x3ff00000, 0x00000000 ! DC1 = 0x3ff0000000000000 41*25c28e83SPiotr Jasiukajtis .word 0x7ffff000, 0x00000000 ! DC2 = 0x7ffff00000000000 42*25c28e83SPiotr Jasiukajtis .word 0x7fe00000, 0x00000000 ! DA0 = 0x7fe0000000000000 43*25c28e83SPiotr Jasiukajtis .word 0x47efffff, 0xe0000000 ! DFMAX = 3.402823e+38 44*25c28e83SPiotr Jasiukajtis .word 0x7f7fffff, 0x80808080 ! FMAX = 3.402823e+38 , SCALE = 0x80808080 45*25c28e83SPiotr Jasiukajtis .word 0x20000000, 0x00000000 ! 
DA1 = 0x2000000000000000 46*25c28e83SPiotr Jasiukajtis /* FP register assigned to each .CONST_TBL constant. */ 47*25c28e83SPiotr Jasiukajtis#define DC0 %f12 48*25c28e83SPiotr Jasiukajtis#define DC1 %f10 49*25c28e83SPiotr Jasiukajtis#define DC2 %f42 50*25c28e83SPiotr Jasiukajtis#define DA0 %f6 51*25c28e83SPiotr Jasiukajtis#define DA1 %f4 52*25c28e83SPiotr Jasiukajtis#define K2 %f26 53*25c28e83SPiotr Jasiukajtis#define K1 %f28 54*25c28e83SPiotr Jasiukajtis#define SCALE %f3 55*25c28e83SPiotr Jasiukajtis#define FMAX %f2 56*25c28e83SPiotr Jasiukajtis#define DFMAX %f50 57*25c28e83SPiotr Jasiukajtis /* Integer registers: strides (each shifted left by 2 at entry, i.e. scaled to bytes for 4-byte elements), the sign-clearing mask and special-case threshold used on hx0/hy0, the 0x1ff0 table-index mask, and the base of __vlibm_TBL_sqrtf. */ 58*25c28e83SPiotr Jasiukajtis#define stridex %l6 59*25c28e83SPiotr Jasiukajtis#define stridey %i4 60*25c28e83SPiotr Jasiukajtis#define stridez %l5 61*25c28e83SPiotr Jasiukajtis#define _0x7fffffff %o1 62*25c28e83SPiotr Jasiukajtis#define _0x7f3504f3 %o2 63*25c28e83SPiotr Jasiukajtis#define _0x1ff0 %l2 64*25c28e83SPiotr Jasiukajtis#define TBL %l1 65*25c28e83SPiotr Jasiukajtis 66*25c28e83SPiotr Jasiukajtis#define counter %l0 67*25c28e83SPiotr Jasiukajtis /* Scratch slots addressed relative to %fp: the saved px and py pointers (64-bit stores), the saved counter, and tmp0..tmp4, each of which receives a 32-bit store of a db0 value that is later reloaded as the integer iexp0. */ 68*25c28e83SPiotr Jasiukajtis#define tmp_px STACK_BIAS-0x30 69*25c28e83SPiotr Jasiukajtis#define tmp_py STACK_BIAS-0x28 70*25c28e83SPiotr Jasiukajtis#define tmp_counter STACK_BIAS-0x20 71*25c28e83SPiotr Jasiukajtis#define tmp0 STACK_BIAS-0x18 72*25c28e83SPiotr Jasiukajtis#define tmp1 STACK_BIAS-0x10 73*25c28e83SPiotr Jasiukajtis#define tmp2 STACK_BIAS-0x0c 74*25c28e83SPiotr Jasiukajtis#define tmp3 STACK_BIAS-0x08 75*25c28e83SPiotr Jasiukajtis#define tmp4 STACK_BIAS-0x04 76*25c28e83SPiotr Jasiukajtis 77*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9 78*25c28e83SPiotr Jasiukajtis#define tmps 0x30 79*25c28e83SPiotr Jasiukajtis 80*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 81*25c28e83SPiotr Jasiukajtis! !!!!! algorithm !!!!! 82*25c28e83SPiotr Jasiukajtis! hx0 = *(int*)px; 83*25c28e83SPiotr Jasiukajtis! x0 = *px; 84*25c28e83SPiotr Jasiukajtis! px += stridex; 85*25c28e83SPiotr Jasiukajtis! 
86*25c28e83SPiotr Jasiukajtis! hy0 = *(int*)py; 87*25c28e83SPiotr Jasiukajtis! y0 = *py; 88*25c28e83SPiotr Jasiukajtis! py += stridey; 89*25c28e83SPiotr Jasiukajtis! 90*25c28e83SPiotr Jasiukajtis! hx0 &= 0x7fffffff; 91*25c28e83SPiotr Jasiukajtis! hy0 &= 0x7fffffff; 92*25c28e83SPiotr Jasiukajtis! 93*25c28e83SPiotr Jasiukajtis! if ( hx0 >= 0x7f3504f3 || hy0 >= 0x7f3504f3 ) 94*25c28e83SPiotr Jasiukajtis! { 95*25c28e83SPiotr Jasiukajtis! if ( hx0 >= 0x7f800000 || hy0 >= 0x7f800000 ) 96*25c28e83SPiotr Jasiukajtis! { 97*25c28e83SPiotr Jasiukajtis! if ( hx0 == 0x7f800000 || hy0 == 0x7f800000 ) 98*25c28e83SPiotr Jasiukajtis! *(int*)pz = 0x7f800000; 99*25c28e83SPiotr Jasiukajtis! else *pz = x0 * y0; 100*25c28e83SPiotr Jasiukajtis! } 101*25c28e83SPiotr Jasiukajtis! else 102*25c28e83SPiotr Jasiukajtis! { 103*25c28e83SPiotr Jasiukajtis! hyp = sqrt(x0 * (double)x0 + y0 * (double)y0); 104*25c28e83SPiotr Jasiukajtis! if ( hyp <= DFMAX ) ftmp0 = (float)hyp; 105*25c28e83SPiotr Jasiukajtis! else ftmp0 = FMAX * FMAX; 106*25c28e83SPiotr Jasiukajtis! *pz = ftmp0; 107*25c28e83SPiotr Jasiukajtis! } 108*25c28e83SPiotr Jasiukajtis! pz += stridez; 109*25c28e83SPiotr Jasiukajtis! continue; 110*25c28e83SPiotr Jasiukajtis! } 111*25c28e83SPiotr Jasiukajtis! if ( (hx0 | hy0) == 0 ) 112*25c28e83SPiotr Jasiukajtis! { 113*25c28e83SPiotr Jasiukajtis! *pz = 0; 114*25c28e83SPiotr Jasiukajtis! pz += stridez; 115*25c28e83SPiotr Jasiukajtis! continue; 116*25c28e83SPiotr Jasiukajtis! } 117*25c28e83SPiotr Jasiukajtis! dx0 = x0 * (double)x0; 118*25c28e83SPiotr Jasiukajtis! dy0 = y0 * (double)y0; 119*25c28e83SPiotr Jasiukajtis! db0 = dx0 + dy0; 120*25c28e83SPiotr Jasiukajtis! 121*25c28e83SPiotr Jasiukajtis! iexp0 = ((int*)&db0)[0]; 122*25c28e83SPiotr Jasiukajtis! 123*25c28e83SPiotr Jasiukajtis! h0 = vis_fand(db0,DC0); 124*25c28e83SPiotr Jasiukajtis! h0 = vis_for(h0,DC1); 125*25c28e83SPiotr Jasiukajtis! h_hi0 = vis_fand(h0,DC2); 126*25c28e83SPiotr Jasiukajtis! 127*25c28e83SPiotr Jasiukajtis! 
db0 = vis_fand(db0,DA0); 128*25c28e83SPiotr Jasiukajtis! db0 = vis_fmul8x16(SCALE, db0); 129*25c28e83SPiotr Jasiukajtis! db0 = vis_fpadd32(db0,DA1); 130*25c28e83SPiotr Jasiukajtis! 131*25c28e83SPiotr Jasiukajtis! iexp0 >>= 8; 132*25c28e83SPiotr Jasiukajtis! di0 = iexp0 & 0x1ff0; 133*25c28e83SPiotr Jasiukajtis! si0 = (char*)sqrt_arr + di0; 134*25c28e83SPiotr Jasiukajtis! 135*25c28e83SPiotr Jasiukajtis! dtmp0 = ((double*)((char*)div_arr + di0))[0]; 136*25c28e83SPiotr Jasiukajtis! xx0 = h0 - h_hi0; 137*25c28e83SPiotr Jasiukajtis! xx0 *= dtmp0; 138*25c28e83SPiotr Jasiukajtis! 139*25c28e83SPiotr Jasiukajtis! dtmp0 = ((double*)si0)[1]; 140*25c28e83SPiotr Jasiukajtis! res0 = K2 * xx0; 141*25c28e83SPiotr Jasiukajtis! res0 += K1; 142*25c28e83SPiotr Jasiukajtis! res0 *= xx0; 143*25c28e83SPiotr Jasiukajtis! res0 += DC1; 144*25c28e83SPiotr Jasiukajtis! res0 = dtmp0 * res0; 145*25c28e83SPiotr Jasiukajtis! res0 *= db0; 146*25c28e83SPiotr Jasiukajtis! ftmp0 = (float)res0; 147*25c28e83SPiotr Jasiukajtis! *pz = ftmp0; 148*25c28e83SPiotr Jasiukajtis! pz += stridez; 149*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
150*25c28e83SPiotr Jasiukajtis 151*25c28e83SPiotr Jasiukajtis ENTRY(__vhypotf) 152*25c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-tmps,%sp 153*25c28e83SPiotr Jasiukajtis PIC_SETUP(l7) 154*25c28e83SPiotr Jasiukajtis PIC_SET(l7,.CONST_TBL,o3) 155*25c28e83SPiotr Jasiukajtis PIC_SET(l7,__vlibm_TBL_sqrtf,l1) 156*25c28e83SPiotr Jasiukajtis 157*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9 158*25c28e83SPiotr Jasiukajtis ldx [%fp+STACK_BIAS+176],stridez 159*25c28e83SPiotr Jasiukajtis#else 160*25c28e83SPiotr Jasiukajtis ld [%fp+STACK_BIAS+92],stridez 161*25c28e83SPiotr Jasiukajtis#endif 162*25c28e83SPiotr Jasiukajtis st %i0,[%fp+tmp_counter] 163*25c28e83SPiotr Jasiukajtis 164*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 165*25c28e83SPiotr Jasiukajtis 166*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 167*25c28e83SPiotr Jasiukajtis 168*25c28e83SPiotr Jasiukajtis ldd [%o3],K1 169*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%o1 170*25c28e83SPiotr Jasiukajtis 171*25c28e83SPiotr Jasiukajtis ldd [%o3+8],K2 172*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f350400),%o2 173*25c28e83SPiotr Jasiukajtis 174*25c28e83SPiotr Jasiukajtis ldd [%o3+16],DC0 175*25c28e83SPiotr Jasiukajtis add %o1,1023,_0x7fffffff 176*25c28e83SPiotr Jasiukajtis add %o2,0xf3,_0x7f3504f3 177*25c28e83SPiotr Jasiukajtis 178*25c28e83SPiotr Jasiukajtis ldd [%o3+24],DC1 179*25c28e83SPiotr Jasiukajtis sll %i2,2,stridex 180*25c28e83SPiotr Jasiukajtis 181*25c28e83SPiotr Jasiukajtis ld [%o3+56],FMAX 182*25c28e83SPiotr Jasiukajtis 183*25c28e83SPiotr Jasiukajtis ldd [%o3+32],DC2 184*25c28e83SPiotr Jasiukajtis sll %i4,2,stridey 185*25c28e83SPiotr Jasiukajtis 186*25c28e83SPiotr Jasiukajtis ldd [%o3+40],DA0 187*25c28e83SPiotr Jasiukajtis sll stridez,2,stridez 188*25c28e83SPiotr Jasiukajtis 189*25c28e83SPiotr Jasiukajtis ldd [%o3+48],DFMAX 190*25c28e83SPiotr Jasiukajtis 191*25c28e83SPiotr Jasiukajtis ld [%o3+60],SCALE 192*25c28e83SPiotr Jasiukajtis or %g0,0xff8,%l2 193*25c28e83SPiotr Jasiukajtis 194*25c28e83SPiotr 
Jasiukajtis ldd [%o3+64],DA1 195*25c28e83SPiotr Jasiukajtis sll %l2,1,_0x1ff0 196*25c28e83SPiotr Jasiukajtis or %g0,%i5,%l7 197*25c28e83SPiotr Jasiukajtis 198*25c28e83SPiotr Jasiukajtis.begin: 199*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_counter],counter 200*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_px],%i1 201*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_py],%i2 202*25c28e83SPiotr Jasiukajtis st %g0,[%fp+tmp_counter] 203*25c28e83SPiotr Jasiukajtis.begin1: 204*25c28e83SPiotr Jasiukajtis cmp counter,0 205*25c28e83SPiotr Jasiukajtis ble,pn %icc,.exit 206*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px; 207*25c28e83SPiotr Jasiukajtis 208*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py; 209*25c28e83SPiotr Jasiukajtis 210*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f17 ! (3_0) x0 = *px; 211*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff; 212*25c28e83SPiotr Jasiukajtis 213*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3 214*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec ! (3_0) if ( hx >= 0x7f3504f3 ) 215*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff; 216*25c28e83SPiotr Jasiukajtis 217*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3 218*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec ! (3_0) if ( hy >= 0x7f3504f3 ) 219*25c28e83SPiotr Jasiukajtis or %g0,%i2,%o7 220*25c28e83SPiotr Jasiukajtis 221*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 222*25c28e83SPiotr Jasiukajtis bz,pn %icc,.spec1 223*25c28e83SPiotr Jasiukajtis 224*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 225*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0; 226*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%f17 ! (3_0) y0 = *py; 227*25c28e83SPiotr Jasiukajtis 228*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px; 229*25c28e83SPiotr Jasiukajtis 230*25c28e83SPiotr Jasiukajtis lda [stridey+%o7]0x82,%l4 ! 
(4_0) hy0 = *(int*)py; 231*25c28e83SPiotr Jasiukajtis 232*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff; 233*25c28e83SPiotr Jasiukajtis 234*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0; 235*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3 236*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update0 ! (4_0) if ( hx >= 0x7f3504f3 ) 237*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; 238*25c28e83SPiotr Jasiukajtis 239*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 240*25c28e83SPiotr Jasiukajtis bz,pn %icc,.update0 241*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f17 ! (4_0) x0 = *px; 242*25c28e83SPiotr Jasiukajtis.cont0: 243*25c28e83SPiotr Jasiukajtis faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0; 244*25c28e83SPiotr Jasiukajtis 245*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f40 ! (4_1) dy0 = x0 * (double)x0; 246*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3 247*25c28e83SPiotr Jasiukajtis lda [stridey+%o7]0x82,%f17 ! (4_1) hy0 = *py; 248*25c28e83SPiotr Jasiukajtis 249*25c28e83SPiotr Jasiukajtis add %o7,stridey,%i5 ! py += stridey 250*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px; 251*25c28e83SPiotr Jasiukajtis 252*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update1 ! (4_1) if ( hy >= 0x7f3504f3 ) 253*25c28e83SPiotr Jasiukajtis st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0]; 254*25c28e83SPiotr Jasiukajtis.cont1: 255*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (0_0) hx0 &= 0x7fffffff; 256*25c28e83SPiotr Jasiukajtis 257*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0; 258*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px; 259*25c28e83SPiotr Jasiukajtis 260*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 261*25c28e83SPiotr Jasiukajtis 262*25c28e83SPiotr Jasiukajtis lda [%i5+stridey]0x82,%l4 ! 
(0_0) hy0 = *(int*)py; 263*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3 264*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update2 ! (0_0) if ( hx >= 0x7f3504f3 ) 265*25c28e83SPiotr Jasiukajtis add %i5,stridey,%o4 ! py += stridey 266*25c28e83SPiotr Jasiukajtis.cont2: 267*25c28e83SPiotr Jasiukajtis faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0; 268*25c28e83SPiotr Jasiukajtis 269*25c28e83SPiotr Jasiukajtis fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0; 270*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff; 271*25c28e83SPiotr Jasiukajtis lda [%i5+stridey]0x82,%f17 ! (0_0) hy0 = *py; 272*25c28e83SPiotr Jasiukajtis 273*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3 274*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update3 ! (0_0) if ( hy >= 0x7f3504f3 ) 275*25c28e83SPiotr Jasiukajtis st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0]; 276*25c28e83SPiotr Jasiukajtis 277*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 278*25c28e83SPiotr Jasiukajtis bz,pn %icc,.update3 279*25c28e83SPiotr Jasiukajtis.cont3: 280*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px; 281*25c28e83SPiotr Jasiukajtis 282*25c28e83SPiotr Jasiukajtis fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0); 283*25c28e83SPiotr Jasiukajtis 284*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff; 285*25c28e83SPiotr Jasiukajtis 286*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0; 287*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3 288*25c28e83SPiotr Jasiukajtis lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py; 289*25c28e83SPiotr Jasiukajtis 290*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 291*25c28e83SPiotr Jasiukajtis 292*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f17 ! (1_0) x0 = *px; 293*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update4 ! (1_0) if ( hx >= 0x7f3504f3 ) 294*25c28e83SPiotr Jasiukajtis add %o4,stridey,%i5 ! 
py += stridey 295*25c28e83SPiotr Jasiukajtis.cont4: 296*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff; 297*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1); 298*25c28e83SPiotr Jasiukajtis 299*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3 300*25c28e83SPiotr Jasiukajtis ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0]; 301*25c28e83SPiotr Jasiukajtis faddd %f40,%f34,%f0 ! (0_0) db0 = dx0 + dy0; 302*25c28e83SPiotr Jasiukajtis 303*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0; 304*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 305*25c28e83SPiotr Jasiukajtis lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py; 306*25c28e83SPiotr Jasiukajtis 307*25c28e83SPiotr Jasiukajtis srax %o0,8,%o0 ! (3_1) iexp0 >>= 8; 308*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update5 ! (1_0) if ( hy >= 0x7f3504f3 ) 309*25c28e83SPiotr Jasiukajtis fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2); 310*25c28e83SPiotr Jasiukajtis 311*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 312*25c28e83SPiotr Jasiukajtis bz,pn %icc,.update5 313*25c28e83SPiotr Jasiukajtis.cont5: 314*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px; 315*25c28e83SPiotr Jasiukajtis 316*25c28e83SPiotr Jasiukajtis and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0; 317*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0]; 318*25c28e83SPiotr Jasiukajtis fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0); 319*25c28e83SPiotr Jasiukajtis 320*25c28e83SPiotr Jasiukajtis ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; 321*25c28e83SPiotr Jasiukajtis fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0; 322*25c28e83SPiotr Jasiukajtis 323*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0; 324*25c28e83SPiotr Jasiukajtis add %i5,stridey,%i2 ! py += stridey 325*25c28e83SPiotr Jasiukajtis lda [stridey+%i5]0x82,%l4 ! 
(2_0) hy0 = *(int*)py; 326*25c28e83SPiotr Jasiukajtis 327*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff; 328*25c28e83SPiotr Jasiukajtis 329*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f17 ! (2_0) x0 = *px; 330*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3 331*25c28e83SPiotr Jasiukajtis 332*25c28e83SPiotr Jasiukajtis fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dmp0; 333*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff; 334*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1); 335*25c28e83SPiotr Jasiukajtis 336*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update6 ! (2_0) if ( hx >= 0x7f3504f3 ) 337*25c28e83SPiotr Jasiukajtis ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0]; 338*25c28e83SPiotr Jasiukajtis.cont6: 339*25c28e83SPiotr Jasiukajtis faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0; 340*25c28e83SPiotr Jasiukajtis 341*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0; 342*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3 343*25c28e83SPiotr Jasiukajtis lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py; 344*25c28e83SPiotr Jasiukajtis 345*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 346*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update7 ! (2_0) if ( hy >= 0x7f3504f3 ) 347*25c28e83SPiotr Jasiukajtis fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2); 348*25c28e83SPiotr Jasiukajtis 349*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 350*25c28e83SPiotr Jasiukajtis bz,pn %icc,.update7 351*25c28e83SPiotr Jasiukajtis nop 352*25c28e83SPiotr Jasiukajtis.cont7: 353*25c28e83SPiotr Jasiukajtis fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0; 354*25c28e83SPiotr Jasiukajtis srax %o3,8,%o3 ! (4_1) iexp0 >>= 8; 355*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px; 356*25c28e83SPiotr Jasiukajtis 357*25c28e83SPiotr Jasiukajtis and %o3,_0x1ff0,%o3 ! 
(4_1) di0 = iexp0 & 0x1ff0; 358*25c28e83SPiotr Jasiukajtis st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0]; 359*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0); 360*25c28e83SPiotr Jasiukajtis 361*25c28e83SPiotr Jasiukajtis ldd [TBL+%o3],%f22 ! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; 362*25c28e83SPiotr Jasiukajtis add %i2,stridey,%o7 ! py += stridey 363*25c28e83SPiotr Jasiukajtis fsubd %f46,%f58,%f58 ! (4_1) xx0 = h0 - h_hi0; 364*25c28e83SPiotr Jasiukajtis 365*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0; 366*25c28e83SPiotr Jasiukajtis lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py; 367*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff; 368*25c28e83SPiotr Jasiukajtis 369*25c28e83SPiotr Jasiukajtis faddd %f56,K1,%f54 ! (3_1) res0 += K1; 370*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3 371*25c28e83SPiotr Jasiukajtis 372*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f17 ! (3_0) x0 = *px; 373*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 374*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update8 ! (3_0) if ( hx >= 0x7f3504f3 ) 375*25c28e83SPiotr Jasiukajtis 376*25c28e83SPiotr Jasiukajtis fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dmp0; 377*25c28e83SPiotr Jasiukajtis.cont8: 378*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff; 379*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1); 380*25c28e83SPiotr Jasiukajtis 381*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3 382*25c28e83SPiotr Jasiukajtis ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0]; 383*25c28e83SPiotr Jasiukajtis faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0; 384*25c28e83SPiotr Jasiukajtis 385*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0; 386*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update9 ! 
(3_0) if ( hy >= 0x7f3504f3 ) 387*25c28e83SPiotr Jasiukajtis lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py; 388*25c28e83SPiotr Jasiukajtis 389*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 390*25c28e83SPiotr Jasiukajtis bz,pn %icc,.update9 391*25c28e83SPiotr Jasiukajtis nop 392*25c28e83SPiotr Jasiukajtis.cont9: 393*25c28e83SPiotr Jasiukajtis fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0; 394*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px; 395*25c28e83SPiotr Jasiukajtis fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2); 396*25c28e83SPiotr Jasiukajtis 397*25c28e83SPiotr Jasiukajtis fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0; 398*25c28e83SPiotr Jasiukajtis srax %g1,8,%o5 ! (0_0) iexp0 >>= 8; 399*25c28e83SPiotr Jasiukajtis lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py; 400*25c28e83SPiotr Jasiukajtis fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0); 401*25c28e83SPiotr Jasiukajtis 402*25c28e83SPiotr Jasiukajtis and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0; 403*25c28e83SPiotr Jasiukajtis st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0]; 404*25c28e83SPiotr Jasiukajtis fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0); 405*25c28e83SPiotr Jasiukajtis 406*25c28e83SPiotr Jasiukajtis ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0]; 407*25c28e83SPiotr Jasiukajtis add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0; 408*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff; 409*25c28e83SPiotr Jasiukajtis fsubd %f46,%f38,%f38 ! (0_0) xx0 = h0 - h_hi0; 410*25c28e83SPiotr Jasiukajtis 411*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0; 412*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3 413*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update10 ! (4_0) if ( hx >= 0x7f3504f3 ) 414*25c28e83SPiotr Jasiukajtis faddd %f40,DC1,%f40 ! (3_1) res0 += DC1; 415*25c28e83SPiotr Jasiukajtis 416*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f56,%f36 ! 
(3_1) db0 = vis_fmul8x16(SCALE, db0); 417*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; 418*25c28e83SPiotr Jasiukajtis ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1]; 419*25c28e83SPiotr Jasiukajtis faddd %f54,K1,%f54 ! (4_1) res0 += K1; 420*25c28e83SPiotr Jasiukajtis 421*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f17 ! (4_0) x0 = *px; 422*25c28e83SPiotr Jasiukajtis.cont10: 423*25c28e83SPiotr Jasiukajtis fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dmp0; 424*25c28e83SPiotr Jasiukajtis cmp counter,5 425*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1); 426*25c28e83SPiotr Jasiukajtis 427*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0]; 428*25c28e83SPiotr Jasiukajtis fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0; 429*25c28e83SPiotr Jasiukajtis faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0; 430*25c28e83SPiotr Jasiukajtis 431*25c28e83SPiotr Jasiukajtis bl,pn %icc,.tail 432*25c28e83SPiotr Jasiukajtis nop 433*25c28e83SPiotr Jasiukajtis 434*25c28e83SPiotr Jasiukajtis ba .main_loop 435*25c28e83SPiotr Jasiukajtis sub counter,5,counter 436*25c28e83SPiotr Jasiukajtis 437*25c28e83SPiotr Jasiukajtis .align 16 438*25c28e83SPiotr Jasiukajtis.main_loop: 439*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f40 ! (4_1) dy0 = x0 * (double)x0; 440*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3 441*25c28e83SPiotr Jasiukajtis lda [stridey+%o7]0x82,%f17 ! (4_1) hy0 = *py; 442*25c28e83SPiotr Jasiukajtis fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1); 443*25c28e83SPiotr Jasiukajtis 444*25c28e83SPiotr Jasiukajtis fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0; 445*25c28e83SPiotr Jasiukajtis add %o7,stridey,%i5 ! py += stridey 446*25c28e83SPiotr Jasiukajtis st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0]; 447*25c28e83SPiotr Jasiukajtis fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2); 448*25c28e83SPiotr Jasiukajtis 449*25c28e83SPiotr Jasiukajtis fmuld K2,%f38,%f56 ! 
(0_1) res0 = K2 * xx0; 450*25c28e83SPiotr Jasiukajtis srax %g1,8,%g5 ! (1_1) iexp0 >>= 8; 451*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update11 ! (4_1) if ( hy >= 0x7f3504f3 ) 452*25c28e83SPiotr Jasiukajtis fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0); 453*25c28e83SPiotr Jasiukajtis 454*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 455*25c28e83SPiotr Jasiukajtis nop 456*25c28e83SPiotr Jasiukajtis bz,pn %icc,.update11 457*25c28e83SPiotr Jasiukajtis fzero %f52 458*25c28e83SPiotr Jasiukajtis.cont11: 459*25c28e83SPiotr Jasiukajtis fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0; 460*25c28e83SPiotr Jasiukajtis and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0; 461*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px; 462*25c28e83SPiotr Jasiukajtis fand %f30,DC0,%f60 ! (2_1) h0 = vis_fand(db0,DC0); 463*25c28e83SPiotr Jasiukajtis 464*25c28e83SPiotr Jasiukajtis ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; 465*25c28e83SPiotr Jasiukajtis add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0; 466*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i0 ! px += stridex 467*25c28e83SPiotr Jasiukajtis fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0; 468*25c28e83SPiotr Jasiukajtis 469*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0; 470*25c28e83SPiotr Jasiukajtis nop 471*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px; 472*25c28e83SPiotr Jasiukajtis faddd %f58,DC1,%f36 ! (4_2) res0 += DC1; 473*25c28e83SPiotr Jasiukajtis 474*25c28e83SPiotr Jasiukajtis faddd %f56,K1,%f58 ! (0_1) res0 += K1; 475*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (0_0) hx0 &= 0x7fffffff; 476*25c28e83SPiotr Jasiukajtis ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1]; 477*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0); 478*25c28e83SPiotr Jasiukajtis 479*25c28e83SPiotr Jasiukajtis lda [%i5+stridey]0x82,%l4 ! 
(0_0) hy0 = *(int*)py; 480*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3 481*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update12 ! (0_0) if ( hx >= 0x7f3504f3 ) 482*25c28e83SPiotr Jasiukajtis fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0; 483*25c28e83SPiotr Jasiukajtis.cont12: 484*25c28e83SPiotr Jasiukajtis fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dmp0; 485*25c28e83SPiotr Jasiukajtis add %l7,stridez,%o7 ! pz += stridez 486*25c28e83SPiotr Jasiukajtis st %f14,[%l7] ! (3_2) *pz = ftmp0; 487*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f46 ! (2_1) h0 = vis_for(h0,DC1); 488*25c28e83SPiotr Jasiukajtis 489*25c28e83SPiotr Jasiukajtis fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0; 490*25c28e83SPiotr Jasiukajtis add %i5,stridey,%o4 ! py += stridey 491*25c28e83SPiotr Jasiukajtis ld [%fp+tmp4],%g1 ! (2_1) iexp0 = ((int*)&db0)[0]; 492*25c28e83SPiotr Jasiukajtis faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0; 493*25c28e83SPiotr Jasiukajtis 494*25c28e83SPiotr Jasiukajtis fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0; 495*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff; 496*25c28e83SPiotr Jasiukajtis lda [%i5+stridey]0x82,%f17 ! (0_0) hy0 = *py; 497*25c28e83SPiotr Jasiukajtis fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1); 498*25c28e83SPiotr Jasiukajtis 499*25c28e83SPiotr Jasiukajtis fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0; 500*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3 501*25c28e83SPiotr Jasiukajtis st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0]; 502*25c28e83SPiotr Jasiukajtis fand %f46,DC2,%f58 ! (2_1) h_hi0 = vis_fand(h0,DC2); 503*25c28e83SPiotr Jasiukajtis 504*25c28e83SPiotr Jasiukajtis fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0; 505*25c28e83SPiotr Jasiukajtis srax %g1,8,%g1 ! (2_1) iexp0 >>= 8; 506*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update13 ! (0_0) if ( hy >= 0x7f3504f3 ) 507*25c28e83SPiotr Jasiukajtis fand %f0,DA0,%f54 ! 
(0_1) db0 = vis_fand(db0,DA0); 508*25c28e83SPiotr Jasiukajtis 509*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 510*25c28e83SPiotr Jasiukajtis nop 511*25c28e83SPiotr Jasiukajtis bz,pn %icc,.update13 512*25c28e83SPiotr Jasiukajtis fzero %f52 513*25c28e83SPiotr Jasiukajtis.cont13: 514*25c28e83SPiotr Jasiukajtis fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0; 515*25c28e83SPiotr Jasiukajtis and %g1,_0x1ff0,%g1 ! (2_1) di0 = iexp0 & 0x1ff0; 516*25c28e83SPiotr Jasiukajtis lda [%i0+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px; 517*25c28e83SPiotr Jasiukajtis fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0); 518*25c28e83SPiotr Jasiukajtis 519*25c28e83SPiotr Jasiukajtis ldd [TBL+%g1],%f22 ! (2_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; 520*25c28e83SPiotr Jasiukajtis add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0; 521*25c28e83SPiotr Jasiukajtis add %i0,stridex,%i1 ! px += stridex 522*25c28e83SPiotr Jasiukajtis fsubd %f46,%f58,%f58 ! (2_1) xx0 = h0 - h_hi0; 523*25c28e83SPiotr Jasiukajtis 524*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0; 525*25c28e83SPiotr Jasiukajtis add %o7,stridez,%i0 ! pz += stridez 526*25c28e83SPiotr Jasiukajtis lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py; 527*25c28e83SPiotr Jasiukajtis faddd %f38,DC1,%f36 ! (0_1) res0 += DC1; 528*25c28e83SPiotr Jasiukajtis 529*25c28e83SPiotr Jasiukajtis faddd %f56,K1,%f38 ! (1_1) res0 += K1; 530*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff; 531*25c28e83SPiotr Jasiukajtis ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1]; 532*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0); 533*25c28e83SPiotr Jasiukajtis 534*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f17 ! (1_0) x0 = *px; 535*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3 536*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update14 ! (1_0) if ( hx >= 0x7f3504f3 ) 537*25c28e83SPiotr Jasiukajtis fdtos %f62,%f14 ! 
(4_2) ftmp0 = (float)res0; 538*25c28e83SPiotr Jasiukajtis.cont14: 539*25c28e83SPiotr Jasiukajtis fmuld %f58,%f22,%f58 ! (2_1) xx0 *= dmp0; 540*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff; 541*25c28e83SPiotr Jasiukajtis add %o4,stridey,%i5 ! py += stridey 542*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1); 543*25c28e83SPiotr Jasiukajtis 544*25c28e83SPiotr Jasiukajtis fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0; 545*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3 546*25c28e83SPiotr Jasiukajtis ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0]; 547*25c28e83SPiotr Jasiukajtis faddd %f40,%f34,%f0 ! (0_0) db0 = dx0 + dy0; 548*25c28e83SPiotr Jasiukajtis 549*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0; 550*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 551*25c28e83SPiotr Jasiukajtis lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py; 552*25c28e83SPiotr Jasiukajtis fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1); 553*25c28e83SPiotr Jasiukajtis 554*25c28e83SPiotr Jasiukajtis fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0; 555*25c28e83SPiotr Jasiukajtis st %f14,[%o7] ! (4_2) *pz = ftmp0; 556*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update15 ! (1_0) if ( hy >= 0x7f3504f3 ) 557*25c28e83SPiotr Jasiukajtis fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2); 558*25c28e83SPiotr Jasiukajtis 559*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 560*25c28e83SPiotr Jasiukajtis bz,pn %icc,.update15 561*25c28e83SPiotr Jasiukajtis nop 562*25c28e83SPiotr Jasiukajtis.cont15: 563*25c28e83SPiotr Jasiukajtis fmuld K2,%f58,%f54 ! (2_1) res0 = K2 * xx0; 564*25c28e83SPiotr Jasiukajtis srax %o0,8,%o0 ! (3_1) iexp0 >>= 8; 565*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0]; 566*25c28e83SPiotr Jasiukajtis fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0); 567*25c28e83SPiotr Jasiukajtis 568*25c28e83SPiotr Jasiukajtis fmuld %f36,%f62,%f62 ! 
(0_1) res0 *= db0; 569*25c28e83SPiotr Jasiukajtis and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0; 570*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px; 571*25c28e83SPiotr Jasiukajtis fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0); 572*25c28e83SPiotr Jasiukajtis 573*25c28e83SPiotr Jasiukajtis ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; 574*25c28e83SPiotr Jasiukajtis add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0; 575*25c28e83SPiotr Jasiukajtis add %i0,stridez,%i3 ! pz += stridez 576*25c28e83SPiotr Jasiukajtis fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0; 577*25c28e83SPiotr Jasiukajtis 578*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0; 579*25c28e83SPiotr Jasiukajtis add %i5,stridey,%i2 ! py += stridey 580*25c28e83SPiotr Jasiukajtis lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py; 581*25c28e83SPiotr Jasiukajtis faddd %f44,DC1,%f44 ! (1_1) res0 += DC1; 582*25c28e83SPiotr Jasiukajtis 583*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0); 584*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff; 585*25c28e83SPiotr Jasiukajtis ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1]; 586*25c28e83SPiotr Jasiukajtis faddd %f54,K1,%f54 ! (2_1) res0 += K1; 587*25c28e83SPiotr Jasiukajtis 588*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f17 ! (2_0) x0 = *px; 589*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3 590*25c28e83SPiotr Jasiukajtis add %i3,stridez,%o4 ! pz += stridez 591*25c28e83SPiotr Jasiukajtis fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0; 592*25c28e83SPiotr Jasiukajtis 593*25c28e83SPiotr Jasiukajtis fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dmp0; 594*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff; 595*25c28e83SPiotr Jasiukajtis st %f14,[%i0] ! (0_1) *pz = ftmp0; 596*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f46 ! 
(4_1) h0 = vis_for(h0,DC1); 597*25c28e83SPiotr Jasiukajtis 598*25c28e83SPiotr Jasiukajtis fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0; 599*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update16 ! (2_0) if ( hx >= 0x7f3504f3 ) 600*25c28e83SPiotr Jasiukajtis ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0]; 601*25c28e83SPiotr Jasiukajtis faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0; 602*25c28e83SPiotr Jasiukajtis.cont16: 603*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0; 604*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3 605*25c28e83SPiotr Jasiukajtis lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py; 606*25c28e83SPiotr Jasiukajtis fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1); 607*25c28e83SPiotr Jasiukajtis 608*25c28e83SPiotr Jasiukajtis fmuld %f54,%f58,%f54 ! (2_1) res0 *= xx0; 609*25c28e83SPiotr Jasiukajtis add %i1,stridex,%l7 ! px += stridex 610*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update17 ! (2_0) if ( hy >= 0x7f3504f3 ) 611*25c28e83SPiotr Jasiukajtis fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2); 612*25c28e83SPiotr Jasiukajtis 613*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 614*25c28e83SPiotr Jasiukajtis nop 615*25c28e83SPiotr Jasiukajtis bz,pn %icc,.update17 616*25c28e83SPiotr Jasiukajtis fzero %f52 617*25c28e83SPiotr Jasiukajtis.cont17: 618*25c28e83SPiotr Jasiukajtis fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0; 619*25c28e83SPiotr Jasiukajtis srax %o3,8,%o3 ! (4_1) iexp0 >>= 8; 620*25c28e83SPiotr Jasiukajtis st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0]; 621*25c28e83SPiotr Jasiukajtis fand %f30,DA0,%f40 ! (2_1) db0 = vis_fand(db0,DA0); 622*25c28e83SPiotr Jasiukajtis 623*25c28e83SPiotr Jasiukajtis fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0; 624*25c28e83SPiotr Jasiukajtis and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0; 625*25c28e83SPiotr Jasiukajtis lda [%l7]0x82,%l3 ! (3_0) hx0 = *(int*)px; 626*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f60 ! 
(0_0) h0 = vis_fand(db0,DC0); 627*25c28e83SPiotr Jasiukajtis 628*25c28e83SPiotr Jasiukajtis ldd [TBL+%o3],%f22 ! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; 629*25c28e83SPiotr Jasiukajtis add %g1,TBL,%g1 ! (2_1) si0 = (char*)sqrt_arr + di0; 630*25c28e83SPiotr Jasiukajtis add %i2,stridey,%o7 ! py += stridey 631*25c28e83SPiotr Jasiukajtis fsubd %f46,%f58,%f58 ! (4_1) xx0 = h0 - h_hi0; 632*25c28e83SPiotr Jasiukajtis 633*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0; 634*25c28e83SPiotr Jasiukajtis lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py; 635*25c28e83SPiotr Jasiukajtis add %l7,stridex,%i1 ! px += stridex 636*25c28e83SPiotr Jasiukajtis faddd %f54,DC1,%f36 ! (2_1) res0 += DC1; 637*25c28e83SPiotr Jasiukajtis 638*25c28e83SPiotr Jasiukajtis faddd %f56,K1,%f54 ! (3_1) res0 += K1; 639*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff; 640*25c28e83SPiotr Jasiukajtis ldd [%g1+8],%f56 ! (2_1) dtmp0 = ((double*)si0)[1]; 641*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f40,%f40 ! (2_1) db0 = vis_fmul8x16(SCALE, db0); 642*25c28e83SPiotr Jasiukajtis 643*25c28e83SPiotr Jasiukajtis lda [%l7]0x82,%f17 ! (3_0) x0 = *px; 644*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3 645*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update18 ! (3_0) if ( hx >= 0x7f3504f3 ) 646*25c28e83SPiotr Jasiukajtis fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0; 647*25c28e83SPiotr Jasiukajtis.cont18: 648*25c28e83SPiotr Jasiukajtis fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dmp0; 649*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff; 650*25c28e83SPiotr Jasiukajtis st %f14,[%i3] ! (1_1) *pz = ftmp0; 651*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1); 652*25c28e83SPiotr Jasiukajtis 653*25c28e83SPiotr Jasiukajtis fmuld %f56,%f36,%f36 ! (2_1) res0 = dtmp0 * res0; 654*25c28e83SPiotr Jasiukajtis cmp %l4,_0x7f3504f3 ! (3_0) hy ? 
0x7f3504f3 655*25c28e83SPiotr Jasiukajtis ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0]; 656*25c28e83SPiotr Jasiukajtis faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0; 657*25c28e83SPiotr Jasiukajtis 658*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0; 659*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update19 ! (3_0) if ( hy >= 0x7f3504f3 ) 660*25c28e83SPiotr Jasiukajtis lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py; 661*25c28e83SPiotr Jasiukajtis fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1); 662*25c28e83SPiotr Jasiukajtis 663*25c28e83SPiotr Jasiukajtis.cont19: 664*25c28e83SPiotr Jasiukajtis fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0; 665*25c28e83SPiotr Jasiukajtis orcc %l3,%l4,%g0 666*25c28e83SPiotr Jasiukajtis st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0]; 667*25c28e83SPiotr Jasiukajtis fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2); 668*25c28e83SPiotr Jasiukajtis 669*25c28e83SPiotr Jasiukajtis fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0; 670*25c28e83SPiotr Jasiukajtis srax %g1,8,%o5 ! (0_0) iexp0 >>= 8; 671*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px; 672*25c28e83SPiotr Jasiukajtis fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0); 673*25c28e83SPiotr Jasiukajtis 674*25c28e83SPiotr Jasiukajtis fmuld %f36,%f62,%f62 ! (2_1) res0 *= db0; 675*25c28e83SPiotr Jasiukajtis and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0; 676*25c28e83SPiotr Jasiukajtis bz,pn %icc,.update19a 677*25c28e83SPiotr Jasiukajtis fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0); 678*25c28e83SPiotr Jasiukajtis.cont19a: 679*25c28e83SPiotr Jasiukajtis ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0]; 680*25c28e83SPiotr Jasiukajtis add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0; 681*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff; 682*25c28e83SPiotr Jasiukajtis fsubd %f46,%f38,%f38 ! 
(0_0) xx0 = h0 - h_hi0; 683*25c28e83SPiotr Jasiukajtis 684*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0; 685*25c28e83SPiotr Jasiukajtis cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3 686*25c28e83SPiotr Jasiukajtis lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py; 687*25c28e83SPiotr Jasiukajtis faddd %f40,DC1,%f40 ! (3_1) res0 += DC1; 688*25c28e83SPiotr Jasiukajtis 689*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0); 690*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update20 ! (4_0) if ( hx >= 0x7f3504f3 ) 691*25c28e83SPiotr Jasiukajtis ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1]; 692*25c28e83SPiotr Jasiukajtis faddd %f54,K1,%f54 ! (4_1) res0 += K1; 693*25c28e83SPiotr Jasiukajtis 694*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f17 ! (4_0) x0 = *px; 695*25c28e83SPiotr Jasiukajtis.cont20: 696*25c28e83SPiotr Jasiukajtis subcc counter,5,counter ! counter -= 5 697*25c28e83SPiotr Jasiukajtis add %o4,stridez,%l7 ! pz += stridez 698*25c28e83SPiotr Jasiukajtis fdtos %f62,%f14 ! (2_1) ftmp0 = (float)res0; 699*25c28e83SPiotr Jasiukajtis 700*25c28e83SPiotr Jasiukajtis fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dmp0; 701*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; 702*25c28e83SPiotr Jasiukajtis st %f14,[%o4] ! (2_1) *pz = ftmp0; 703*25c28e83SPiotr Jasiukajtis for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1); 704*25c28e83SPiotr Jasiukajtis 705*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0]; 706*25c28e83SPiotr Jasiukajtis fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0; 707*25c28e83SPiotr Jasiukajtis bpos,pt %icc,.main_loop 708*25c28e83SPiotr Jasiukajtis faddd %f44,%f24,%f24 ! 
(3_0) db0 = dx0 + dy0; 709*25c28e83SPiotr Jasiukajtis 710*25c28e83SPiotr Jasiukajtis add counter,5,counter 711*25c28e83SPiotr Jasiukajtis 712*25c28e83SPiotr Jasiukajtis.tail: 713*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 714*25c28e83SPiotr Jasiukajtis bneg .begin 715*25c28e83SPiotr Jasiukajtis nop 716*25c28e83SPiotr Jasiukajtis 717*25c28e83SPiotr Jasiukajtis fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1); 718*25c28e83SPiotr Jasiukajtis 719*25c28e83SPiotr Jasiukajtis fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0; 720*25c28e83SPiotr Jasiukajtis fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2); 721*25c28e83SPiotr Jasiukajtis 722*25c28e83SPiotr Jasiukajtis fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0; 723*25c28e83SPiotr Jasiukajtis srax %g1,8,%g5 ! (1_1) iexp0 >>= 8; 724*25c28e83SPiotr Jasiukajtis fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0); 725*25c28e83SPiotr Jasiukajtis 726*25c28e83SPiotr Jasiukajtis fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0; 727*25c28e83SPiotr Jasiukajtis and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0; 728*25c28e83SPiotr Jasiukajtis 729*25c28e83SPiotr Jasiukajtis ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0]; 730*25c28e83SPiotr Jasiukajtis add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0; 731*25c28e83SPiotr Jasiukajtis fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0; 732*25c28e83SPiotr Jasiukajtis 733*25c28e83SPiotr Jasiukajtis faddd %f58,DC1,%f36 ! (4_2) res0 += DC1; 734*25c28e83SPiotr Jasiukajtis 735*25c28e83SPiotr Jasiukajtis faddd %f56,K1,%f58 ! (0_1) res0 += K1; 736*25c28e83SPiotr Jasiukajtis ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1]; 737*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0); 738*25c28e83SPiotr Jasiukajtis 739*25c28e83SPiotr Jasiukajtis fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0; 740*25c28e83SPiotr Jasiukajtis 741*25c28e83SPiotr Jasiukajtis fmuld %f44,%f22,%f44 ! 
(1_1) xx0 *= dmp0; 742*25c28e83SPiotr Jasiukajtis add %l7,stridez,%o7 ! pz += stridez 743*25c28e83SPiotr Jasiukajtis st %f14,[%l7] ! (3_2) *pz = ftmp0; 744*25c28e83SPiotr Jasiukajtis 745*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 746*25c28e83SPiotr Jasiukajtis bneg .begin 747*25c28e83SPiotr Jasiukajtis or %g0,%o7,%l7 748*25c28e83SPiotr Jasiukajtis 749*25c28e83SPiotr Jasiukajtis fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0; 750*25c28e83SPiotr Jasiukajtis 751*25c28e83SPiotr Jasiukajtis fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1); 752*25c28e83SPiotr Jasiukajtis 753*25c28e83SPiotr Jasiukajtis fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0; 754*25c28e83SPiotr Jasiukajtis 755*25c28e83SPiotr Jasiukajtis fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0; 756*25c28e83SPiotr Jasiukajtis fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0); 757*25c28e83SPiotr Jasiukajtis 758*25c28e83SPiotr Jasiukajtis fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0; 759*25c28e83SPiotr Jasiukajtis 760*25c28e83SPiotr Jasiukajtis add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0; 761*25c28e83SPiotr Jasiukajtis 762*25c28e83SPiotr Jasiukajtis faddd %f38,DC1,%f36 ! (0_1) res0 += DC1; 763*25c28e83SPiotr Jasiukajtis 764*25c28e83SPiotr Jasiukajtis faddd %f56,K1,%f38 ! (1_1) res0 += K1; 765*25c28e83SPiotr Jasiukajtis ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1]; 766*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0); 767*25c28e83SPiotr Jasiukajtis 768*25c28e83SPiotr Jasiukajtis add %o7,stridez,%i0 ! pz += stridez 769*25c28e83SPiotr Jasiukajtis fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0; 770*25c28e83SPiotr Jasiukajtis 771*25c28e83SPiotr Jasiukajtis fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0; 772*25c28e83SPiotr Jasiukajtis 773*25c28e83SPiotr Jasiukajtis fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1); 774*25c28e83SPiotr Jasiukajtis 775*25c28e83SPiotr Jasiukajtis fmuld %f38,%f44,%f44 ! 
(1_1) res0 *= xx0; 776*25c28e83SPiotr Jasiukajtis add %i0,stridez,%i3 ! pz += stridez 777*25c28e83SPiotr Jasiukajtis st %f14,[%o7] ! (4_2) *pz = ftmp0; 778*25c28e83SPiotr Jasiukajtis 779*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 780*25c28e83SPiotr Jasiukajtis bneg .begin 781*25c28e83SPiotr Jasiukajtis or %g0,%i0,%l7 782*25c28e83SPiotr Jasiukajtis 783*25c28e83SPiotr Jasiukajtis fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0); 784*25c28e83SPiotr Jasiukajtis 785*25c28e83SPiotr Jasiukajtis fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0; 786*25c28e83SPiotr Jasiukajtis 787*25c28e83SPiotr Jasiukajtis add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0; 788*25c28e83SPiotr Jasiukajtis 789*25c28e83SPiotr Jasiukajtis faddd %f44,DC1,%f44 ! (1_1) res0 += DC1; 790*25c28e83SPiotr Jasiukajtis 791*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0); 792*25c28e83SPiotr Jasiukajtis ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1]; 793*25c28e83SPiotr Jasiukajtis 794*25c28e83SPiotr Jasiukajtis add %i3,stridez,%o4 ! pz += stridez 795*25c28e83SPiotr Jasiukajtis fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0; 796*25c28e83SPiotr Jasiukajtis 797*25c28e83SPiotr Jasiukajtis st %f14,[%i0] ! (0_1) *pz = ftmp0; 798*25c28e83SPiotr Jasiukajtis 799*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 800*25c28e83SPiotr Jasiukajtis bneg .begin 801*25c28e83SPiotr Jasiukajtis or %g0,%i3,%l7 802*25c28e83SPiotr Jasiukajtis 803*25c28e83SPiotr Jasiukajtis fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0; 804*25c28e83SPiotr Jasiukajtis 805*25c28e83SPiotr Jasiukajtis fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1); 806*25c28e83SPiotr Jasiukajtis 807*25c28e83SPiotr Jasiukajtis fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0; 808*25c28e83SPiotr Jasiukajtis 809*25c28e83SPiotr Jasiukajtis fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0; 810*25c28e83SPiotr Jasiukajtis 811*25c28e83SPiotr Jasiukajtis st %f14,[%i3] ! 
(1_1) *pz = ftmp0; 812*25c28e83SPiotr Jasiukajtis 813*25c28e83SPiotr Jasiukajtis ba .begin 814*25c28e83SPiotr Jasiukajtis or %g0,%o4,%l7 815*25c28e83SPiotr Jasiukajtis 816*25c28e83SPiotr Jasiukajtis .align 16 817*25c28e83SPiotr Jasiukajtis.spec1: 818*25c28e83SPiotr Jasiukajtis st %g0,[%l7] ! *pz = 0; 819*25c28e83SPiotr Jasiukajtis add %l7,stridez,%l7 ! pz += stridez 820*25c28e83SPiotr Jasiukajtis 821*25c28e83SPiotr Jasiukajtis add %i2,stridey,%i2 ! py += stridey 822*25c28e83SPiotr Jasiukajtis ba .begin1 823*25c28e83SPiotr Jasiukajtis sub counter,1,counter ! counter-- 824*25c28e83SPiotr Jasiukajtis 825*25c28e83SPiotr Jasiukajtis .align 16 826*25c28e83SPiotr Jasiukajtis.spec: 827*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%i0 828*25c28e83SPiotr Jasiukajtis cmp %l3,%i0 ! hx ? 0x7f800000 829*25c28e83SPiotr Jasiukajtis bge,pt %icc,2f ! if ( hx >= 0x7f800000 ) 830*25c28e83SPiotr Jasiukajtis ld [%i2],%f8 831*25c28e83SPiotr Jasiukajtis 832*25c28e83SPiotr Jasiukajtis cmp %l4,%i0 ! hy ? 0x7f800000 833*25c28e83SPiotr Jasiukajtis bge,pt %icc,2f ! if ( hy >= 0x7f800000 ) 834*25c28e83SPiotr Jasiukajtis nop 835*25c28e83SPiotr Jasiukajtis 836*25c28e83SPiotr Jasiukajtis fsmuld %f17,%f17,%f44 ! x * (double)x 837*25c28e83SPiotr Jasiukajtis fsmuld %f8,%f8,%f24 ! y * (double)y 838*25c28e83SPiotr Jasiukajtis faddd %f44,%f24,%f24 ! x * (double)x + y * (double)y 839*25c28e83SPiotr Jasiukajtis fsqrtd %f24,%f24 ! hyp = sqrt(x * (double)x + y * (double)y); 840*25c28e83SPiotr Jasiukajtis fcmped %f24,DFMAX ! hyp ? DMAX 841*25c28e83SPiotr Jasiukajtis fbug,a 1f ! if ( hyp > DMAX ) 842*25c28e83SPiotr Jasiukajtis fmuls FMAX,FMAX,%f20 ! ftmp0 = FMAX * FMAX; 843*25c28e83SPiotr Jasiukajtis 844*25c28e83SPiotr Jasiukajtis fdtos %f24,%f20 ! ftmp0 = (float)hyp; 845*25c28e83SPiotr Jasiukajtis1: 846*25c28e83SPiotr Jasiukajtis st %f20,[%l7] ! *pz = ftmp0; 847*25c28e83SPiotr Jasiukajtis add %l7,stridez,%l7 ! pz += stridez 848*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! 
px += stridex 849*25c28e83SPiotr Jasiukajtis 850*25c28e83SPiotr Jasiukajtis add %i2,stridey,%i2 ! py += stridey 851*25c28e83SPiotr Jasiukajtis ba .begin1 852*25c28e83SPiotr Jasiukajtis sub counter,1,counter ! counter-- 853*25c28e83SPiotr Jasiukajtis2: 854*25c28e83SPiotr Jasiukajtis fcmps %f17,%f8 ! exceptions 855*25c28e83SPiotr Jasiukajtis cmp %l3,%i0 ! hx ? 0x7f800000 856*25c28e83SPiotr Jasiukajtis be,a %icc,1f ! if ( hx == 0x7f800000 ) 857*25c28e83SPiotr Jasiukajtis st %i0,[%l7] ! *(int*)pz = 0x7f800000; 858*25c28e83SPiotr Jasiukajtis 859*25c28e83SPiotr Jasiukajtis cmp %l4,%i0 ! hy ? 0x7f800000 860*25c28e83SPiotr Jasiukajtis be,a %icc,1f ! if ( hy == 0x7f800000 861*25c28e83SPiotr Jasiukajtis st %i0,[%l7] ! *(int*)pz = 0x7f800000; 862*25c28e83SPiotr Jasiukajtis 863*25c28e83SPiotr Jasiukajtis fmuls %f17,%f8,%f8 ! x * y 864*25c28e83SPiotr Jasiukajtis st %f8,[%l7] ! *pz = x * y; 865*25c28e83SPiotr Jasiukajtis 866*25c28e83SPiotr Jasiukajtis1: 867*25c28e83SPiotr Jasiukajtis add %l7,stridez,%l7 ! pz += stridez 868*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 869*25c28e83SPiotr Jasiukajtis 870*25c28e83SPiotr Jasiukajtis add %i2,stridey,%i2 ! py += stridey 871*25c28e83SPiotr Jasiukajtis ba .begin1 872*25c28e83SPiotr Jasiukajtis sub counter,1,counter ! 
counter-- 873*25c28e83SPiotr Jasiukajtis 874*25c28e83SPiotr Jasiukajtis .align 16 875*25c28e83SPiotr Jasiukajtis.update0: 876*25c28e83SPiotr Jasiukajtis cmp counter,1 877*25c28e83SPiotr Jasiukajtis ble .cont0 878*25c28e83SPiotr Jasiukajtis fzeros %f17 879*25c28e83SPiotr Jasiukajtis 880*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 881*25c28e83SPiotr Jasiukajtis 882*25c28e83SPiotr Jasiukajtis add %o7,stridey,%i5 883*25c28e83SPiotr Jasiukajtis stx %i5,[%fp+tmp_py] 884*25c28e83SPiotr Jasiukajtis 885*25c28e83SPiotr Jasiukajtis sub counter,1,counter 886*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 887*25c28e83SPiotr Jasiukajtis 888*25c28e83SPiotr Jasiukajtis ba .cont0 889*25c28e83SPiotr Jasiukajtis or %g0,1,counter 890*25c28e83SPiotr Jasiukajtis 891*25c28e83SPiotr Jasiukajtis .align 16 892*25c28e83SPiotr Jasiukajtis.update1: 893*25c28e83SPiotr Jasiukajtis cmp counter,1 894*25c28e83SPiotr Jasiukajtis ble .cont1 895*25c28e83SPiotr Jasiukajtis fzeros %f17 896*25c28e83SPiotr Jasiukajtis 897*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 898*25c28e83SPiotr Jasiukajtis stx %i5,[%fp+tmp_py] 899*25c28e83SPiotr Jasiukajtis 900*25c28e83SPiotr Jasiukajtis sub counter,1,counter 901*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 902*25c28e83SPiotr Jasiukajtis 903*25c28e83SPiotr Jasiukajtis ba .cont1 904*25c28e83SPiotr Jasiukajtis or %g0,1,counter 905*25c28e83SPiotr Jasiukajtis 906*25c28e83SPiotr Jasiukajtis .align 16 907*25c28e83SPiotr Jasiukajtis.update2: 908*25c28e83SPiotr Jasiukajtis cmp counter,2 909*25c28e83SPiotr Jasiukajtis ble .cont2 910*25c28e83SPiotr Jasiukajtis fzeros %f8 911*25c28e83SPiotr Jasiukajtis 912*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 913*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp_py] 914*25c28e83SPiotr Jasiukajtis 915*25c28e83SPiotr Jasiukajtis sub counter,2,counter 916*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 917*25c28e83SPiotr Jasiukajtis 918*25c28e83SPiotr Jasiukajtis ba .cont2 919*25c28e83SPiotr Jasiukajtis or 
%g0,2,counter 920*25c28e83SPiotr Jasiukajtis 921*25c28e83SPiotr Jasiukajtis .align 16 922*25c28e83SPiotr Jasiukajtis.update3: 923*25c28e83SPiotr Jasiukajtis cmp counter,2 924*25c28e83SPiotr Jasiukajtis ble .cont3 925*25c28e83SPiotr Jasiukajtis fzeros %f17 926*25c28e83SPiotr Jasiukajtis 927*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 928*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp_py] 929*25c28e83SPiotr Jasiukajtis 930*25c28e83SPiotr Jasiukajtis sub counter,2,counter 931*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 932*25c28e83SPiotr Jasiukajtis 933*25c28e83SPiotr Jasiukajtis ba .cont3 934*25c28e83SPiotr Jasiukajtis or %g0,2,counter 935*25c28e83SPiotr Jasiukajtis 936*25c28e83SPiotr Jasiukajtis .align 16 937*25c28e83SPiotr Jasiukajtis.update4: 938*25c28e83SPiotr Jasiukajtis cmp counter,3 939*25c28e83SPiotr Jasiukajtis ble .cont4 940*25c28e83SPiotr Jasiukajtis fzeros %f17 941*25c28e83SPiotr Jasiukajtis 942*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 943*25c28e83SPiotr Jasiukajtis stx %i5,[%fp+tmp_py] 944*25c28e83SPiotr Jasiukajtis 945*25c28e83SPiotr Jasiukajtis sub counter,3,counter 946*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 947*25c28e83SPiotr Jasiukajtis 948*25c28e83SPiotr Jasiukajtis ba .cont4 949*25c28e83SPiotr Jasiukajtis or %g0,3,counter 950*25c28e83SPiotr Jasiukajtis 951*25c28e83SPiotr Jasiukajtis .align 16 952*25c28e83SPiotr Jasiukajtis.update5: 953*25c28e83SPiotr Jasiukajtis cmp counter,3 954*25c28e83SPiotr Jasiukajtis ble .cont5 955*25c28e83SPiotr Jasiukajtis fzeros %f17 956*25c28e83SPiotr Jasiukajtis 957*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%i2 958*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 959*25c28e83SPiotr Jasiukajtis stx %i5,[%fp+tmp_py] 960*25c28e83SPiotr Jasiukajtis 961*25c28e83SPiotr Jasiukajtis sub counter,3,counter 962*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 963*25c28e83SPiotr Jasiukajtis 964*25c28e83SPiotr Jasiukajtis ba .cont5 965*25c28e83SPiotr Jasiukajtis or %g0,3,counter 
966*25c28e83SPiotr Jasiukajtis 967*25c28e83SPiotr Jasiukajtis .align 16 968*25c28e83SPiotr Jasiukajtis.update6: 969*25c28e83SPiotr Jasiukajtis cmp counter,4 970*25c28e83SPiotr Jasiukajtis ble .cont6 971*25c28e83SPiotr Jasiukajtis fzeros %f17 972*25c28e83SPiotr Jasiukajtis 973*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 974*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_py] 975*25c28e83SPiotr Jasiukajtis 976*25c28e83SPiotr Jasiukajtis sub counter,4,counter 977*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 978*25c28e83SPiotr Jasiukajtis 979*25c28e83SPiotr Jasiukajtis ba .cont6 980*25c28e83SPiotr Jasiukajtis or %g0,4,counter 981*25c28e83SPiotr Jasiukajtis 982*25c28e83SPiotr Jasiukajtis .align 16 983*25c28e83SPiotr Jasiukajtis.update7: 984*25c28e83SPiotr Jasiukajtis cmp counter,4 985*25c28e83SPiotr Jasiukajtis ble .cont7 986*25c28e83SPiotr Jasiukajtis fzeros %f17 987*25c28e83SPiotr Jasiukajtis 988*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%o7 989*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp_px] 990*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_py] 991*25c28e83SPiotr Jasiukajtis 992*25c28e83SPiotr Jasiukajtis sub counter,4,counter 993*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 994*25c28e83SPiotr Jasiukajtis 995*25c28e83SPiotr Jasiukajtis ba .cont7 996*25c28e83SPiotr Jasiukajtis or %g0,4,counter 997*25c28e83SPiotr Jasiukajtis 998*25c28e83SPiotr Jasiukajtis .align 16 999*25c28e83SPiotr Jasiukajtis.update8: 1000*25c28e83SPiotr Jasiukajtis cmp counter,5 1001*25c28e83SPiotr Jasiukajtis ble .cont8 1002*25c28e83SPiotr Jasiukajtis fzeros %f17 1003*25c28e83SPiotr Jasiukajtis 1004*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%o5 1005*25c28e83SPiotr Jasiukajtis stx %o5,[%fp+tmp_px] 1006*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp_py] 1007*25c28e83SPiotr Jasiukajtis 1008*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1009*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1010*25c28e83SPiotr Jasiukajtis 1011*25c28e83SPiotr Jasiukajtis ba .cont8 
1012*25c28e83SPiotr Jasiukajtis or %g0,5,counter 1013*25c28e83SPiotr Jasiukajtis 1014*25c28e83SPiotr Jasiukajtis .align 16 1015*25c28e83SPiotr Jasiukajtis.update9: 1016*25c28e83SPiotr Jasiukajtis cmp counter,5 1017*25c28e83SPiotr Jasiukajtis ble .cont9 1018*25c28e83SPiotr Jasiukajtis fzeros %f17 1019*25c28e83SPiotr Jasiukajtis 1020*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%o5 1021*25c28e83SPiotr Jasiukajtis stx %o5,[%fp+tmp_px] 1022*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp_py] 1023*25c28e83SPiotr Jasiukajtis 1024*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1025*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1026*25c28e83SPiotr Jasiukajtis 1027*25c28e83SPiotr Jasiukajtis ba .cont9 1028*25c28e83SPiotr Jasiukajtis or %g0,5,counter 1029*25c28e83SPiotr Jasiukajtis 1030*25c28e83SPiotr Jasiukajtis .align 16 1031*25c28e83SPiotr Jasiukajtis.update10: 1032*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0); 1033*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff; 1034*25c28e83SPiotr Jasiukajtis ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1]; 1035*25c28e83SPiotr Jasiukajtis faddd %f54,K1,%f54 ! 
(4_1) res0 += K1; 1036*25c28e83SPiotr Jasiukajtis 1037*25c28e83SPiotr Jasiukajtis cmp counter,6 1038*25c28e83SPiotr Jasiukajtis ble .cont10 1039*25c28e83SPiotr Jasiukajtis fzeros %f17 1040*25c28e83SPiotr Jasiukajtis 1041*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 1042*25c28e83SPiotr Jasiukajtis add %o7,stridey,%i5 1043*25c28e83SPiotr Jasiukajtis stx %i5,[%fp+tmp_py] 1044*25c28e83SPiotr Jasiukajtis 1045*25c28e83SPiotr Jasiukajtis sub counter,6,counter 1046*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1047*25c28e83SPiotr Jasiukajtis 1048*25c28e83SPiotr Jasiukajtis ba .cont10 1049*25c28e83SPiotr Jasiukajtis or %g0,6,counter 1050*25c28e83SPiotr Jasiukajtis 1051*25c28e83SPiotr Jasiukajtis .align 16 1052*25c28e83SPiotr Jasiukajtis.update11: 1053*25c28e83SPiotr Jasiukajtis cmp counter,1 1054*25c28e83SPiotr Jasiukajtis ble .cont11 1055*25c28e83SPiotr Jasiukajtis fzeros %f17 1056*25c28e83SPiotr Jasiukajtis 1057*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 1058*25c28e83SPiotr Jasiukajtis stx %i5,[%fp+tmp_py] 1059*25c28e83SPiotr Jasiukajtis 1060*25c28e83SPiotr Jasiukajtis sub counter,1,counter 1061*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1062*25c28e83SPiotr Jasiukajtis 1063*25c28e83SPiotr Jasiukajtis ba .cont11 1064*25c28e83SPiotr Jasiukajtis or %g0,1,counter 1065*25c28e83SPiotr Jasiukajtis 1066*25c28e83SPiotr Jasiukajtis .align 16 1067*25c28e83SPiotr Jasiukajtis.update12: 1068*25c28e83SPiotr Jasiukajtis cmp counter,2 1069*25c28e83SPiotr Jasiukajtis ble .cont12 1070*25c28e83SPiotr Jasiukajtis fzeros %f8 1071*25c28e83SPiotr Jasiukajtis 1072*25c28e83SPiotr Jasiukajtis stx %i0,[%fp+tmp_px] 1073*25c28e83SPiotr Jasiukajtis add %i5,stridey,%o4 1074*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp_py] 1075*25c28e83SPiotr Jasiukajtis 1076*25c28e83SPiotr Jasiukajtis sub counter,2,counter 1077*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1078*25c28e83SPiotr Jasiukajtis 1079*25c28e83SPiotr Jasiukajtis ba .cont12 1080*25c28e83SPiotr Jasiukajtis or 
%g0,2,counter 1081*25c28e83SPiotr Jasiukajtis 1082*25c28e83SPiotr Jasiukajtis .align 16 1083*25c28e83SPiotr Jasiukajtis.update13: 1084*25c28e83SPiotr Jasiukajtis cmp counter,2 1085*25c28e83SPiotr Jasiukajtis ble .cont13 1086*25c28e83SPiotr Jasiukajtis fzeros %f17 1087*25c28e83SPiotr Jasiukajtis 1088*25c28e83SPiotr Jasiukajtis stx %i0,[%fp+tmp_px] 1089*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp_py] 1090*25c28e83SPiotr Jasiukajtis 1091*25c28e83SPiotr Jasiukajtis sub counter,2,counter 1092*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1093*25c28e83SPiotr Jasiukajtis 1094*25c28e83SPiotr Jasiukajtis ba .cont13 1095*25c28e83SPiotr Jasiukajtis or %g0,2,counter 1096*25c28e83SPiotr Jasiukajtis 1097*25c28e83SPiotr Jasiukajtis .align 16 1098*25c28e83SPiotr Jasiukajtis.update14: 1099*25c28e83SPiotr Jasiukajtis cmp counter,3 1100*25c28e83SPiotr Jasiukajtis ble .cont14 1101*25c28e83SPiotr Jasiukajtis fzeros %f17 1102*25c28e83SPiotr Jasiukajtis 1103*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 1104*25c28e83SPiotr Jasiukajtis add %o4,stridey,%i5 1105*25c28e83SPiotr Jasiukajtis stx %i5,[%fp+tmp_py] 1106*25c28e83SPiotr Jasiukajtis 1107*25c28e83SPiotr Jasiukajtis sub counter,3,counter 1108*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1109*25c28e83SPiotr Jasiukajtis 1110*25c28e83SPiotr Jasiukajtis ba .cont14 1111*25c28e83SPiotr Jasiukajtis or %g0,3,counter 1112*25c28e83SPiotr Jasiukajtis 1113*25c28e83SPiotr Jasiukajtis .align 16 1114*25c28e83SPiotr Jasiukajtis.update15: 1115*25c28e83SPiotr Jasiukajtis cmp counter,3 1116*25c28e83SPiotr Jasiukajtis ble .cont15 1117*25c28e83SPiotr Jasiukajtis fzeros %f17 1118*25c28e83SPiotr Jasiukajtis 1119*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%i2 1120*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 1121*25c28e83SPiotr Jasiukajtis stx %i5,[%fp+tmp_py] 1122*25c28e83SPiotr Jasiukajtis 1123*25c28e83SPiotr Jasiukajtis sub counter,3,counter 1124*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1125*25c28e83SPiotr 
Jasiukajtis 1126*25c28e83SPiotr Jasiukajtis ba .cont15 1127*25c28e83SPiotr Jasiukajtis or %g0,3,counter 1128*25c28e83SPiotr Jasiukajtis 1129*25c28e83SPiotr Jasiukajtis .align 16 1130*25c28e83SPiotr Jasiukajtis.update16: 1131*25c28e83SPiotr Jasiukajtis faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0; 1132*25c28e83SPiotr Jasiukajtis cmp counter,4 1133*25c28e83SPiotr Jasiukajtis ble .cont16 1134*25c28e83SPiotr Jasiukajtis fzeros %f17 1135*25c28e83SPiotr Jasiukajtis 1136*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 1137*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_py] 1138*25c28e83SPiotr Jasiukajtis 1139*25c28e83SPiotr Jasiukajtis sub counter,4,counter 1140*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1141*25c28e83SPiotr Jasiukajtis 1142*25c28e83SPiotr Jasiukajtis ba .cont16 1143*25c28e83SPiotr Jasiukajtis or %g0,4,counter 1144*25c28e83SPiotr Jasiukajtis 1145*25c28e83SPiotr Jasiukajtis .align 16 1146*25c28e83SPiotr Jasiukajtis.update17: 1147*25c28e83SPiotr Jasiukajtis cmp counter,4 1148*25c28e83SPiotr Jasiukajtis ble .cont17 1149*25c28e83SPiotr Jasiukajtis fzeros %f17 1150*25c28e83SPiotr Jasiukajtis 1151*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 1152*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_py] 1153*25c28e83SPiotr Jasiukajtis 1154*25c28e83SPiotr Jasiukajtis sub counter,4,counter 1155*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1156*25c28e83SPiotr Jasiukajtis 1157*25c28e83SPiotr Jasiukajtis ba .cont17 1158*25c28e83SPiotr Jasiukajtis or %g0,4,counter 1159*25c28e83SPiotr Jasiukajtis 1160*25c28e83SPiotr Jasiukajtis .align 16 1161*25c28e83SPiotr Jasiukajtis.update18: 1162*25c28e83SPiotr Jasiukajtis cmp counter,5 1163*25c28e83SPiotr Jasiukajtis ble .cont18 1164*25c28e83SPiotr Jasiukajtis fzeros %f17 1165*25c28e83SPiotr Jasiukajtis 1166*25c28e83SPiotr Jasiukajtis stx %l7,[%fp+tmp_px] 1167*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp_py] 1168*25c28e83SPiotr Jasiukajtis 1169*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1170*25c28e83SPiotr 
Jasiukajtis st counter,[%fp+tmp_counter] 1171*25c28e83SPiotr Jasiukajtis 1172*25c28e83SPiotr Jasiukajtis ba .cont18 1173*25c28e83SPiotr Jasiukajtis or %g0,5,counter 1174*25c28e83SPiotr Jasiukajtis 1175*25c28e83SPiotr Jasiukajtis .align 16 1176*25c28e83SPiotr Jasiukajtis.update19: 1177*25c28e83SPiotr Jasiukajtis fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1); 1178*25c28e83SPiotr Jasiukajtis cmp counter,5 1179*25c28e83SPiotr Jasiukajtis ble .cont19 1180*25c28e83SPiotr Jasiukajtis fzeros %f17 1181*25c28e83SPiotr Jasiukajtis 1182*25c28e83SPiotr Jasiukajtis stx %l7,[%fp+tmp_px] 1183*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp_py] 1184*25c28e83SPiotr Jasiukajtis 1185*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1186*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1187*25c28e83SPiotr Jasiukajtis 1188*25c28e83SPiotr Jasiukajtis ba .cont19 1189*25c28e83SPiotr Jasiukajtis or %g0,5,counter 1190*25c28e83SPiotr Jasiukajtis 1191*25c28e83SPiotr Jasiukajtis .align 16 1192*25c28e83SPiotr Jasiukajtis.update19a: 1193*25c28e83SPiotr Jasiukajtis cmp counter,5 1194*25c28e83SPiotr Jasiukajtis ble .cont19a 1195*25c28e83SPiotr Jasiukajtis fzeros %f17 1196*25c28e83SPiotr Jasiukajtis 1197*25c28e83SPiotr Jasiukajtis stx %l7,[%fp+tmp_px] 1198*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp_py] 1199*25c28e83SPiotr Jasiukajtis 1200*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1201*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1202*25c28e83SPiotr Jasiukajtis 1203*25c28e83SPiotr Jasiukajtis ba .cont19a 1204*25c28e83SPiotr Jasiukajtis or %g0,5,counter 1205*25c28e83SPiotr Jasiukajtis 1206*25c28e83SPiotr Jasiukajtis .align 16 1207*25c28e83SPiotr Jasiukajtis.update20: 1208*25c28e83SPiotr Jasiukajtis faddd %f54,K1,%f54 ! 
(4_1) res0 += K1; 1209*25c28e83SPiotr Jasiukajtis cmp counter,6 1210*25c28e83SPiotr Jasiukajtis ble .cont20 1211*25c28e83SPiotr Jasiukajtis fzeros %f17 1212*25c28e83SPiotr Jasiukajtis 1213*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 1214*25c28e83SPiotr Jasiukajtis add %o7,stridey,%g1 1215*25c28e83SPiotr Jasiukajtis stx %g1,[%fp+tmp_py] 1216*25c28e83SPiotr Jasiukajtis 1217*25c28e83SPiotr Jasiukajtis sub counter,6,counter 1218*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1219*25c28e83SPiotr Jasiukajtis 1220*25c28e83SPiotr Jasiukajtis ba .cont20 1221*25c28e83SPiotr Jasiukajtis or %g0,6,counter 1222*25c28e83SPiotr Jasiukajtis 1223*25c28e83SPiotr Jasiukajtis.exit: 1224*25c28e83SPiotr Jasiukajtis ret 1225*25c28e83SPiotr Jasiukajtis restore 1226*25c28e83SPiotr Jasiukajtis SET_SIZE(__vhypotf) 1227*25c28e83SPiotr Jasiukajtis 1228