1*25c28e83SPiotr Jasiukajtis/* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis/* 22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23*25c28e83SPiotr Jasiukajtis */ 24*25c28e83SPiotr Jasiukajtis/* 25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 
27*25c28e83SPiotr Jasiukajtis */ 28*25c28e83SPiotr Jasiukajtis 29*25c28e83SPiotr Jasiukajtis .file "__vrsqrt.S" 30*25c28e83SPiotr Jasiukajtis 31*25c28e83SPiotr Jasiukajtis#include "libm.h" 32*25c28e83SPiotr Jasiukajtis 33*25c28e83SPiotr Jasiukajtis RO_DATA 34*25c28e83SPiotr Jasiukajtis .align 64 35*25c28e83SPiotr Jasiukajtis 36*25c28e83SPiotr Jasiukajtis.CONST_TBL: 37*25c28e83SPiotr Jasiukajtis .word 0xbfe00000, 0x0000002f ! K1 =-5.00000000000005209867e-01; K1..K6: coefficients of the polynomial in xx, evaluated from K6 down to K1 38*25c28e83SPiotr Jasiukajtis .word 0x3fd80000, 0x00000058 ! K2 = 3.75000000000004884257e-01; 39*25c28e83SPiotr Jasiukajtis .word 0xbfd3ffff, 0xff444bc8 ! K3 =-3.12499999317136886551e-01; 40*25c28e83SPiotr Jasiukajtis .word 0x3fd17fff, 0xff5006fe ! K4 = 2.73437499359815081532e-01; 41*25c28e83SPiotr Jasiukajtis .word 0xbfcf80bb, 0xb33ef574 ! K5 =-2.46116125605037803130e-01; 42*25c28e83SPiotr Jasiukajtis .word 0x3fcce0af, 0xf8156949 ! K6 = 2.25606914648617522896e-01; 43*25c28e83SPiotr Jasiukajtis 44*25c28e83SPiotr Jasiukajtis .word 0x001fffff, 0xffffffff ! DC0 = mask for vis_fand: keeps the low 53 bits (lowest exponent bit + 52 fraction bits) 45*25c28e83SPiotr Jasiukajtis .word 0x3fe00000, 0x00000000 ! DC1 = bit pattern of 0.5, merged in with vis_for after the DC0 mask 46*25c28e83SPiotr Jasiukajtis .word 0x00002000, 0x00000000 ! DC2 = 0x2000 added to the high word by vis_fpadd32 (half of the 0x4000 step kept by DC3) 47*25c28e83SPiotr Jasiukajtis .word 0x7fffc000, 0x00000000 ! DC3 = mask for res_c: clears the sign bit, the low 14 bits of the high word and the whole low word 48*25c28e83SPiotr Jasiukajtis .word 0x0007ffff, 0xffffffff ! DC4 = mask keeping the low 51 bits; used on the path where 0x00080000 <= hx < 0x00100000 49*25c28e83SPiotr Jasiukajtis 50*25c28e83SPiotr Jasiukajtis .word 0x43200000, 0x00000000 ! D2ON51 = pow(2,51); added after the integer-to-double conversion on the DC4 path 51*25c28e83SPiotr Jasiukajtis .word 0x3ff00000, 0x00000000 ! 
DONE = 1.0 (numerator of res = DONE / res on the special-case paths) 52*25c28e83SPiotr Jasiukajtis 53*25c28e83SPiotr Jasiukajtis#define stridex %l5 54*25c28e83SPiotr Jasiukajtis#define stridey %l7 55*25c28e83SPiotr Jasiukajtis#define counter %l0 56*25c28e83SPiotr Jasiukajtis#define TBL %l3 57*25c28e83SPiotr Jasiukajtis#define _0x7ff00000 %o0 58*25c28e83SPiotr Jasiukajtis#define _0x00100000 %o1 59*25c28e83SPiotr Jasiukajtis 60*25c28e83SPiotr Jasiukajtis#define DC0 %f56 61*25c28e83SPiotr Jasiukajtis#define DC1 %f54 62*25c28e83SPiotr Jasiukajtis#define DC2 %f48 63*25c28e83SPiotr Jasiukajtis#define DC3 %f46 64*25c28e83SPiotr Jasiukajtis#define K6 %f42 65*25c28e83SPiotr Jasiukajtis#define K5 %f20 66*25c28e83SPiotr Jasiukajtis#define K4 %f52 67*25c28e83SPiotr Jasiukajtis#define K3 %f50 68*25c28e83SPiotr Jasiukajtis#define K2 %f14 69*25c28e83SPiotr Jasiukajtis#define K1 %f12 70*25c28e83SPiotr Jasiukajtis#define DONE %f4 71*25c28e83SPiotr Jasiukajtis 72*25c28e83SPiotr Jasiukajtis#define tmp_counter %g5 73*25c28e83SPiotr Jasiukajtis#define tmp_px %o5 74*25c28e83SPiotr Jasiukajtis 75*25c28e83SPiotr Jasiukajtis#define tmp0 STACK_BIAS-0x40 76*25c28e83SPiotr Jasiukajtis#define tmp1 STACK_BIAS-0x38 77*25c28e83SPiotr Jasiukajtis#define tmp2 STACK_BIAS-0x30 78*25c28e83SPiotr Jasiukajtis#define tmp3 STACK_BIAS-0x28 79*25c28e83SPiotr Jasiukajtis#define tmp4 STACK_BIAS-0x20 80*25c28e83SPiotr Jasiukajtis#define tmp5 STACK_BIAS-0x18 81*25c28e83SPiotr Jasiukajtis#define tmp6 STACK_BIAS-0x10 82*25c28e83SPiotr Jasiukajtis#define tmp7 STACK_BIAS-0x08 83*25c28e83SPiotr Jasiukajtis 84*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9; 0x40 covers the eight 8-byte slots tmp0..tmp7 85*25c28e83SPiotr Jasiukajtis#define tmps 0x40 86*25c28e83SPiotr Jasiukajtis 87*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 88*25c28e83SPiotr Jasiukajtis! !!!!! algorithm !!!!! 89*25c28e83SPiotr Jasiukajtis! ((float*)&res)[0] = ((float*)px)[0]; 90*25c28e83SPiotr Jasiukajtis! 
((float*)&res)[1] = ((float*)px)[1]; 91*25c28e83SPiotr Jasiukajtis! hx = *(int*)px; 92*25c28e83SPiotr Jasiukajtis! if ( hx >= 0x7ff00000 ) 93*25c28e83SPiotr Jasiukajtis! { 94*25c28e83SPiotr Jasiukajtis! res = DONE / res; 95*25c28e83SPiotr Jasiukajtis! ((float*)py)[0] = ((float*)&res)[0]; 96*25c28e83SPiotr Jasiukajtis! ((float*)py)[1] = ((float*)&res)[1]; 97*25c28e83SPiotr Jasiukajtis! px += stridex; 98*25c28e83SPiotr Jasiukajtis! py += stridey; 99*25c28e83SPiotr Jasiukajtis! continue; 100*25c28e83SPiotr Jasiukajtis! } 101*25c28e83SPiotr Jasiukajtis! if ( hx < 0x00100000 ) 102*25c28e83SPiotr Jasiukajtis! { 103*25c28e83SPiotr Jasiukajtis! ax = hx & 0x7fffffff; 104*25c28e83SPiotr Jasiukajtis! lx = ((int*)px)[1]; 105*25c28e83SPiotr Jasiukajtis! 106*25c28e83SPiotr Jasiukajtis! if ( (ax | lx) == 0 ) 107*25c28e83SPiotr Jasiukajtis! { 108*25c28e83SPiotr Jasiukajtis! res = DONE / res; 109*25c28e83SPiotr Jasiukajtis! ((float*)py)[0] = ((float*)&res)[0]; 110*25c28e83SPiotr Jasiukajtis! ((float*)py)[1] = ((float*)&res)[1]; 111*25c28e83SPiotr Jasiukajtis! px += stridex; 112*25c28e83SPiotr Jasiukajtis! py += stridey; 113*25c28e83SPiotr Jasiukajtis! continue; 114*25c28e83SPiotr Jasiukajtis! } 115*25c28e83SPiotr Jasiukajtis! else if ( hx >= 0 ) 116*25c28e83SPiotr Jasiukajtis! { 117*25c28e83SPiotr Jasiukajtis! if ( hx < 0x00080000 ) 118*25c28e83SPiotr Jasiukajtis! { 119*25c28e83SPiotr Jasiukajtis! res = *(long long*)&res; 120*25c28e83SPiotr Jasiukajtis! hx = *(int*)&res - (537 << 21); 121*25c28e83SPiotr Jasiukajtis! } 122*25c28e83SPiotr Jasiukajtis! else 123*25c28e83SPiotr Jasiukajtis! { 124*25c28e83SPiotr Jasiukajtis! res = vis_fand(res,DC4); 125*25c28e83SPiotr Jasiukajtis! res = *(long long*)&res; 126*25c28e83SPiotr Jasiukajtis! res += D2ON51; 127*25c28e83SPiotr Jasiukajtis! hx = *(int*)&res - (537 << 21); 128*25c28e83SPiotr Jasiukajtis! } 129*25c28e83SPiotr Jasiukajtis! } 130*25c28e83SPiotr Jasiukajtis! else 131*25c28e83SPiotr Jasiukajtis! { 132*25c28e83SPiotr Jasiukajtis! 
res = sqrt(res); 133*25c28e83SPiotr Jasiukajtis! ((float*)py)[0] = ((float*)&res)[0]; 134*25c28e83SPiotr Jasiukajtis! ((float*)py)[1] = ((float*)&res)[1]; 135*25c28e83SPiotr Jasiukajtis! px += stridex; 136*25c28e83SPiotr Jasiukajtis! py += stridey; 137*25c28e83SPiotr Jasiukajtis! continue; 138*25c28e83SPiotr Jasiukajtis! } 139*25c28e83SPiotr Jasiukajtis! } 140*25c28e83SPiotr Jasiukajtis! 141*25c28e83SPiotr Jasiukajtis! iexp = hx >> 21; 142*25c28e83SPiotr Jasiukajtis! iexp = -iexp; 143*25c28e83SPiotr Jasiukajtis! iexp += 0x5fe; 144*25c28e83SPiotr Jasiukajtis! lexp = iexp << 52; 145*25c28e83SPiotr Jasiukajtis! dlexp = *(double*)&lexp; 146*25c28e83SPiotr Jasiukajtis! hx >>= 10; 147*25c28e83SPiotr Jasiukajtis! hx &= 0x7f8; 148*25c28e83SPiotr Jasiukajtis! hx += 8; 149*25c28e83SPiotr Jasiukajtis! hx &= -16; 150*25c28e83SPiotr Jasiukajtis! 151*25c28e83SPiotr Jasiukajtis! res = vis_fand(res,DC0); 152*25c28e83SPiotr Jasiukajtis! res = vis_for(res,DC1); 153*25c28e83SPiotr Jasiukajtis! res_c = vis_fpadd32(res,DC2); 154*25c28e83SPiotr Jasiukajtis! res_c = vis_fand(res_c,DC3); 155*25c28e83SPiotr Jasiukajtis! 156*25c28e83SPiotr Jasiukajtis! addr = (char*)arr + hx; 157*25c28e83SPiotr Jasiukajtis! dexp_hi = ((double*)addr)[0]; 158*25c28e83SPiotr Jasiukajtis! dexp_lo = ((double*)addr)[1]; 159*25c28e83SPiotr Jasiukajtis! dtmp0 = dexp_hi * dexp_hi; 160*25c28e83SPiotr Jasiukajtis! xx = res - res_c; 161*25c28e83SPiotr Jasiukajtis! xx *= dtmp0; 162*25c28e83SPiotr Jasiukajtis! res = K6 * xx; 163*25c28e83SPiotr Jasiukajtis! res += K5; 164*25c28e83SPiotr Jasiukajtis! res *= xx; 165*25c28e83SPiotr Jasiukajtis! res += K4; 166*25c28e83SPiotr Jasiukajtis! res *= xx; 167*25c28e83SPiotr Jasiukajtis! res += K3; 168*25c28e83SPiotr Jasiukajtis! res *= xx; 169*25c28e83SPiotr Jasiukajtis! res += K2; 170*25c28e83SPiotr Jasiukajtis! res *= xx; 171*25c28e83SPiotr Jasiukajtis! res += K1; 172*25c28e83SPiotr Jasiukajtis! res *= xx; 173*25c28e83SPiotr Jasiukajtis! 
res = dexp_hi * res; 174*25c28e83SPiotr Jasiukajtis! res += dexp_lo; 175*25c28e83SPiotr Jasiukajtis! res += dexp_hi; 176*25c28e83SPiotr Jasiukajtis! 177*25c28e83SPiotr Jasiukajtis! res *= dlexp; 178*25c28e83SPiotr Jasiukajtis! 179*25c28e83SPiotr Jasiukajtis! ((float*)py)[0] = ((float*)&res)[0]; 180*25c28e83SPiotr Jasiukajtis! ((float*)py)[1] = ((float*)&res)[1]; 181*25c28e83SPiotr Jasiukajtis! 182*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 183*25c28e83SPiotr Jasiukajtis 184*25c28e83SPiotr Jasiukajtis ENTRY(__vrsqrt) 185*25c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-tmps,%sp 186*25c28e83SPiotr Jasiukajtis PIC_SETUP(l7) 187*25c28e83SPiotr Jasiukajtis PIC_SET(l7,.CONST_TBL,o3) 188*25c28e83SPiotr Jasiukajtis PIC_SET(l7,__vlibm_TBL_rsqrt,l3) 189*25c28e83SPiotr Jasiukajtis wr %g0,0x82,%asi 190*25c28e83SPiotr Jasiukajtis 191*25c28e83SPiotr Jasiukajtis ldd [%o3],K1 192*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ff00000),%o0 193*25c28e83SPiotr Jasiukajtis mov %i3,%o4 194*25c28e83SPiotr Jasiukajtis 195*25c28e83SPiotr Jasiukajtis ldd [%o3+0x08],K2 196*25c28e83SPiotr Jasiukajtis sethi %hi(0x00100000),%o1 197*25c28e83SPiotr Jasiukajtis mov %i1,tmp_px 198*25c28e83SPiotr Jasiukajtis 199*25c28e83SPiotr Jasiukajtis ldd [%o3+0x10],K3 200*25c28e83SPiotr Jasiukajtis sll %i2,3,stridex 201*25c28e83SPiotr Jasiukajtis mov %i0,tmp_counter 202*25c28e83SPiotr Jasiukajtis 203*25c28e83SPiotr Jasiukajtis ldd [%o3+0x18],K4 204*25c28e83SPiotr Jasiukajtis sll %i4,3,stridey 205*25c28e83SPiotr Jasiukajtis 206*25c28e83SPiotr Jasiukajtis ldd [%o3+0x20],K5 207*25c28e83SPiotr Jasiukajtis ldd [%o3+0x28],K6 208*25c28e83SPiotr Jasiukajtis ldd [%o3+0x30],DC0 209*25c28e83SPiotr Jasiukajtis ldd [%o3+0x38],DC1 210*25c28e83SPiotr Jasiukajtis ldd [%o3+0x40],DC2 211*25c28e83SPiotr Jasiukajtis ldd [%o3+0x48],DC3 212*25c28e83SPiotr Jasiukajtis 213*25c28e83SPiotr Jasiukajtis.begin: 214*25c28e83SPiotr Jasiukajtis mov tmp_counter,counter 215*25c28e83SPiotr 
Jasiukajtis mov tmp_px,%i1 216*25c28e83SPiotr Jasiukajtis clr tmp_counter 217*25c28e83SPiotr Jasiukajtis.begin1: 218*25c28e83SPiotr Jasiukajtis cmp counter,0 219*25c28e83SPiotr Jasiukajtis ble,pn %icc,.exit 220*25c28e83SPiotr Jasiukajtis ldd [%o3+0x60],DONE 221*25c28e83SPiotr Jasiukajtis 222*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%f0 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; 223*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%i0 224*25c28e83SPiotr Jasiukajtis 225*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,%f1 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; 226*25c28e83SPiotr Jasiukajtis add %i0,1023,%i0 227*25c28e83SPiotr Jasiukajtis 228*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); 229*25c28e83SPiotr Jasiukajtis 230*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px; 231*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i4 232*25c28e83SPiotr Jasiukajtis 233*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,%l4 234*25c28e83SPiotr Jasiukajtis add %i1,stridex,%l6 ! px += stridex 235*25c28e83SPiotr Jasiukajtis 236*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; 237*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%f8 ! (0_0) ((float*)res)[0] = ((float*)px)[0]; 238*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); 239*25c28e83SPiotr Jasiukajtis 240*25c28e83SPiotr Jasiukajtis lda [%l6+4]%asi,%f9 ! (0_0) ((float*)res)[1] = ((float*)px)[1]; 241*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (6_1) hx >>= 10; 242*25c28e83SPiotr Jasiukajtis and %g1,%i0,%i2 243*25c28e83SPiotr Jasiukajtis 244*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000 245*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec0 ! (6_1) if ( hx >= 0x7ff00000 ) 246*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; 247*25c28e83SPiotr Jasiukajtis 248*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000 249*25c28e83SPiotr Jasiukajtis bl,pn %icc,.spec1 ! 
(6_1) if ( hx < 0x00100000 ) 250*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; 251*25c28e83SPiotr Jasiukajtis.cont_spec: 252*25c28e83SPiotr Jasiukajtis fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); 253*25c28e83SPiotr Jasiukajtis 254*25c28e83SPiotr Jasiukajtis fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); 255*25c28e83SPiotr Jasiukajtis 256*25c28e83SPiotr Jasiukajtis add %o2,8,%l4 ! (6_1) hx += 8; 257*25c28e83SPiotr Jasiukajtis 258*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (6_1) iexp += 0x5fe; 259*25c28e83SPiotr Jasiukajtis 260*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px; 261*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (6_1) iexp << 52; 262*25c28e83SPiotr Jasiukajtis and %l4,-16,%l4 ! (6_1) hx = -16; 263*25c28e83SPiotr Jasiukajtis 264*25c28e83SPiotr Jasiukajtis add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx; 265*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp; 266*25c28e83SPiotr Jasiukajtis 267*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 268*25c28e83SPiotr Jasiukajtis ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0]; 269*25c28e83SPiotr Jasiukajtis 270*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; 271*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%f0 ! (1_0) ((float*)res)[0] = ((float*)px)[0]; 272*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); 273*25c28e83SPiotr Jasiukajtis 274*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (0_0) hx >>= 10; 275*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; 276*25c28e83SPiotr Jasiukajtis lda [%l6+4]%asi,%f1 ! (1_0) ((float*)res)[1] = ((float*)px)[1]; 277*25c28e83SPiotr Jasiukajtis 278*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 279*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 ) 280*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f6 ! 
(6_1) res_c = vis_fand(res_c,DC3); 281*25c28e83SPiotr Jasiukajtis.cont0: 282*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; 283*25c28e83SPiotr Jasiukajtis fmuld %f30,%f30,%f10 ! (6_1) dtmp0 = dexp_hi * dexp_hi; 284*25c28e83SPiotr Jasiukajtis 285*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000 286*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 ) 287*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; 288*25c28e83SPiotr Jasiukajtis.cont1: 289*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); 290*25c28e83SPiotr Jasiukajtis 291*25c28e83SPiotr Jasiukajtis fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); 292*25c28e83SPiotr Jasiukajtis 293*25c28e83SPiotr Jasiukajtis add %o2,8,%l2 ! (0_0) hx += 8; 294*25c28e83SPiotr Jasiukajtis fsubd %f44,%f6,%f6 ! (6_1) xx = res - res_c; 295*25c28e83SPiotr Jasiukajtis 296*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px; 297*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (0_0) iexp << 52; 298*25c28e83SPiotr Jasiukajtis and %l2,-16,%l2 ! (0_0) hx = -16; 299*25c28e83SPiotr Jasiukajtis 300*25c28e83SPiotr Jasiukajtis add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx; 301*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 302*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp; 303*25c28e83SPiotr Jasiukajtis 304*25c28e83SPiotr Jasiukajtis fmuld %f6,%f10,%f26 ! (6_1) xx *= dtmp0; 305*25c28e83SPiotr Jasiukajtis ldd [%l2],%f10 ! (0_0) dtmp0 = ((double*)addr)[0]; 306*25c28e83SPiotr Jasiukajtis 307*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; 308*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%f6 ! (2_0) ((float*)res)[0] = ((float*)px)[0]; 309*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); 310*25c28e83SPiotr Jasiukajtis 311*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! 
(1_0) hx >>= 10; 312*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 313*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 ) 314*25c28e83SPiotr Jasiukajtis lda [%l6+4]%asi,%f7 ! (2_0) ((float*)res)[1] = ((float*)px)[1]; 315*25c28e83SPiotr Jasiukajtis.cont2: 316*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3); 317*25c28e83SPiotr Jasiukajtis 318*25c28e83SPiotr Jasiukajtis fmuld %f10,%f10,%f10 ! (0_0) dtmp0 = dexp_hi * dexp_hi; 319*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000 320*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 ) 321*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; 322*25c28e83SPiotr Jasiukajtis.cont3: 323*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; 324*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); 325*25c28e83SPiotr Jasiukajtis 326*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (1_0) iexp += 0x5fe; 327*25c28e83SPiotr Jasiukajtis fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2); 328*25c28e83SPiotr Jasiukajtis 329*25c28e83SPiotr Jasiukajtis fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx; 330*25c28e83SPiotr Jasiukajtis add %o2,8,%i2 ! (1_0) hx += 8; 331*25c28e83SPiotr Jasiukajtis fsubd %f28,%f8,%f32 ! (0_0) xx = res - res_c; 332*25c28e83SPiotr Jasiukajtis 333*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px; 334*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (1_0) iexp << 52; 335*25c28e83SPiotr Jasiukajtis and %i2,-16,%i2 ! (1_0) hx = -16; 336*25c28e83SPiotr Jasiukajtis 337*25c28e83SPiotr Jasiukajtis add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx; 338*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp; 339*25c28e83SPiotr Jasiukajtis 340*25c28e83SPiotr Jasiukajtis fmuld %f32,%f10,%f32 ! (0_0) xx *= dtmp0; 341*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! 
px += stridex 342*25c28e83SPiotr Jasiukajtis ldd [%i2],%f10 ! (1_0) dtmp0 = ((double*)addr)[0]; 343*25c28e83SPiotr Jasiukajtis faddd %f62,K5,%f62 ! (6_1) res += K5; 344*25c28e83SPiotr Jasiukajtis 345*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; 346*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%f0 ! (3_0) ((float*)res)[0] = ((float*)px)[0]; 347*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1); 348*25c28e83SPiotr Jasiukajtis 349*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (2_0) hx >>= 10; 350*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 351*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 ) 352*25c28e83SPiotr Jasiukajtis lda [%l6+4]%asi,%f1 ! (3_0) ((float*)res)[1] = ((float*)px)[1]; 353*25c28e83SPiotr Jasiukajtis.cont4: 354*25c28e83SPiotr Jasiukajtis fmuld %f62,%f26,%f40 ! (6_1) res *= xx; 355*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3); 356*25c28e83SPiotr Jasiukajtis 357*25c28e83SPiotr Jasiukajtis fmuld %f10,%f10,%f10 ! (1_0) dtmp0 = dexp_hi * dexp_hi; 358*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000 359*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 ) 360*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; 361*25c28e83SPiotr Jasiukajtis.cont5: 362*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; 363*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); 364*25c28e83SPiotr Jasiukajtis 365*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (2_0) iexp += 0x5fe; 366*25c28e83SPiotr Jasiukajtis fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2); 367*25c28e83SPiotr Jasiukajtis 368*25c28e83SPiotr Jasiukajtis fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx; 369*25c28e83SPiotr Jasiukajtis add %o2,8,%i4 ! (2_0) hx += 8; 370*25c28e83SPiotr Jasiukajtis fsubd %f44,%f8,%f6 ! 
(1_0) xx = res - res_c; 371*25c28e83SPiotr Jasiukajtis 372*25c28e83SPiotr Jasiukajtis faddd %f40,K4,%f40 ! (6_1) res += K4; 373*25c28e83SPiotr Jasiukajtis 374*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px; 375*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (2_0) iexp << 52; 376*25c28e83SPiotr Jasiukajtis and %i4,-16,%i4 ! (2_0) hx = -16; 377*25c28e83SPiotr Jasiukajtis 378*25c28e83SPiotr Jasiukajtis add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx; 379*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp; 380*25c28e83SPiotr Jasiukajtis 381*25c28e83SPiotr Jasiukajtis fmuld %f6,%f10,%f38 ! (1_0) xx *= dtmp0; 382*25c28e83SPiotr Jasiukajtis ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0]; 383*25c28e83SPiotr Jasiukajtis faddd %f62,K5,%f62 ! (0_0) res += K5; 384*25c28e83SPiotr Jasiukajtis 385*25c28e83SPiotr Jasiukajtis fmuld %f40,%f26,%f34 ! (6_1) res *= xx; 386*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 387*25c28e83SPiotr Jasiukajtis 388*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; 389*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%f8 ! (4_0) ((float*)res)[0] = ((float*)px)[0]; 390*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); 391*25c28e83SPiotr Jasiukajtis 392*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (3_0) hx >>= 10; 393*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 394*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 ) 395*25c28e83SPiotr Jasiukajtis lda [%l6+4]%asi,%f9 ! (4_0) ((float*)res)[1] = ((float*)px)[1]; 396*25c28e83SPiotr Jasiukajtis.cont6: 397*25c28e83SPiotr Jasiukajtis fmuld %f62,%f32,%f60 ! (0_0) res *= xx; 398*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000 399*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f22 ! (2_0) res_c = vis_fand(res_c,DC3); 400*25c28e83SPiotr Jasiukajtis 401*25c28e83SPiotr Jasiukajtis fmuld %f24,%f24,%f24 ! 
(2_0) dtmp0 = dexp_hi * dexp_hi; 402*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 ) 403*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; 404*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f6 ! (6_1) res += K3; 405*25c28e83SPiotr Jasiukajtis.cont7: 406*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; 407*25c28e83SPiotr Jasiukajtis fand %f8,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); 408*25c28e83SPiotr Jasiukajtis 409*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (3_0) iexp += 0x5fe; 410*25c28e83SPiotr Jasiukajtis fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2); 411*25c28e83SPiotr Jasiukajtis 412*25c28e83SPiotr Jasiukajtis fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx; 413*25c28e83SPiotr Jasiukajtis add %o2,8,%i5 ! (3_0) hx += 8; 414*25c28e83SPiotr Jasiukajtis fsubd %f28,%f22,%f28 ! (2_0) xx = res - res_c; 415*25c28e83SPiotr Jasiukajtis 416*25c28e83SPiotr Jasiukajtis fmuld %f6,%f26,%f22 ! (6_1) res *= xx; 417*25c28e83SPiotr Jasiukajtis faddd %f60,K4,%f60 ! (0_0) res += K4; 418*25c28e83SPiotr Jasiukajtis 419*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px; 420*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (3_0) iexp << 52; 421*25c28e83SPiotr Jasiukajtis and %i5,-16,%i5 ! (3_0) hx = -16; 422*25c28e83SPiotr Jasiukajtis 423*25c28e83SPiotr Jasiukajtis add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx; 424*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp; 425*25c28e83SPiotr Jasiukajtis 426*25c28e83SPiotr Jasiukajtis fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0; 427*25c28e83SPiotr Jasiukajtis add %l6,stridex,%i0 ! px += stridex 428*25c28e83SPiotr Jasiukajtis ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0]; 429*25c28e83SPiotr Jasiukajtis faddd %f62,K5,%f62 ! (1_0) res += K5; 430*25c28e83SPiotr Jasiukajtis 431*25c28e83SPiotr Jasiukajtis faddd %f22,K2,%f10 ! (6_1) res += K2; 432*25c28e83SPiotr Jasiukajtis fmuld %f60,%f32,%f34 ! 
(0_0) res *= xx; 433*25c28e83SPiotr Jasiukajtis 434*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; 435*25c28e83SPiotr Jasiukajtis lda [%i0]%asi,%f0 ! (5_0) ((float*)res)[0] = ((float*)px)[0]; 436*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1); 437*25c28e83SPiotr Jasiukajtis 438*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (4_0) hx >>= 10; 439*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 440*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 ) 441*25c28e83SPiotr Jasiukajtis lda [%i0+4]%asi,%f1 ! (5_0) ((float*)res)[1] = ((float*)px)[1]; 442*25c28e83SPiotr Jasiukajtis.cont8: 443*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f40 ! (3_0) res_c = vis_fand(res_c,DC3); 444*25c28e83SPiotr Jasiukajtis fmuld %f62,%f38,%f62 ! (1_0) res *= xx; 445*25c28e83SPiotr Jasiukajtis 446*25c28e83SPiotr Jasiukajtis fmuld %f10,%f26,%f58 ! (6_1) res *= xx; 447*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000 448*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; 449*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f60 ! (0_0) res += K3; 450*25c28e83SPiotr Jasiukajtis 451*25c28e83SPiotr Jasiukajtis fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi; 452*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 ) 453*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; 454*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); 455*25c28e83SPiotr Jasiukajtis.cont9: 456*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (4_0) iexp += 0x5fe; 457*25c28e83SPiotr Jasiukajtis fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2); 458*25c28e83SPiotr Jasiukajtis 459*25c28e83SPiotr Jasiukajtis fmuld K6,%f36,%f10 ! (2_0) res = K6 * xx; 460*25c28e83SPiotr Jasiukajtis add %o2,8,%l1 ! (4_0) hx += 8; 461*25c28e83SPiotr Jasiukajtis fsubd %f44,%f40,%f44 ! 
(3_0) xx = res - res_c; 462*25c28e83SPiotr Jasiukajtis 463*25c28e83SPiotr Jasiukajtis fmuld %f60,%f32,%f60 ! (0_0) res *= xx; 464*25c28e83SPiotr Jasiukajtis faddd %f62,K4,%f6 ! (1_0) res += K4; 465*25c28e83SPiotr Jasiukajtis 466*25c28e83SPiotr Jasiukajtis lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px; 467*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (4_0) iexp << 52; 468*25c28e83SPiotr Jasiukajtis and %l1,-16,%l1 ! (4_0) hx = -16; 469*25c28e83SPiotr Jasiukajtis faddd %f58,K1,%f58 ! (6_1) res += K1; 470*25c28e83SPiotr Jasiukajtis 471*25c28e83SPiotr Jasiukajtis add %i0,stridex,%i1 ! px += stridex 472*25c28e83SPiotr Jasiukajtis add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx; 473*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp; 474*25c28e83SPiotr Jasiukajtis 475*25c28e83SPiotr Jasiukajtis fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0; 476*25c28e83SPiotr Jasiukajtis ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0]; 477*25c28e83SPiotr Jasiukajtis faddd %f10,K5,%f62 ! (2_0) res += K5; 478*25c28e83SPiotr Jasiukajtis 479*25c28e83SPiotr Jasiukajtis fmuld %f6,%f38,%f34 ! (1_0) res *= xx; 480*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; 481*25c28e83SPiotr Jasiukajtis nop 482*25c28e83SPiotr Jasiukajtis faddd %f60,K2,%f60 ! (0_0) res += K2; 483*25c28e83SPiotr Jasiukajtis 484*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1); 485*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; 486*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; 487*25c28e83SPiotr Jasiukajtis fmuld %f58,%f26,%f26 ! (6_1) res *= xx; 488*25c28e83SPiotr Jasiukajtis 489*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (5_0) hx >>= 10; 490*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000 491*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update10 ! (5_0) if ( hx >= 0x7ff00000 ) 492*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,%f7 ! 
(6_0) ((float*)res)[1] = ((float*)px)[1]; 493*25c28e83SPiotr Jasiukajtis.cont10: 494*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3); 495*25c28e83SPiotr Jasiukajtis fmuld %f62,%f36,%f62 ! (2_0) res *= xx; 496*25c28e83SPiotr Jasiukajtis 497*25c28e83SPiotr Jasiukajtis fmuld %f60,%f32,%f58 ! (0_0) res *= xx; 498*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000 499*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; 500*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (1_0) res += K3; 501*25c28e83SPiotr Jasiukajtis 502*25c28e83SPiotr Jasiukajtis fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res; 503*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update11 ! (5_0) if ( hx < 0x00100000 ) 504*25c28e83SPiotr Jasiukajtis nop 505*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); 506*25c28e83SPiotr Jasiukajtis.cont11: 507*25c28e83SPiotr Jasiukajtis ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1]; 508*25c28e83SPiotr Jasiukajtis fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi; 509*25c28e83SPiotr Jasiukajtis fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2); 510*25c28e83SPiotr Jasiukajtis 511*25c28e83SPiotr Jasiukajtis fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx; 512*25c28e83SPiotr Jasiukajtis add %o2,8,%i3 ! (5_0) hx += 8; 513*25c28e83SPiotr Jasiukajtis fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c; 514*25c28e83SPiotr Jasiukajtis 515*25c28e83SPiotr Jasiukajtis fmuld %f34,%f38,%f24 ! (1_0) res *= xx; 516*25c28e83SPiotr Jasiukajtis or %g0,%o4,%i0 517*25c28e83SPiotr Jasiukajtis 518*25c28e83SPiotr Jasiukajtis cmp counter,7 519*25c28e83SPiotr Jasiukajtis bl,pn %icc,.tail 520*25c28e83SPiotr Jasiukajtis faddd %f62,K4,%f34 ! (2_0) res += K4; 521*25c28e83SPiotr Jasiukajtis 522*25c28e83SPiotr Jasiukajtis ba .main_loop 523*25c28e83SPiotr Jasiukajtis sub counter,7,counter ! 
counter 524*25c28e83SPiotr Jasiukajtis 525*25c28e83SPiotr Jasiukajtis .align 16 526*25c28e83SPiotr Jasiukajtis.main_loop: 527*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (5_0) iexp += 0x5fe; 528*25c28e83SPiotr Jasiukajtis and %i3,-16,%i3 ! (5_1) hx = -16; 529*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px; 530*25c28e83SPiotr Jasiukajtis faddd %f58,K1,%f58 ! (0_1) res += K1; 531*25c28e83SPiotr Jasiukajtis 532*25c28e83SPiotr Jasiukajtis add %i3,TBL,%i3 ! (5_1) addr = (char*)arr + hx; 533*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (5_1) iexp << 52; 534*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp0] ! (5_1) dlexp = *(double*)lexp; 535*25c28e83SPiotr Jasiukajtis faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo; 536*25c28e83SPiotr Jasiukajtis 537*25c28e83SPiotr Jasiukajtis faddd %f22,K5,%f62 ! (3_1) res += K5; 538*25c28e83SPiotr Jasiukajtis add %i1,stridex,%l6 ! px += stridex 539*25c28e83SPiotr Jasiukajtis ldd [%i3],%f22 ! (5_1) dtmp0 = ((double*)addr)[0]; 540*25c28e83SPiotr Jasiukajtis fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0; 541*25c28e83SPiotr Jasiukajtis 542*25c28e83SPiotr Jasiukajtis faddd %f24,K2,%f26 ! (1_1) res += K2; 543*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i1 ! px += stridey 544*25c28e83SPiotr Jasiukajtis ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0]; 545*25c28e83SPiotr Jasiukajtis fmuld %f34,%f36,%f34 ! (2_1) res *= xx; 546*25c28e83SPiotr Jasiukajtis 547*25c28e83SPiotr Jasiukajtis fmuld %f58,%f32,%f58 ! (0_1) res *= xx; 548*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; 549*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%f0 ! (0_0) ((float*)res)[0] = ((float*)px)[0]; 550*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); 551*25c28e83SPiotr Jasiukajtis 552*25c28e83SPiotr Jasiukajtis lda [%l6+4]%asi,%f1 ! (0_0) ((float*)res)[1] = ((float*)px)[1]; 553*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (6_1) hx >>= 10; 554*25c28e83SPiotr Jasiukajtis fmuld %f22,%f22,%f10 ! 
(5_1) dtmp0 = dexp_hi * dexp_hi; 555*25c28e83SPiotr Jasiukajtis faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi; 556*25c28e83SPiotr Jasiukajtis 557*25c28e83SPiotr Jasiukajtis fmuld %f62,%f40,%f32 ! (3_1) res *= xx; 558*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000 559*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp; 560*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (5_1) res_c = vis_fand(res_c,DC3); 561*25c28e83SPiotr Jasiukajtis 562*25c28e83SPiotr Jasiukajtis fmuld %f26,%f38,%f26 ! (1_1) res *= xx; 563*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update12 ! (6_1) if ( hx >= 0x7ff00000 ) 564*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; 565*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (2_1) res += K3; 566*25c28e83SPiotr Jasiukajtis.cont12: 567*25c28e83SPiotr Jasiukajtis fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res; 568*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000 569*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; 570*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); 571*25c28e83SPiotr Jasiukajtis 572*25c28e83SPiotr Jasiukajtis fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp; 573*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update13 ! (6_1) if ( hx < 0x00100000 ) 574*25c28e83SPiotr Jasiukajtis ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1]; 575*25c28e83SPiotr Jasiukajtis fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); 576*25c28e83SPiotr Jasiukajtis.cont13: 577*25c28e83SPiotr Jasiukajtis fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx; 578*25c28e83SPiotr Jasiukajtis add %o2,8,%l4 ! (6_1) hx += 8; 579*25c28e83SPiotr Jasiukajtis st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0]; 580*25c28e83SPiotr Jasiukajtis fsubd %f28,%f8,%f6 ! (5_1) xx = res - res_c; 581*25c28e83SPiotr Jasiukajtis 582*25c28e83SPiotr Jasiukajtis fmuld %f34,%f36,%f28 ! (2_1) res *= xx; 583*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! 
(6_1) iexp += 0x5fe; 584*25c28e83SPiotr Jasiukajtis st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1]; 585*25c28e83SPiotr Jasiukajtis faddd %f32,K4,%f32 ! (3_1) res += K4; 586*25c28e83SPiotr Jasiukajtis 587*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px; 588*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (6_1) iexp << 52; 589*25c28e83SPiotr Jasiukajtis and %l4,-16,%l4 ! (6_1) hx = -16; 590*25c28e83SPiotr Jasiukajtis faddd %f26,K1,%f26 ! (1_1) res += K1; 591*25c28e83SPiotr Jasiukajtis 592*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i0 ! px += stridey 593*25c28e83SPiotr Jasiukajtis add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx; 594*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp; 595*25c28e83SPiotr Jasiukajtis faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo; 596*25c28e83SPiotr Jasiukajtis 597*25c28e83SPiotr Jasiukajtis fmuld %f6,%f10,%f58 ! (5_1) xx *= dtmp0; 598*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 599*25c28e83SPiotr Jasiukajtis ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0]; 600*25c28e83SPiotr Jasiukajtis faddd %f62,K5,%f62 ! (4_1) res += K5; 601*25c28e83SPiotr Jasiukajtis 602*25c28e83SPiotr Jasiukajtis fmuld %f32,%f40,%f34 ! (3_1) res *= xx; 603*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (0_0) hx >>= 10; 604*25c28e83SPiotr Jasiukajtis ldd [%i2],%f4 ! (1_1) dexp_hi = ((double*)addr)[0]; 605*25c28e83SPiotr Jasiukajtis faddd %f28,K2,%f32 ! (2_1) res += K2; 606*25c28e83SPiotr Jasiukajtis 607*25c28e83SPiotr Jasiukajtis fmuld %f26,%f38,%f26 ! (1_1) res *= xx; 608*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; 609*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%f6 ! (1_0) ((float*)res)[0] = ((float*)px)[0]; 610*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); 611*25c28e83SPiotr Jasiukajtis 612*25c28e83SPiotr Jasiukajtis fmuld %f30,%f30,%f30 ! (6_1) dtmp0 = dexp_hi * dexp_hi; 613*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! 
(0_0) iexp = -iexp; 614*25c28e83SPiotr Jasiukajtis lda [%l6+4]%asi,%f7 ! (1_0) ((float*)res)[1] = ((float*)px)[1]; 615*25c28e83SPiotr Jasiukajtis faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi; 616*25c28e83SPiotr Jasiukajtis 617*25c28e83SPiotr Jasiukajtis fmuld %f62,%f60,%f38 ! (4_1) res *= xx; 618*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 619*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp; 620*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (6_1) res_c = vis_fand(res_c,DC3); 621*25c28e83SPiotr Jasiukajtis 622*25c28e83SPiotr Jasiukajtis fmuld %f32,%f36,%f32 ! (2_1) res *= xx; 623*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update14 ! (0_0) if ( hx >= 0x7ff00000 ) 624*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; 625*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (3_1) res += K3; 626*25c28e83SPiotr Jasiukajtis.cont14: 627*25c28e83SPiotr Jasiukajtis fmuld %f4,%f26,%f26 ! (1_1) res = dexp_hi * res; 628*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000 629*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; 630*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); 631*25c28e83SPiotr Jasiukajtis 632*25c28e83SPiotr Jasiukajtis fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp; 633*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update15 ! (0_0) if ( hx < 0x00100000 ) 634*25c28e83SPiotr Jasiukajtis ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1]; 635*25c28e83SPiotr Jasiukajtis fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); 636*25c28e83SPiotr Jasiukajtis.cont15: 637*25c28e83SPiotr Jasiukajtis fmuld K6,%f58,%f62 ! (5_1) res = K6 * xx; 638*25c28e83SPiotr Jasiukajtis add %o2,8,%l2 ! (0_0) hx += 8; 639*25c28e83SPiotr Jasiukajtis st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0]; 640*25c28e83SPiotr Jasiukajtis fsubd %f44,%f8,%f10 ! (6_1) xx = res - res_c; 641*25c28e83SPiotr Jasiukajtis 642*25c28e83SPiotr Jasiukajtis fmuld %f34,%f40,%f44 ! 
(3_1) res *= xx; 643*25c28e83SPiotr Jasiukajtis nop 644*25c28e83SPiotr Jasiukajtis st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1]; 645*25c28e83SPiotr Jasiukajtis faddd %f38,K4,%f38 ! (4_1) res += K4; 646*25c28e83SPiotr Jasiukajtis 647*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px; 648*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (0_0) iexp << 52; 649*25c28e83SPiotr Jasiukajtis and %l2,-16,%l2 ! (0_0) hx = -16; 650*25c28e83SPiotr Jasiukajtis faddd %f32,K1,%f32 ! (2_1) res += K1; 651*25c28e83SPiotr Jasiukajtis 652*25c28e83SPiotr Jasiukajtis add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx; 653*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 654*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp; 655*25c28e83SPiotr Jasiukajtis faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo; 656*25c28e83SPiotr Jasiukajtis 657*25c28e83SPiotr Jasiukajtis fmuld %f10,%f30,%f26 ! (6_1) xx *= dtmp0; 658*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i1 ! px += stridey 659*25c28e83SPiotr Jasiukajtis ldd [%l2],%f30 ! (0_0) dtmp0 = ((double*)addr)[0]; 660*25c28e83SPiotr Jasiukajtis faddd %f62,K5,%f62 ! (5_1) res += K5; 661*25c28e83SPiotr Jasiukajtis 662*25c28e83SPiotr Jasiukajtis fmuld %f38,%f60,%f34 ! (4_1) res *= xx; 663*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (1_0) hx >>= 10; 664*25c28e83SPiotr Jasiukajtis ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0]; 665*25c28e83SPiotr Jasiukajtis faddd %f44,K2,%f38 ! (3_1) res += K2; 666*25c28e83SPiotr Jasiukajtis 667*25c28e83SPiotr Jasiukajtis fmuld %f32,%f36,%f32 ! (2_1) res *= xx; 668*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; 669*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%f0 ! (2_0) ((float*)res)[0] = ((float*)px)[0]; 670*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); 671*25c28e83SPiotr Jasiukajtis 672*25c28e83SPiotr Jasiukajtis fmuld %f30,%f30,%f30 ! 
(0_0) dtmp0 = dexp_hi * dexp_hi; 673*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 674*25c28e83SPiotr Jasiukajtis lda [%l6+4]%asi,%f1 ! (2_0) ((float*)res)[1] = ((float*)px)[1]; 675*25c28e83SPiotr Jasiukajtis faddd %f8,%f4,%f4 ! (1_1) res += dexp_hi; 676*25c28e83SPiotr Jasiukajtis 677*25c28e83SPiotr Jasiukajtis fmuld %f62,%f58,%f36 ! (5_1) res *= xx; 678*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update16 ! (1_0) if ( hx >= 0x7ff00000 ) 679*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp; 680*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3); 681*25c28e83SPiotr Jasiukajtis.cont16: 682*25c28e83SPiotr Jasiukajtis fmuld %f38,%f40,%f38 ! (3_1) res *= xx; 683*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000 684*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; 685*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (4_1) res += K3; 686*25c28e83SPiotr Jasiukajtis 687*25c28e83SPiotr Jasiukajtis fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res; 688*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update17 ! (1_0) if ( hx < 0x00100000 ) 689*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; 690*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); 691*25c28e83SPiotr Jasiukajtis.cont17: 692*25c28e83SPiotr Jasiukajtis fmuld %f4,%f62,%f2 ! (1_1) res *= dlexp; 693*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (1_0) iexp += 0x5fe; 694*25c28e83SPiotr Jasiukajtis ldd [%i4+8],%f4 ! (2_1) dexp_lo = ((double*)addr)[1]; 695*25c28e83SPiotr Jasiukajtis fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2); 696*25c28e83SPiotr Jasiukajtis 697*25c28e83SPiotr Jasiukajtis fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx; 698*25c28e83SPiotr Jasiukajtis add %o2,8,%i2 ! (1_0) hx += 8; 699*25c28e83SPiotr Jasiukajtis st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0]; 700*25c28e83SPiotr Jasiukajtis fsubd %f28,%f8,%f6 ! 
(0_0) xx = res - res_c; 701*25c28e83SPiotr Jasiukajtis 702*25c28e83SPiotr Jasiukajtis fmuld %f34,%f60,%f28 ! (4_1) res *= xx; 703*25c28e83SPiotr Jasiukajtis nop 704*25c28e83SPiotr Jasiukajtis st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1]; 705*25c28e83SPiotr Jasiukajtis faddd %f36,K4,%f36 ! (5_1) res += K4; 706*25c28e83SPiotr Jasiukajtis 707*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px; 708*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (1_0) iexp << 52; 709*25c28e83SPiotr Jasiukajtis and %i2,-16,%i2 ! (1_0) hx = -16; 710*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (3_1) res += K1; 711*25c28e83SPiotr Jasiukajtis 712*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i0 ! px += stridey 713*25c28e83SPiotr Jasiukajtis add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx; 714*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp; 715*25c28e83SPiotr Jasiukajtis faddd %f32,%f4,%f8 ! (2_1) res += dexp_lo; 716*25c28e83SPiotr Jasiukajtis 717*25c28e83SPiotr Jasiukajtis fmuld %f6,%f30,%f32 ! (0_0) xx *= dtmp0; 718*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 719*25c28e83SPiotr Jasiukajtis ldd [%i2],%f30 ! (1_0) dtmp0 = ((double*)addr)[0]; 720*25c28e83SPiotr Jasiukajtis faddd %f62,K5,%f62 ! (6_1) res += K5; 721*25c28e83SPiotr Jasiukajtis 722*25c28e83SPiotr Jasiukajtis fmuld %f36,%f58,%f34 ! (5_1) res *= xx; 723*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (2_0) hx >>= 10; 724*25c28e83SPiotr Jasiukajtis ldd [%i5],%f4 ! (3_1) dexp_hi = ((double*)addr)[0]; 725*25c28e83SPiotr Jasiukajtis faddd %f28,K2,%f36 ! (4_1) res += K2; 726*25c28e83SPiotr Jasiukajtis 727*25c28e83SPiotr Jasiukajtis fmuld %f38,%f40,%f38 ! (3_1) res *= xx; 728*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; 729*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%f6 ! (3_0) ((float*)res)[0] = ((float*)px)[0]; 730*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f28 ! 
(2_0) res = vis_for(res,DC1); 731*25c28e83SPiotr Jasiukajtis 732*25c28e83SPiotr Jasiukajtis fmuld %f30,%f30,%f30 ! (1_0) dtmp0 = dexp_hi * dexp_hi; 733*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 734*25c28e83SPiotr Jasiukajtis lda [%l6+4]%asi,%f7 ! (3_0) ((float*)res)[1] = ((float*)px)[1]; 735*25c28e83SPiotr Jasiukajtis faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi; 736*25c28e83SPiotr Jasiukajtis 737*25c28e83SPiotr Jasiukajtis fmuld %f62,%f26,%f40 ! (6_1) res *= xx; 738*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update18 ! (2_0) if ( hx >= 0x7ff00000 ) 739*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp; 740*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3); 741*25c28e83SPiotr Jasiukajtis.cont18: 742*25c28e83SPiotr Jasiukajtis fmuld %f36,%f60,%f36 ! (4_1) res *= xx; 743*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000 744*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; 745*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (5_1) res += K3; 746*25c28e83SPiotr Jasiukajtis 747*25c28e83SPiotr Jasiukajtis fmuld %f4,%f38,%f38 ! (3_1) res = dexp_hi * res; 748*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update19 ! (2_0) if ( hx < 0x00100000 ) 749*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; 750*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); 751*25c28e83SPiotr Jasiukajtis.cont19: 752*25c28e83SPiotr Jasiukajtis fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp; 753*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (2_0) iexp += 0x5fe; 754*25c28e83SPiotr Jasiukajtis ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1]; 755*25c28e83SPiotr Jasiukajtis fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2); 756*25c28e83SPiotr Jasiukajtis 757*25c28e83SPiotr Jasiukajtis fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx; 758*25c28e83SPiotr Jasiukajtis add %o2,8,%i4 ! (2_0) hx += 8; 759*25c28e83SPiotr Jasiukajtis st %f2,[%i1] ! 
(2_1) ((float*)py)[0] = ((float*)res)[0]; 760*25c28e83SPiotr Jasiukajtis fsubd %f44,%f8,%f10 ! (1_0) xx = res - res_c; 761*25c28e83SPiotr Jasiukajtis 762*25c28e83SPiotr Jasiukajtis fmuld %f34,%f58,%f44 ! (5_1) res *= xx; 763*25c28e83SPiotr Jasiukajtis nop 764*25c28e83SPiotr Jasiukajtis st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1]; 765*25c28e83SPiotr Jasiukajtis faddd %f40,K4,%f40 ! (6_1) res += K4; 766*25c28e83SPiotr Jasiukajtis 767*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px; 768*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (2_0) iexp << 52; 769*25c28e83SPiotr Jasiukajtis and %i4,-16,%i4 ! (2_0) hx = -16; 770*25c28e83SPiotr Jasiukajtis faddd %f36,K1,%f36 ! (4_1) res += K1; 771*25c28e83SPiotr Jasiukajtis 772*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 773*25c28e83SPiotr Jasiukajtis add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx; 774*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp; 775*25c28e83SPiotr Jasiukajtis faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo; 776*25c28e83SPiotr Jasiukajtis 777*25c28e83SPiotr Jasiukajtis fmuld %f10,%f30,%f38 ! (1_0) xx *= dtmp0; 778*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i1 ! px += stridey 779*25c28e83SPiotr Jasiukajtis ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0]; 780*25c28e83SPiotr Jasiukajtis faddd %f62,K5,%f62 ! (0_0) res += K5; 781*25c28e83SPiotr Jasiukajtis 782*25c28e83SPiotr Jasiukajtis fmuld %f40,%f26,%f34 ! (6_1) res *= xx; 783*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (3_0) hx >>= 10; 784*25c28e83SPiotr Jasiukajtis ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0]; 785*25c28e83SPiotr Jasiukajtis faddd %f44,K2,%f40 ! (5_1) res += K2; 786*25c28e83SPiotr Jasiukajtis 787*25c28e83SPiotr Jasiukajtis fmuld %f36,%f60,%f36 ! (4_1) res *= xx; 788*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; 789*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%f0 ! 
(4_0) ((float*)res)[0] = ((float*)px)[0]; 790*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); 791*25c28e83SPiotr Jasiukajtis 792*25c28e83SPiotr Jasiukajtis fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi; 793*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 794*25c28e83SPiotr Jasiukajtis lda [%l6+4]%asi,%f1 ! (4_0) ((float*)res)[1] = ((float*)px)[1]; 795*25c28e83SPiotr Jasiukajtis faddd %f8,%f4,%f8 ! (3_1) res += dexp_hi; 796*25c28e83SPiotr Jasiukajtis 797*25c28e83SPiotr Jasiukajtis fmuld %f62,%f32,%f60 ! (0_0) res *= xx; 798*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update20 ! (3_0) if ( hx >= 0x7ff00000 ) 799*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp; 800*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3); 801*25c28e83SPiotr Jasiukajtis.cont20: 802*25c28e83SPiotr Jasiukajtis fmuld %f40,%f58,%f40 ! (5_1) res *= xx; 803*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000 804*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; 805*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f10 ! (6_1) res += K3; 806*25c28e83SPiotr Jasiukajtis 807*25c28e83SPiotr Jasiukajtis fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res; 808*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update21 ! (3_0) if ( hx < 0x00100000 ) 809*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; 810*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); 811*25c28e83SPiotr Jasiukajtis.cont21: 812*25c28e83SPiotr Jasiukajtis fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp; 813*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (3_0) iexp += 0x5fe; 814*25c28e83SPiotr Jasiukajtis ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1]; 815*25c28e83SPiotr Jasiukajtis fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2); 816*25c28e83SPiotr Jasiukajtis 817*25c28e83SPiotr Jasiukajtis fmuld K6,%f38,%f62 ! 
(1_0) res = K6 * xx; 818*25c28e83SPiotr Jasiukajtis add %o2,8,%i5 ! (3_0) hx += 8; 819*25c28e83SPiotr Jasiukajtis st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0]; 820*25c28e83SPiotr Jasiukajtis fsubd %f28,%f4,%f28 ! (2_0) xx = res - res_c; 821*25c28e83SPiotr Jasiukajtis 822*25c28e83SPiotr Jasiukajtis fmuld %f10,%f26,%f4 ! (6_1) res *= xx; 823*25c28e83SPiotr Jasiukajtis nop 824*25c28e83SPiotr Jasiukajtis st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1]; 825*25c28e83SPiotr Jasiukajtis faddd %f60,K4,%f60 ! (0_0) res += K4; 826*25c28e83SPiotr Jasiukajtis 827*25c28e83SPiotr Jasiukajtis lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px; 828*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (3_0) iexp << 52; 829*25c28e83SPiotr Jasiukajtis and %i5,-16,%i5 ! (3_0) hx = -16; 830*25c28e83SPiotr Jasiukajtis faddd %f40,K1,%f40 ! (5_1) res += K1; 831*25c28e83SPiotr Jasiukajtis 832*25c28e83SPiotr Jasiukajtis add %l6,stridex,%i0 ! px += stridex 833*25c28e83SPiotr Jasiukajtis add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx; 834*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp; 835*25c28e83SPiotr Jasiukajtis faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo; 836*25c28e83SPiotr Jasiukajtis 837*25c28e83SPiotr Jasiukajtis fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0; 838*25c28e83SPiotr Jasiukajtis add %i1,stridey,%l6 ! px += stridey 839*25c28e83SPiotr Jasiukajtis ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0]; 840*25c28e83SPiotr Jasiukajtis faddd %f62,K5,%f62 ! (1_0) res += K5; 841*25c28e83SPiotr Jasiukajtis 842*25c28e83SPiotr Jasiukajtis faddd %f4,K2,%f10 ! (6_1) res += K2; 843*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (4_0) hx >>= 10; 844*25c28e83SPiotr Jasiukajtis nop 845*25c28e83SPiotr Jasiukajtis fmuld %f60,%f32,%f34 ! (0_0) res *= xx; 846*25c28e83SPiotr Jasiukajtis 847*25c28e83SPiotr Jasiukajtis fmuld %f40,%f58,%f40 ! (5_1) res *= xx; 848*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; 849*25c28e83SPiotr Jasiukajtis lda [%i0]%asi,%f6 ! 
(5_0) ((float*)res)[0] = ((float*)px)[0]; 850*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1); 851*25c28e83SPiotr Jasiukajtis 852*25c28e83SPiotr Jasiukajtis fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi; 853*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 854*25c28e83SPiotr Jasiukajtis lda [%i0+4]%asi,%f7 ! (5_0) ((float*)res)[1] = ((float*)px)[1]; 855*25c28e83SPiotr Jasiukajtis faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi; 856*25c28e83SPiotr Jasiukajtis 857*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (3_0) res_c = vis_fand(res_c,DC3); 858*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update22 ! (4_0) if ( hx >= 0x7ff00000 ) 859*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp; 860*25c28e83SPiotr Jasiukajtis fmuld %f62,%f38,%f62 ! (1_0) res *= xx; 861*25c28e83SPiotr Jasiukajtis.cont22: 862*25c28e83SPiotr Jasiukajtis fmuld %f10,%f26,%f58 ! (6_1) res *= xx; 863*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000 864*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; 865*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f60 ! (0_0) res += K3; 866*25c28e83SPiotr Jasiukajtis 867*25c28e83SPiotr Jasiukajtis fmuld %f22,%f40,%f40 ! (5_1) res = dexp_hi * res; 868*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update23 ! (4_0) if ( hx < 0x00100000 ) 869*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; 870*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); 871*25c28e83SPiotr Jasiukajtis.cont23: 872*25c28e83SPiotr Jasiukajtis fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp; 873*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (4_0) iexp += 0x5fe; 874*25c28e83SPiotr Jasiukajtis ldd [%i3+8],%f34 ! (5_1) dexp_lo = ((double*)addr)[1]; 875*25c28e83SPiotr Jasiukajtis fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2); 876*25c28e83SPiotr Jasiukajtis 877*25c28e83SPiotr Jasiukajtis fmuld K6,%f36,%f30 ! 
(2_0) res = K6 * xx; 878*25c28e83SPiotr Jasiukajtis add %o2,8,%l1 ! (4_0) hx += 8; 879*25c28e83SPiotr Jasiukajtis st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0]; 880*25c28e83SPiotr Jasiukajtis fsubd %f44,%f8,%f44 ! (3_0) xx = res - res_c; 881*25c28e83SPiotr Jasiukajtis 882*25c28e83SPiotr Jasiukajtis fmuld %f60,%f32,%f60 ! (0_0) res *= xx; 883*25c28e83SPiotr Jasiukajtis sllx %o7,52,%o7 ! (4_0) iexp << 52; 884*25c28e83SPiotr Jasiukajtis st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1]; 885*25c28e83SPiotr Jasiukajtis faddd %f62,K4,%f6 ! (1_0) res += K4; 886*25c28e83SPiotr Jasiukajtis 887*25c28e83SPiotr Jasiukajtis lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px; 888*25c28e83SPiotr Jasiukajtis add %i0,stridex,%i1 ! px += stridex 889*25c28e83SPiotr Jasiukajtis and %l1,-16,%l1 ! (4_0) hx = -16; 890*25c28e83SPiotr Jasiukajtis faddd %f58,K1,%f58 ! (6_1) res += K1; 891*25c28e83SPiotr Jasiukajtis 892*25c28e83SPiotr Jasiukajtis add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx; 893*25c28e83SPiotr Jasiukajtis add %l6,stridey,%i0 ! px += stridey 894*25c28e83SPiotr Jasiukajtis stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp; 895*25c28e83SPiotr Jasiukajtis faddd %f40,%f34,%f8 ! (5_1) res += dexp_lo; 896*25c28e83SPiotr Jasiukajtis 897*25c28e83SPiotr Jasiukajtis fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0; 898*25c28e83SPiotr Jasiukajtis nop 899*25c28e83SPiotr Jasiukajtis ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0]; 900*25c28e83SPiotr Jasiukajtis faddd %f30,K5,%f62 ! (2_0) res += K5; 901*25c28e83SPiotr Jasiukajtis 902*25c28e83SPiotr Jasiukajtis fmuld %f6,%f38,%f34 ! (1_0) res *= xx; 903*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; 904*25c28e83SPiotr Jasiukajtis ldd [%l4],%f30 ! (6_1) dexp_hi = ((double*)addr)[0]; 905*25c28e83SPiotr Jasiukajtis faddd %f60,K2,%f60 ! (0_0) res += K2; 906*25c28e83SPiotr Jasiukajtis 907*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1); 908*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! 
(5_0) iexp = -iexp; 909*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; 910*25c28e83SPiotr Jasiukajtis fmuld %f58,%f26,%f26 ! (6_1) res *= xx; 911*25c28e83SPiotr Jasiukajtis 912*25c28e83SPiotr Jasiukajtis fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi; 913*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000 914*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; 915*25c28e83SPiotr Jasiukajtis faddd %f8,%f22,%f22 ! (5_1) res += dexp_hi; 916*25c28e83SPiotr Jasiukajtis 917*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3); 918*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update24 ! (5_0) if ( hx >= 0x7ff00000 ) 919*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp0],%f18 ! (5_1) dlexp = *(double*)lexp; 920*25c28e83SPiotr Jasiukajtis fmuld %f62,%f36,%f62 ! (2_0) res *= xx; 921*25c28e83SPiotr Jasiukajtis.cont24: 922*25c28e83SPiotr Jasiukajtis fmuld %f60,%f32,%f58 ! (0_0) res *= xx; 923*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (5_0) hx >>= 10; 924*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000 925*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (1_0) res += K3; 926*25c28e83SPiotr Jasiukajtis 927*25c28e83SPiotr Jasiukajtis fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res; 928*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update25 ! (5_0) if ( hx < 0x00100000 ) 929*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; 930*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); 931*25c28e83SPiotr Jasiukajtis.cont25: 932*25c28e83SPiotr Jasiukajtis fmuld %f22,%f18,%f2 ! (5_1) res *= dlexp; 933*25c28e83SPiotr Jasiukajtis subcc counter,7,counter ! counter -= 7; 934*25c28e83SPiotr Jasiukajtis ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1]; 935*25c28e83SPiotr Jasiukajtis fpadd32 %f28,DC2,%f18 ! 
(5_0) res_c = vis_fpadd32(res,DC2); 936*25c28e83SPiotr Jasiukajtis 937*25c28e83SPiotr Jasiukajtis fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx; 938*25c28e83SPiotr Jasiukajtis add %o2,8,%i3 ! (5_0) hx += 8; 939*25c28e83SPiotr Jasiukajtis st %f2,[%l6] ! (5_1) ((float*)py)[0] = ((float*)res)[0]; 940*25c28e83SPiotr Jasiukajtis fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c; 941*25c28e83SPiotr Jasiukajtis 942*25c28e83SPiotr Jasiukajtis fmuld %f34,%f38,%f24 ! (1_0) res *= xx; 943*25c28e83SPiotr Jasiukajtis st %f3,[%l6+4] ! (5_1) ((float*)py)[1] = ((float*)res)[1]; 944*25c28e83SPiotr Jasiukajtis bpos,pt %icc,.main_loop 945*25c28e83SPiotr Jasiukajtis faddd %f62,K4,%f34 ! (2_0) res += K4; 946*25c28e83SPiotr Jasiukajtis 947*25c28e83SPiotr Jasiukajtis add counter,7,counter 948*25c28e83SPiotr Jasiukajtis.tail: 949*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (5_0) iexp += 0x5fe; 950*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 951*25c28e83SPiotr Jasiukajtis bneg,a .begin 952*25c28e83SPiotr Jasiukajtis mov %i0,%o4 953*25c28e83SPiotr Jasiukajtis 954*25c28e83SPiotr Jasiukajtis faddd %f58,K1,%f58 ! (0_1) res += K1; 955*25c28e83SPiotr Jasiukajtis 956*25c28e83SPiotr Jasiukajtis faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo; 957*25c28e83SPiotr Jasiukajtis 958*25c28e83SPiotr Jasiukajtis faddd %f22,K5,%f62 ! (3_1) res += K5; 959*25c28e83SPiotr Jasiukajtis fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0; 960*25c28e83SPiotr Jasiukajtis 961*25c28e83SPiotr Jasiukajtis faddd %f24,K2,%f26 ! (1_1) res += K2; 962*25c28e83SPiotr Jasiukajtis add %i1,stridex,%l6 ! px += stridex 963*25c28e83SPiotr Jasiukajtis ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0]; 964*25c28e83SPiotr Jasiukajtis fmuld %f34,%f36,%f34 ! (2_1) res *= xx; 965*25c28e83SPiotr Jasiukajtis 966*25c28e83SPiotr Jasiukajtis fmuld %f58,%f32,%f58 ! (0_1) res *= xx; 967*25c28e83SPiotr Jasiukajtis 968*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i1 ! px += stridey 969*25c28e83SPiotr Jasiukajtis faddd %f8,%f30,%f30 ! 
(6_2) res += dexp_hi; 970*25c28e83SPiotr Jasiukajtis 971*25c28e83SPiotr Jasiukajtis fmuld %f62,%f40,%f32 ! (3_1) res *= xx; 972*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp; 973*25c28e83SPiotr Jasiukajtis 974*25c28e83SPiotr Jasiukajtis fmuld %f26,%f38,%f26 ! (1_1) res *= xx; 975*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (2_1) res += K3; 976*25c28e83SPiotr Jasiukajtis 977*25c28e83SPiotr Jasiukajtis fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res; 978*25c28e83SPiotr Jasiukajtis 979*25c28e83SPiotr Jasiukajtis fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp; 980*25c28e83SPiotr Jasiukajtis ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1]; 981*25c28e83SPiotr Jasiukajtis 982*25c28e83SPiotr Jasiukajtis fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx; 983*25c28e83SPiotr Jasiukajtis st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0]; 984*25c28e83SPiotr Jasiukajtis 985*25c28e83SPiotr Jasiukajtis fmuld %f34,%f36,%f28 ! (2_1) res *= xx; 986*25c28e83SPiotr Jasiukajtis st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1]; 987*25c28e83SPiotr Jasiukajtis faddd %f32,K4,%f32 ! (3_1) res += K4; 988*25c28e83SPiotr Jasiukajtis 989*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 990*25c28e83SPiotr Jasiukajtis bneg,a .begin 991*25c28e83SPiotr Jasiukajtis mov %i1,%o4 992*25c28e83SPiotr Jasiukajtis 993*25c28e83SPiotr Jasiukajtis faddd %f26,K1,%f26 ! (1_1) res += K1; 994*25c28e83SPiotr Jasiukajtis 995*25c28e83SPiotr Jasiukajtis faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo; 996*25c28e83SPiotr Jasiukajtis 997*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 998*25c28e83SPiotr Jasiukajtis faddd %f62,K5,%f62 ! (4_1) res += K5; 999*25c28e83SPiotr Jasiukajtis 1000*25c28e83SPiotr Jasiukajtis fmuld %f32,%f40,%f34 ! (3_1) res *= xx; 1001*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i0 ! px += stridey 1002*25c28e83SPiotr Jasiukajtis ldd [%i2],%f22 ! (1_1) dexp_hi = ((double*)addr)[0]; 1003*25c28e83SPiotr Jasiukajtis faddd %f28,K2,%f32 ! 
(2_1) res += K2; 1004*25c28e83SPiotr Jasiukajtis 1005*25c28e83SPiotr Jasiukajtis fmuld %f26,%f38,%f26 ! (1_1) res *= xx; 1006*25c28e83SPiotr Jasiukajtis 1007*25c28e83SPiotr Jasiukajtis faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi; 1008*25c28e83SPiotr Jasiukajtis 1009*25c28e83SPiotr Jasiukajtis fmuld %f62,%f60,%f38 ! (4_1) res *= xx; 1010*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp; 1011*25c28e83SPiotr Jasiukajtis 1012*25c28e83SPiotr Jasiukajtis fmuld %f32,%f36,%f32 ! (2_1) res *= xx; 1013*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (3_1) res += K3; 1014*25c28e83SPiotr Jasiukajtis 1015*25c28e83SPiotr Jasiukajtis fmuld %f22,%f26,%f26 ! (1_1) res = dexp_hi * res; 1016*25c28e83SPiotr Jasiukajtis 1017*25c28e83SPiotr Jasiukajtis fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp; 1018*25c28e83SPiotr Jasiukajtis ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1]; 1019*25c28e83SPiotr Jasiukajtis 1020*25c28e83SPiotr Jasiukajtis st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0]; 1021*25c28e83SPiotr Jasiukajtis 1022*25c28e83SPiotr Jasiukajtis fmuld %f34,%f40,%f44 ! (3_1) res *= xx; 1023*25c28e83SPiotr Jasiukajtis st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1]; 1024*25c28e83SPiotr Jasiukajtis faddd %f38,K4,%f38 ! (4_1) res += K4; 1025*25c28e83SPiotr Jasiukajtis 1026*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1027*25c28e83SPiotr Jasiukajtis bneg,a .begin 1028*25c28e83SPiotr Jasiukajtis mov %i0,%o4 1029*25c28e83SPiotr Jasiukajtis 1030*25c28e83SPiotr Jasiukajtis faddd %f32,K1,%f32 ! (2_1) res += K1; 1031*25c28e83SPiotr Jasiukajtis 1032*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 1033*25c28e83SPiotr Jasiukajtis faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo; 1034*25c28e83SPiotr Jasiukajtis 1035*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i1 ! px += stridey 1036*25c28e83SPiotr Jasiukajtis 1037*25c28e83SPiotr Jasiukajtis fmuld %f38,%f60,%f34 ! (4_1) res *= xx; 1038*25c28e83SPiotr Jasiukajtis ldd [%i4],%f24 ! 
(2_1) dexp_hi = ((double*)addr)[0]; 1039*25c28e83SPiotr Jasiukajtis faddd %f44,K2,%f38 ! (3_1) res += K2; 1040*25c28e83SPiotr Jasiukajtis 1041*25c28e83SPiotr Jasiukajtis fmuld %f32,%f36,%f32 ! (2_1) res *= xx; 1042*25c28e83SPiotr Jasiukajtis 1043*25c28e83SPiotr Jasiukajtis faddd %f8,%f22,%f22 ! (1_1) res += dexp_hi; 1044*25c28e83SPiotr Jasiukajtis 1045*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp; 1046*25c28e83SPiotr Jasiukajtis 1047*25c28e83SPiotr Jasiukajtis fmuld %f38,%f40,%f38 ! (3_1) res *= xx; 1048*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (4_1) res += K3; 1049*25c28e83SPiotr Jasiukajtis 1050*25c28e83SPiotr Jasiukajtis fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res; 1051*25c28e83SPiotr Jasiukajtis 1052*25c28e83SPiotr Jasiukajtis fmuld %f22,%f62,%f2 ! (1_1) res *= dlexp; 1053*25c28e83SPiotr Jasiukajtis ldd [%i4+8],%f22 ! (2_1) dexp_lo = ((double*)addr)[1]; 1054*25c28e83SPiotr Jasiukajtis 1055*25c28e83SPiotr Jasiukajtis st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0]; 1056*25c28e83SPiotr Jasiukajtis 1057*25c28e83SPiotr Jasiukajtis fmuld %f34,%f60,%f28 ! (4_1) res *= xx; 1058*25c28e83SPiotr Jasiukajtis st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1]; 1059*25c28e83SPiotr Jasiukajtis 1060*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1061*25c28e83SPiotr Jasiukajtis bneg,a .begin 1062*25c28e83SPiotr Jasiukajtis mov %i1,%o4 1063*25c28e83SPiotr Jasiukajtis 1064*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (3_1) res += K1; 1065*25c28e83SPiotr Jasiukajtis 1066*25c28e83SPiotr Jasiukajtis faddd %f32,%f22,%f8 ! (2_1) res += dexp_lo; 1067*25c28e83SPiotr Jasiukajtis 1068*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 1069*25c28e83SPiotr Jasiukajtis 1070*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i0 ! px += stridey 1071*25c28e83SPiotr Jasiukajtis ldd [%i5],%f22 ! (3_1) dexp_hi = ((double*)addr)[0]; 1072*25c28e83SPiotr Jasiukajtis faddd %f28,K2,%f36 ! 
(4_1) res += K2; 1073*25c28e83SPiotr Jasiukajtis 1074*25c28e83SPiotr Jasiukajtis fmuld %f38,%f40,%f38 ! (3_1) res *= xx; 1075*25c28e83SPiotr Jasiukajtis 1076*25c28e83SPiotr Jasiukajtis faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi; 1077*25c28e83SPiotr Jasiukajtis 1078*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp; 1079*25c28e83SPiotr Jasiukajtis 1080*25c28e83SPiotr Jasiukajtis fmuld %f36,%f60,%f36 ! (4_1) res *= xx; 1081*25c28e83SPiotr Jasiukajtis 1082*25c28e83SPiotr Jasiukajtis fmuld %f22,%f38,%f38 ! (3_1) res = dexp_hi * res; 1083*25c28e83SPiotr Jasiukajtis 1084*25c28e83SPiotr Jasiukajtis fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp; 1085*25c28e83SPiotr Jasiukajtis ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1]; 1086*25c28e83SPiotr Jasiukajtis 1087*25c28e83SPiotr Jasiukajtis st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0]; 1088*25c28e83SPiotr Jasiukajtis 1089*25c28e83SPiotr Jasiukajtis st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1]; 1090*25c28e83SPiotr Jasiukajtis 1091*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1092*25c28e83SPiotr Jasiukajtis bneg,a .begin 1093*25c28e83SPiotr Jasiukajtis mov %i0,%o4 1094*25c28e83SPiotr Jasiukajtis 1095*25c28e83SPiotr Jasiukajtis faddd %f36,K1,%f36 ! (4_1) res += K1; 1096*25c28e83SPiotr Jasiukajtis 1097*25c28e83SPiotr Jasiukajtis faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo; 1098*25c28e83SPiotr Jasiukajtis 1099*25c28e83SPiotr Jasiukajtis add %i0,stridey,%i1 ! px += stridey 1100*25c28e83SPiotr Jasiukajtis 1101*25c28e83SPiotr Jasiukajtis add %l6,stridex,%l6 ! px += stridex 1102*25c28e83SPiotr Jasiukajtis ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0]; 1103*25c28e83SPiotr Jasiukajtis 1104*25c28e83SPiotr Jasiukajtis fmuld %f36,%f60,%f36 ! (4_1) res *= xx; 1105*25c28e83SPiotr Jasiukajtis 1106*25c28e83SPiotr Jasiukajtis faddd %f8,%f22,%f8 ! (3_1) res += dexp_hi; 1107*25c28e83SPiotr Jasiukajtis 1108*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp5],%f62 ! 
(3_1) dlexp = *(double*)lexp; 1109*25c28e83SPiotr Jasiukajtis 1110*25c28e83SPiotr Jasiukajtis fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res; 1111*25c28e83SPiotr Jasiukajtis 1112*25c28e83SPiotr Jasiukajtis fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp; 1113*25c28e83SPiotr Jasiukajtis ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1]; 1114*25c28e83SPiotr Jasiukajtis 1115*25c28e83SPiotr Jasiukajtis st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0]; 1116*25c28e83SPiotr Jasiukajtis 1117*25c28e83SPiotr Jasiukajtis st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1]; 1118*25c28e83SPiotr Jasiukajtis 1119*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1120*25c28e83SPiotr Jasiukajtis bneg,a .begin 1121*25c28e83SPiotr Jasiukajtis mov %i1,%o4 1122*25c28e83SPiotr Jasiukajtis 1123*25c28e83SPiotr Jasiukajtis faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo; 1124*25c28e83SPiotr Jasiukajtis 1125*25c28e83SPiotr Jasiukajtis add %l6,stridex,%i0 ! px += stridex 1126*25c28e83SPiotr Jasiukajtis 1127*25c28e83SPiotr Jasiukajtis add %i1,stridey,%l6 ! px += stridey 1128*25c28e83SPiotr Jasiukajtis 1129*25c28e83SPiotr Jasiukajtis faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi; 1130*25c28e83SPiotr Jasiukajtis 1131*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp; 1132*25c28e83SPiotr Jasiukajtis 1133*25c28e83SPiotr Jasiukajtis fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp; 1134*25c28e83SPiotr Jasiukajtis 1135*25c28e83SPiotr Jasiukajtis st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0]; 1136*25c28e83SPiotr Jasiukajtis 1137*25c28e83SPiotr Jasiukajtis st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1]; 1138*25c28e83SPiotr Jasiukajtis 1139*25c28e83SPiotr Jasiukajtis ba .begin 1140*25c28e83SPiotr Jasiukajtis add %i1,stridey,%o4 1141*25c28e83SPiotr Jasiukajtis 1142*25c28e83SPiotr Jasiukajtis /* .spec0: special-case exit. Computes DONE / %f0, stores the two 32-bit halves (%f0, %f1) at [%o4] and [%o4+4], steps %i1 by stridex and %o4 by stridey, then jumps to .begin1 with counter decremented in the delay slot of ba. NOTE(review): which inputs are routed here (presumably Inf/NaN) is decided outside this view - confirm at the dispatch site. */ .align 16 1143*25c28e83SPiotr Jasiukajtis.spec0: 1144*25c28e83SPiotr Jasiukajtis fdivd DONE,%f0,%f0 ! 
res = DONE / res; 1145*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 1146*25c28e83SPiotr Jasiukajtis st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0]; 1147*25c28e83SPiotr Jasiukajtis st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1]; 1148*25c28e83SPiotr Jasiukajtis add %o4,stridey,%o4 ! py += stridey 1149*25c28e83SPiotr Jasiukajtis ba .begin1 1150*25c28e83SPiotr Jasiukajtis sub counter,1,counter 1151*25c28e83SPiotr Jasiukajtis 1152*25c28e83SPiotr Jasiukajtis /* .spec1: special-case handler with three outcomes. (a) %i2 | %l4 == 0: result is DONE / %f0. (b) %g1 < 0: result is fsqrtd(%f0). Both use annulled branches, so the delay-slot arithmetic runs only when the branch to 2f is taken. (c) otherwise the operand is rescaled and handed to .cont_spec: when %g1 < %i4 the raw bits are converted with fxtod directly, else control goes to 1f with [%o3+0x50] preloaded in %f18. NOTE(review): %i2, %l4 and %i4 are set outside this view; presumably masked hx, lx and 0x00080000 as in the .updateN blocks - confirm. */ .align 16 1153*25c28e83SPiotr Jasiukajtis.spec1: 1154*25c28e83SPiotr Jasiukajtis orcc %i2,%l4,%g0 1155*25c28e83SPiotr Jasiukajtis bz,a 2f 1156*25c28e83SPiotr Jasiukajtis fdivd DONE,%f0,%f0 ! res = DONE / res; 1157*25c28e83SPiotr Jasiukajtis 1158*25c28e83SPiotr Jasiukajtis cmp %g1,0 1159*25c28e83SPiotr Jasiukajtis bl,a 2f 1160*25c28e83SPiotr Jasiukajtis fsqrtd %f0,%f0 ! res = sqrt(res); 1161*25c28e83SPiotr Jasiukajtis 1162*25c28e83SPiotr Jasiukajtis cmp %g1,%i4 1163*25c28e83SPiotr Jasiukajtis bge,a 1f 1164*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f18 1165*25c28e83SPiotr Jasiukajtis 1166*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1167*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp0] 1168*25c28e83SPiotr Jasiukajtis 1169*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); 1170*25c28e83SPiotr Jasiukajtis ld [%fp+tmp0],%g1 1171*25c28e83SPiotr Jasiukajtis 1172*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; 1173*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); 1174*25c28e83SPiotr Jasiukajtis 1175*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (6_1) hx >>= 10; 1176*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1177*25c28e83SPiotr Jasiukajtis 1178*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; 1179*25c28e83SPiotr Jasiukajtis ba .cont_spec 1180*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! 
(6_1) iexp = -iexp; 1181*25c28e83SPiotr Jasiukajtis /* 1: (.spec1, %g1 >= %i4) masks %f0 with %f18 and adds the constant loaded from [%o3+0x58] after the fxtod conversion (DC4 and D2ON51 per the inline comments), then derives %g1, %o7, %o2 and %f44 the same way as the fall-through path before joining .cont_spec. */ 1182*25c28e83SPiotr Jasiukajtis1: 1183*25c28e83SPiotr Jasiukajtis fand %f0,%f18,%f0 ! res = vis_fand(res,DC4); 1184*25c28e83SPiotr Jasiukajtis 1185*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f28 1186*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1187*25c28e83SPiotr Jasiukajtis 1188*25c28e83SPiotr Jasiukajtis faddd %f0,%f28,%f0 ! res += D2ON51; 1189*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp0] 1190*25c28e83SPiotr Jasiukajtis 1191*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); 1192*25c28e83SPiotr Jasiukajtis ld [%fp+tmp0],%g1 1193*25c28e83SPiotr Jasiukajtis 1194*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; 1195*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); 1196*25c28e83SPiotr Jasiukajtis 1197*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (6_1) hx >>= 10; 1198*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1199*25c28e83SPiotr Jasiukajtis 1200*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; 1201*25c28e83SPiotr Jasiukajtis ba .cont_spec 1202*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; 1203*25c28e83SPiotr Jasiukajtis /* 2: (.spec1) common store path for the results produced in the annulled delay slots: writes %f0 and %f1 to [%o4] and [%o4+4], advances %i1 by stridex and %o4 by stridey, then returns to .begin1 with counter decremented in the delay slot of ba. */ 1204*25c28e83SPiotr Jasiukajtis2: 1205*25c28e83SPiotr Jasiukajtis add %i1,stridex,%i1 ! px += stridex 1206*25c28e83SPiotr Jasiukajtis st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0]; 1207*25c28e83SPiotr Jasiukajtis st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1]; 1208*25c28e83SPiotr Jasiukajtis add %o4,stridey,%o4 ! 
py += stridey 1209*25c28e83SPiotr Jasiukajtis ba .begin1 1210*25c28e83SPiotr Jasiukajtis sub counter,1,counter 1211*25c28e83SPiotr Jasiukajtis 1212*25c28e83SPiotr Jasiukajtis /* .update0: entered from the pipelined loop (branch site outside this view). counter <= 1: back to .cont0 unchanged. Otherwise tmp_px = %l6 - stridex, tmp_counter = counter - 1, and counter is forced to 1 in the delay slot of ba, so the subcc/bneg countdown in the loop tail reaches .begin sooner. NOTE(review): presumably .begin restarts from tmp_px/tmp_counter - confirm. */ .align 16 1213*25c28e83SPiotr Jasiukajtis.update0: 1214*25c28e83SPiotr Jasiukajtis cmp counter,1 1215*25c28e83SPiotr Jasiukajtis ble .cont0 1216*25c28e83SPiotr Jasiukajtis nop 1217*25c28e83SPiotr Jasiukajtis 1218*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1219*25c28e83SPiotr Jasiukajtis sub counter,1,tmp_counter 1220*25c28e83SPiotr Jasiukajtis 1221*25c28e83SPiotr Jasiukajtis ba .cont0 1222*25c28e83SPiotr Jasiukajtis mov 1,counter 1223*25c28e83SPiotr Jasiukajtis 1224*25c28e83SPiotr Jasiukajtis /* .update1: limit 1, but first tries to repair the operand in place. The delay slot of ble always sets %i1 = %l6 - stridex. With counter > 1 it loads the word at [%i1+4] and tests %g1 (hx per the inline comments): %g1 < 0, or %g1 | that word == 0, goes to 1f; %g1 >= 0x00080000 goes to 2f with [%o3+0x50] preloaded in %f18 (annulled slot, runs only when taken); anything else falls through, converts %f8 with fxtod, reloads hx through tmp7 and rebuilds %o7, %o2 and %f28 for .cont1. NOTE(review): presumably the subnormal fix-up - confirm against the branch site. */ .align 16 1225*25c28e83SPiotr Jasiukajtis.update1: 1226*25c28e83SPiotr Jasiukajtis cmp counter,1 1227*25c28e83SPiotr Jasiukajtis ble .cont1 1228*25c28e83SPiotr Jasiukajtis sub %l6,stridex,%i1 1229*25c28e83SPiotr Jasiukajtis 1230*25c28e83SPiotr Jasiukajtis ld [%i1+4],%i2 1231*25c28e83SPiotr Jasiukajtis cmp %g1,0 1232*25c28e83SPiotr Jasiukajtis bl 1f 1233*25c28e83SPiotr Jasiukajtis 1234*25c28e83SPiotr Jasiukajtis orcc %g1,%i2,%g0 1235*25c28e83SPiotr Jasiukajtis bz 1f 1236*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i3 1237*25c28e83SPiotr Jasiukajtis 1238*25c28e83SPiotr Jasiukajtis cmp %g1,%i3 1239*25c28e83SPiotr Jasiukajtis bge,a 2f 1240*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f18 1241*25c28e83SPiotr Jasiukajtis 1242*25c28e83SPiotr Jasiukajtis fxtod %f8,%f8 ! res = *(long long*)&res; 1243*25c28e83SPiotr Jasiukajtis st %f8,[%fp+tmp7] 1244*25c28e83SPiotr Jasiukajtis 1245*25c28e83SPiotr Jasiukajtis fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); 1246*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1247*25c28e83SPiotr Jasiukajtis 1248*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; 1249*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (0_0) hx >>= 10; 1250*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f28 ! 
(0_0) res = vis_for(res,DC1); 1251*25c28e83SPiotr Jasiukajtis 1252*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1253*25c28e83SPiotr Jasiukajtis 1254*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; 1255*25c28e83SPiotr Jasiukajtis 1256*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; 1257*25c28e83SPiotr Jasiukajtis ba .cont1 1258*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; /* 2: (.update1, %g1 >= 0x00080000) masks %f8 with %f18, converts with fxtod, adds the constant at [%o3+0x58], then rebuilds %g1, %o7, %o2 and %f28 exactly as the fall-through path does. */ 1259*25c28e83SPiotr Jasiukajtis2: 1260*25c28e83SPiotr Jasiukajtis fand %f8,%f18,%f8 1261*25c28e83SPiotr Jasiukajtis fxtod %f8,%f8 ! res = *(long long*)&res; 1262*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f18 1263*25c28e83SPiotr Jasiukajtis faddd %f8,%f18,%f8 1264*25c28e83SPiotr Jasiukajtis st %f8,[%fp+tmp7] 1265*25c28e83SPiotr Jasiukajtis 1266*25c28e83SPiotr Jasiukajtis fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); 1267*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1268*25c28e83SPiotr Jasiukajtis 1269*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; 1270*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (0_0) hx >>= 10; 1271*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); 1272*25c28e83SPiotr Jasiukajtis 1273*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1274*25c28e83SPiotr Jasiukajtis 1275*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; 1276*25c28e83SPiotr Jasiukajtis 1277*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; 1278*25c28e83SPiotr Jasiukajtis ba .cont1 1279*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! 
(0_0) iexp += 0x5fe; /* 1: (.update1, negative or zero operand) defers the element: tmp_px = %l6 - stridex, tmp_counter = counter - 1, counter = 1 in the delay slot of ba, back to .cont1. */ 1280*25c28e83SPiotr Jasiukajtis1: 1281*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1282*25c28e83SPiotr Jasiukajtis sub counter,1,tmp_counter 1283*25c28e83SPiotr Jasiukajtis 1284*25c28e83SPiotr Jasiukajtis ba .cont1 1285*25c28e83SPiotr Jasiukajtis mov 1,counter 1286*25c28e83SPiotr Jasiukajtis 1287*25c28e83SPiotr Jasiukajtis /* .update2: deferral with a limit of 2. counter <= 2: back to .cont2 unchanged. Otherwise tmp_px = %l6 - stridex, tmp_counter = counter - 2 and counter is forced to 2 in the delay slot of ba. NOTE(review): presumably .begin restarts from tmp_px/tmp_counter - confirm. */ .align 16 1288*25c28e83SPiotr Jasiukajtis.update2: 1289*25c28e83SPiotr Jasiukajtis cmp counter,2 1290*25c28e83SPiotr Jasiukajtis ble .cont2 1291*25c28e83SPiotr Jasiukajtis nop 1292*25c28e83SPiotr Jasiukajtis 1293*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1294*25c28e83SPiotr Jasiukajtis sub counter,2,tmp_counter 1295*25c28e83SPiotr Jasiukajtis 1296*25c28e83SPiotr Jasiukajtis ba .cont2 1297*25c28e83SPiotr Jasiukajtis mov 2,counter 1298*25c28e83SPiotr Jasiukajtis 1299*25c28e83SPiotr Jasiukajtis /* .update3: in-place repair variant with a limit of 2. The delay slot of ble always sets %i1 = %l6 - stridex. With counter > 2 the word at [%i1+4] is loaded; %g1 < 0, or %g1 | that word == 0, goes to 1f; %g1 >= 0x00080000 goes to 2f with [%o3+0x50] preloaded in %f18 (annulled slot, runs only when taken); otherwise %f0 is converted with fxtod, hx is reloaded through tmp7 and %o7, %o2 and %f44 are rebuilt for .cont3. %o7 is not negated on this path. */ .align 16 1300*25c28e83SPiotr Jasiukajtis.update3: 1301*25c28e83SPiotr Jasiukajtis cmp counter,2 1302*25c28e83SPiotr Jasiukajtis ble .cont3 1303*25c28e83SPiotr Jasiukajtis sub %l6,stridex,%i1 1304*25c28e83SPiotr Jasiukajtis 1305*25c28e83SPiotr Jasiukajtis ld [%i1+4],%i2 1306*25c28e83SPiotr Jasiukajtis cmp %g1,0 1307*25c28e83SPiotr Jasiukajtis bl 1f 1308*25c28e83SPiotr Jasiukajtis 1309*25c28e83SPiotr Jasiukajtis orcc %g1,%i2,%g0 1310*25c28e83SPiotr Jasiukajtis bz 1f 1311*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i3 1312*25c28e83SPiotr Jasiukajtis 1313*25c28e83SPiotr Jasiukajtis cmp %g1,%i3 1314*25c28e83SPiotr Jasiukajtis bge,a 2f 1315*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f18 1316*25c28e83SPiotr Jasiukajtis 1317*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1318*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 1319*25c28e83SPiotr Jasiukajtis 1320*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); 1321*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1322*25c28e83SPiotr Jasiukajtis 1323*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! 
(1_0) iexp = hx >> 21; 1324*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); 1325*25c28e83SPiotr Jasiukajtis 1326*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (1_0) hx >>= 10; 1327*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1328*25c28e83SPiotr Jasiukajtis ba .cont3 1329*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; /* 2: (.update3, %g1 >= 0x00080000) masks %f0 with %f18, converts with fxtod, adds the constant at [%o3+0x58], then rebuilds %g1, %o7, %o2 and %f44 exactly as the fall-through path does. */ 1330*25c28e83SPiotr Jasiukajtis2: 1331*25c28e83SPiotr Jasiukajtis fand %f0,%f18,%f0 1332*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1333*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f18 1334*25c28e83SPiotr Jasiukajtis faddd %f0,%f18,%f0 1335*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 1336*25c28e83SPiotr Jasiukajtis 1337*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); 1338*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1339*25c28e83SPiotr Jasiukajtis 1340*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; 1341*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); 1342*25c28e83SPiotr Jasiukajtis 1343*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (1_0) hx >>= 10; 1344*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1345*25c28e83SPiotr Jasiukajtis ba .cont3 1346*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! 
(1_0) hx &= 0x7f8; /* 1: (.update3, negative or zero operand) defers the element: tmp_px = %l6 - stridex, tmp_counter = counter - 2, counter = 2 in the delay slot of ba, back to .cont3. */ 1347*25c28e83SPiotr Jasiukajtis1: 1348*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1349*25c28e83SPiotr Jasiukajtis sub counter,2,tmp_counter 1350*25c28e83SPiotr Jasiukajtis 1351*25c28e83SPiotr Jasiukajtis ba .cont3 1352*25c28e83SPiotr Jasiukajtis mov 2,counter 1353*25c28e83SPiotr Jasiukajtis 1354*25c28e83SPiotr Jasiukajtis /* .update4: deferral with a limit of 3. counter <= 3: back to .cont4 unchanged. Otherwise tmp_px = %l6 - stridex, tmp_counter = counter - 3 and counter is forced to 3 in the delay slot of ba. NOTE(review): presumably .begin restarts from tmp_px/tmp_counter - confirm. */ .align 16 1355*25c28e83SPiotr Jasiukajtis.update4: 1356*25c28e83SPiotr Jasiukajtis cmp counter,3 1357*25c28e83SPiotr Jasiukajtis ble .cont4 1358*25c28e83SPiotr Jasiukajtis nop 1359*25c28e83SPiotr Jasiukajtis 1360*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1361*25c28e83SPiotr Jasiukajtis sub counter,3,tmp_counter 1362*25c28e83SPiotr Jasiukajtis 1363*25c28e83SPiotr Jasiukajtis ba .cont4 1364*25c28e83SPiotr Jasiukajtis mov 3,counter 1365*25c28e83SPiotr Jasiukajtis 1366*25c28e83SPiotr Jasiukajtis /* .update5: in-place repair variant with a limit of 3. The delay slot of ble always sets %i1 = %l6 - stridex. With counter > 3 the word at [%i1+4] is loaded into %i3; %g1 < 0, or %g1 | %i3 == 0, goes to 1f; %g1 >= 0x00080000 goes to 2f with [%o3+0x50] preloaded in %f18 (annulled slot, runs only when taken); otherwise %f6 is converted with fxtod, hx is reloaded through tmp7 and %o7, %o2 and %f28 are rebuilt for .cont5, the final for executing in the delay slot of ba. */ .align 16 1367*25c28e83SPiotr Jasiukajtis.update5: 1368*25c28e83SPiotr Jasiukajtis cmp counter,3 1369*25c28e83SPiotr Jasiukajtis ble .cont5 1370*25c28e83SPiotr Jasiukajtis sub %l6,stridex,%i1 1371*25c28e83SPiotr Jasiukajtis 1372*25c28e83SPiotr Jasiukajtis ld [%i1+4],%i3 1373*25c28e83SPiotr Jasiukajtis cmp %g1,0 1374*25c28e83SPiotr Jasiukajtis bl 1f 1375*25c28e83SPiotr Jasiukajtis 1376*25c28e83SPiotr Jasiukajtis orcc %g1,%i3,%g0 1377*25c28e83SPiotr Jasiukajtis bz 1f 1378*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i4 1379*25c28e83SPiotr Jasiukajtis 1380*25c28e83SPiotr Jasiukajtis cmp %g1,%i4 1381*25c28e83SPiotr Jasiukajtis bge,a 2f 1382*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f18 1383*25c28e83SPiotr Jasiukajtis 1384*25c28e83SPiotr Jasiukajtis fxtod %f6,%f6 ! res = *(long long*)&res; 1385*25c28e83SPiotr Jasiukajtis st %f6,[%fp+tmp7] 1386*25c28e83SPiotr Jasiukajtis 1387*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); 1388*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1389*25c28e83SPiotr Jasiukajtis 1390*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! 
(2_0) iexp = hx >> 21; 1391*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (2_0) hx >>= 10; 1392*25c28e83SPiotr Jasiukajtis 1393*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1394*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; 1395*25c28e83SPiotr Jasiukajtis ba .cont5 1396*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1); /* 2: (.update5, %g1 >= 0x00080000) masks %f6 with %f18, converts with fxtod, adds the constant at [%o3+0x58], then rebuilds %g1, %o7, %o2 and %f28 exactly as the fall-through path does. */ 1397*25c28e83SPiotr Jasiukajtis2: 1398*25c28e83SPiotr Jasiukajtis fand %f6,%f18,%f6 1399*25c28e83SPiotr Jasiukajtis fxtod %f6,%f6 ! res = *(long long*)&res; 1400*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f18 1401*25c28e83SPiotr Jasiukajtis faddd %f6,%f18,%f6 1402*25c28e83SPiotr Jasiukajtis st %f6,[%fp+tmp7] 1403*25c28e83SPiotr Jasiukajtis 1404*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); 1405*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1406*25c28e83SPiotr Jasiukajtis 1407*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; 1408*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (2_0) hx >>= 10; 1409*25c28e83SPiotr Jasiukajtis 1410*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1411*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; 1412*25c28e83SPiotr Jasiukajtis ba .cont5 1413*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f28 ! 
(2_0) res = vis_for(res,DC1); /* 1: (.update5, negative or zero operand) defers the element: tmp_px = %l6 - stridex, tmp_counter = counter - 3, counter = 3 in the delay slot of ba, back to .cont5. */ 1414*25c28e83SPiotr Jasiukajtis1: 1415*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1416*25c28e83SPiotr Jasiukajtis sub counter,3,tmp_counter 1417*25c28e83SPiotr Jasiukajtis 1418*25c28e83SPiotr Jasiukajtis ba .cont5 1419*25c28e83SPiotr Jasiukajtis mov 3,counter 1420*25c28e83SPiotr Jasiukajtis 1421*25c28e83SPiotr Jasiukajtis /* .update6: deferral with a limit of 4. counter <= 4: back to .cont6 unchanged. Otherwise tmp_px = %l6 - stridex, tmp_counter = counter - 4 and counter is forced to 4 in the delay slot of ba. NOTE(review): presumably .begin restarts from tmp_px/tmp_counter - confirm. */ .align 16 1422*25c28e83SPiotr Jasiukajtis.update6: 1423*25c28e83SPiotr Jasiukajtis cmp counter,4 1424*25c28e83SPiotr Jasiukajtis ble .cont6 1425*25c28e83SPiotr Jasiukajtis nop 1426*25c28e83SPiotr Jasiukajtis 1427*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1428*25c28e83SPiotr Jasiukajtis sub counter,4,tmp_counter 1429*25c28e83SPiotr Jasiukajtis 1430*25c28e83SPiotr Jasiukajtis ba .cont6 1431*25c28e83SPiotr Jasiukajtis mov 4,counter 1432*25c28e83SPiotr Jasiukajtis 1433*25c28e83SPiotr Jasiukajtis /* .update7: in-place repair variant with a limit of 4. %i1 = %l6 - stridex is computed first, and the faddd in the delay slot of ble (%f6 = %f34 + K3) runs on both outcomes. With counter > 4 the word at [%i1+4] is loaded into %i3; %g1 < 0, or %g1 | %i3 == 0, goes to 1f; %g1 >= 0x00080000 goes to 2f with [%o3+0x50] preloaded in %f18 (annulled slot, runs only when taken); otherwise %f0 is converted with fxtod, hx is reloaded through tmp7 and %o7, %o2 and %f44 are rebuilt for .cont7. */ .align 16 1434*25c28e83SPiotr Jasiukajtis.update7: 1435*25c28e83SPiotr Jasiukajtis sub %l6,stridex,%i1 1436*25c28e83SPiotr Jasiukajtis cmp counter,4 1437*25c28e83SPiotr Jasiukajtis ble .cont7 1438*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f6 ! (6_1) res += K3; 1439*25c28e83SPiotr Jasiukajtis 1440*25c28e83SPiotr Jasiukajtis ld [%i1+4],%i3 1441*25c28e83SPiotr Jasiukajtis cmp %g1,0 1442*25c28e83SPiotr Jasiukajtis bl 1f 1443*25c28e83SPiotr Jasiukajtis 1444*25c28e83SPiotr Jasiukajtis orcc %g1,%i3,%g0 1445*25c28e83SPiotr Jasiukajtis bz 1f 1446*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i5 1447*25c28e83SPiotr Jasiukajtis 1448*25c28e83SPiotr Jasiukajtis cmp %g1,%i5 1449*25c28e83SPiotr Jasiukajtis bge,a 2f 1450*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f18 1451*25c28e83SPiotr Jasiukajtis 1452*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1453*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 1454*25c28e83SPiotr Jasiukajtis 1455*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! 
(3_0) res = vis_fand(res,DC0); 1456*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1457*25c28e83SPiotr Jasiukajtis 1458*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; 1459*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (3_0) hx >>= 10; 1460*25c28e83SPiotr Jasiukajtis 1461*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1462*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; 1463*25c28e83SPiotr Jasiukajtis ba .cont7 1464*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); /* 2: (.update7, %g1 >= 0x00080000) masks %f0 with %f18, converts with fxtod, adds the constant at [%o3+0x58], then rebuilds %g1, %o7, %o2 and %f44 exactly as the fall-through path does. */ 1465*25c28e83SPiotr Jasiukajtis2: 1466*25c28e83SPiotr Jasiukajtis fand %f0,%f18,%f0 1467*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1468*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f18 1469*25c28e83SPiotr Jasiukajtis faddd %f0,%f18,%f0 1470*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 1471*25c28e83SPiotr Jasiukajtis 1472*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); 1473*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1474*25c28e83SPiotr Jasiukajtis 1475*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; 1476*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (3_0) hx >>= 10; 1477*25c28e83SPiotr Jasiukajtis 1478*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1479*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; 1480*25c28e83SPiotr Jasiukajtis ba .cont7 1481*25c28e83SPiotr Jasiukajtis for %f16,DC1,%f44 ! 
(3_0) res = vis_for(res,DC1); /* 1: (.update7, negative or zero operand) defers the element: tmp_px = %l6 - stridex, tmp_counter = counter - 4, counter = 4 in the delay slot of ba, back to .cont7. */ 1482*25c28e83SPiotr Jasiukajtis1: 1483*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1484*25c28e83SPiotr Jasiukajtis sub counter,4,tmp_counter 1485*25c28e83SPiotr Jasiukajtis 1486*25c28e83SPiotr Jasiukajtis ba .cont7 1487*25c28e83SPiotr Jasiukajtis mov 4,counter 1488*25c28e83SPiotr Jasiukajtis 1489*25c28e83SPiotr Jasiukajtis /* .update8: deferral with a limit of 5. counter <= 5: back to .cont8 unchanged. Otherwise tmp_px = %l6 (copied as is, no stridex subtraction here), tmp_counter = counter - 5 and counter is forced to 5 in the delay slot of ba. NOTE(review): presumably .begin restarts from tmp_px/tmp_counter - confirm. */ .align 16 1490*25c28e83SPiotr Jasiukajtis.update8: 1491*25c28e83SPiotr Jasiukajtis cmp counter,5 1492*25c28e83SPiotr Jasiukajtis ble .cont8 1493*25c28e83SPiotr Jasiukajtis nop 1494*25c28e83SPiotr Jasiukajtis 1495*25c28e83SPiotr Jasiukajtis mov %l6,tmp_px 1496*25c28e83SPiotr Jasiukajtis sub counter,5,tmp_counter 1497*25c28e83SPiotr Jasiukajtis 1498*25c28e83SPiotr Jasiukajtis ba .cont8 1499*25c28e83SPiotr Jasiukajtis mov 5,counter 1500*25c28e83SPiotr Jasiukajtis 1501*25c28e83SPiotr Jasiukajtis /* .update9: in-place repair variant with a limit of 5. The word at [%l6+4] is loaded into %i3 first, and the fand in the delay slot of ble (%f16 = %f0 & DC0) runs on both outcomes. With counter > 5: %g1 < 0, or %g1 | %i3 == 0, goes to 1f; %g1 >= 0x00080000 goes to 2f with [%o3+0x50] preloaded in %f18 (annulled slot, runs only when taken); otherwise %f8 is converted with fxtod, hx is reloaded through tmp7 and %o7 (negated), %o2 and %f24 are rebuilt for .cont9. */ .align 16 1502*25c28e83SPiotr Jasiukajtis.update9: 1503*25c28e83SPiotr Jasiukajtis ld [%l6+4],%i3 1504*25c28e83SPiotr Jasiukajtis cmp counter,5 1505*25c28e83SPiotr Jasiukajtis ble .cont9 1506*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); 1507*25c28e83SPiotr Jasiukajtis 1508*25c28e83SPiotr Jasiukajtis cmp %g1,0 1509*25c28e83SPiotr Jasiukajtis bl 1f 1510*25c28e83SPiotr Jasiukajtis 1511*25c28e83SPiotr Jasiukajtis orcc %g1,%i3,%g0 1512*25c28e83SPiotr Jasiukajtis bz 1f 1513*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i1 1514*25c28e83SPiotr Jasiukajtis 1515*25c28e83SPiotr Jasiukajtis cmp %g1,%i1 1516*25c28e83SPiotr Jasiukajtis bge,a 2f 1517*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f18 1518*25c28e83SPiotr Jasiukajtis 1519*25c28e83SPiotr Jasiukajtis fxtod %f8,%f8 ! res = *(long long*)&res; 1520*25c28e83SPiotr Jasiukajtis st %f8,[%fp+tmp7] 1521*25c28e83SPiotr Jasiukajtis 1522*25c28e83SPiotr Jasiukajtis fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); 1523*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1524*25c28e83SPiotr Jasiukajtis 1525*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! 
(4_0) iexp = hx >> 21; 1526*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (4_0) hx >>= 10; 1527*25c28e83SPiotr Jasiukajtis 1528*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1529*25c28e83SPiotr Jasiukajtis 1530*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; 1531*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; 1532*25c28e83SPiotr Jasiukajtis ba .cont9 1533*25c28e83SPiotr Jasiukajtis for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1); /* 2: (.update9, %g1 >= 0x00080000) masks %f8 with %f18, converts with fxtod, adds the constant at [%o3+0x58], then rebuilds %g1, %o7, %o2 and %f24 exactly as the fall-through path does. */ 1534*25c28e83SPiotr Jasiukajtis2: 1535*25c28e83SPiotr Jasiukajtis fand %f8,%f18,%f8 1536*25c28e83SPiotr Jasiukajtis fxtod %f8,%f8 ! res = *(long long*)&res; 1537*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f18 1538*25c28e83SPiotr Jasiukajtis faddd %f8,%f18,%f8 1539*25c28e83SPiotr Jasiukajtis st %f8,[%fp+tmp7] 1540*25c28e83SPiotr Jasiukajtis 1541*25c28e83SPiotr Jasiukajtis fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); 1542*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1543*25c28e83SPiotr Jasiukajtis 1544*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; 1545*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (4_0) hx >>= 10; 1546*25c28e83SPiotr Jasiukajtis 1547*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1548*25c28e83SPiotr Jasiukajtis 1549*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; 1550*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; 1551*25c28e83SPiotr Jasiukajtis ba .cont9 1552*25c28e83SPiotr Jasiukajtis for %f24,DC1,%f24 ! 
(4_0) res = vis_for(res,DC1); /* 1: (.update9, negative or zero operand) defers the element: tmp_px = %l6, tmp_counter = counter - 5, counter = 5 in the delay slot of ba, back to .cont9. */ 1553*25c28e83SPiotr Jasiukajtis1: 1554*25c28e83SPiotr Jasiukajtis mov %l6,tmp_px 1555*25c28e83SPiotr Jasiukajtis sub counter,5,tmp_counter 1556*25c28e83SPiotr Jasiukajtis 1557*25c28e83SPiotr Jasiukajtis ba .cont9 1558*25c28e83SPiotr Jasiukajtis mov 5,counter 1559*25c28e83SPiotr Jasiukajtis 1560*25c28e83SPiotr Jasiukajtis /* .update10: deferral with a limit of 6. counter <= 6: back to .cont10 unchanged. Otherwise tmp_px = %i0 (the pointer is taken from %i0 here, not %l6), tmp_counter = counter - 6 and counter is forced to 6 in the delay slot of ba. NOTE(review): presumably .begin restarts from tmp_px/tmp_counter - confirm. */ .align 16 1561*25c28e83SPiotr Jasiukajtis.update10: 1562*25c28e83SPiotr Jasiukajtis cmp counter,6 1563*25c28e83SPiotr Jasiukajtis ble .cont10 1564*25c28e83SPiotr Jasiukajtis nop 1565*25c28e83SPiotr Jasiukajtis 1566*25c28e83SPiotr Jasiukajtis mov %i0,tmp_px 1567*25c28e83SPiotr Jasiukajtis sub counter,6,tmp_counter 1568*25c28e83SPiotr Jasiukajtis 1569*25c28e83SPiotr Jasiukajtis ba .cont10 1570*25c28e83SPiotr Jasiukajtis mov 6,counter 1571*25c28e83SPiotr Jasiukajtis 1572*25c28e83SPiotr Jasiukajtis /* .update11: in-place repair variant with a limit of 6. The word at [%i0+4] is loaded into %i3 first, and the fand in the delay slot of ble (%f16 = %f6 & DC0) runs on both outcomes. With counter > 6: %g1 < 0, or %g1 | %i3 == 0, goes to 1f; %i3 is then reused for the 0x00080000 threshold, and %g1 >= 0x00080000 goes to 2f with [%o3+0x50] preloaded in %f18 (annulled slot, runs only when taken); otherwise %f0 is converted with fxtod, hx is reloaded through tmp7 and %o7 (negated), %o2 and %f28 are rebuilt for .cont11. */ .align 16 1573*25c28e83SPiotr Jasiukajtis.update11: 1574*25c28e83SPiotr Jasiukajtis ld [%i0+4],%i3 1575*25c28e83SPiotr Jasiukajtis cmp counter,6 1576*25c28e83SPiotr Jasiukajtis ble .cont11 1577*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); 1578*25c28e83SPiotr Jasiukajtis 1579*25c28e83SPiotr Jasiukajtis cmp %g1,0 1580*25c28e83SPiotr Jasiukajtis bl 1f 1581*25c28e83SPiotr Jasiukajtis 1582*25c28e83SPiotr Jasiukajtis orcc %g1,%i3,%g0 1583*25c28e83SPiotr Jasiukajtis bz 1f 1584*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i3 1585*25c28e83SPiotr Jasiukajtis 1586*25c28e83SPiotr Jasiukajtis cmp %g1,%i3 1587*25c28e83SPiotr Jasiukajtis bge,a 2f 1588*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f18 1589*25c28e83SPiotr Jasiukajtis 1590*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1591*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 1592*25c28e83SPiotr Jasiukajtis 1593*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); 1594*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1595*25c28e83SPiotr Jasiukajtis 1596*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! 
(5_0) iexp = hx >> 21; 1597*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (5_0) hx >>= 10; 1598*25c28e83SPiotr Jasiukajtis 1599*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1600*25c28e83SPiotr Jasiukajtis 1601*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; 1602*25c28e83SPiotr Jasiukajtis 1603*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; 1604*25c28e83SPiotr Jasiukajtis ba .cont11 1605*25c28e83SPiotr Jasiukajtis for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); /* 2: (.update11, %g1 >= 0x00080000) masks %f0 with %f18, converts with fxtod, adds the constant at [%o3+0x58], then rebuilds %g1, %o7, %o2 and %f28 exactly as the fall-through path does. */ 1606*25c28e83SPiotr Jasiukajtis2: 1607*25c28e83SPiotr Jasiukajtis fand %f0,%f18,%f0 1608*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1609*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f18 1610*25c28e83SPiotr Jasiukajtis faddd %f0,%f18,%f0 1611*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 1612*25c28e83SPiotr Jasiukajtis 1613*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); 1614*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1615*25c28e83SPiotr Jasiukajtis 1616*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; 1617*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (5_0) hx >>= 10; 1618*25c28e83SPiotr Jasiukajtis 1619*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1620*25c28e83SPiotr Jasiukajtis 1621*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; 1622*25c28e83SPiotr Jasiukajtis 1623*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; 1624*25c28e83SPiotr Jasiukajtis ba .cont11 1625*25c28e83SPiotr Jasiukajtis for %f28,DC1,%f28 ! 
(5_0) res = vis_for(res,DC1); /* 1: (.update11, negative or zero operand) defers the element: tmp_px = %i0, tmp_counter = counter - 6, counter = 6 in the delay slot of ba, back to .cont11. */ 1626*25c28e83SPiotr Jasiukajtis1: 1627*25c28e83SPiotr Jasiukajtis mov %i0,tmp_px 1628*25c28e83SPiotr Jasiukajtis sub counter,6,tmp_counter 1629*25c28e83SPiotr Jasiukajtis 1630*25c28e83SPiotr Jasiukajtis ba .cont11 1631*25c28e83SPiotr Jasiukajtis mov 6,counter 1632*25c28e83SPiotr Jasiukajtis 1633*25c28e83SPiotr Jasiukajtis /* .update12: deferral with a limit of 0. The faddd in the delay slot of ble (%f34 += K3) runs on both outcomes. counter <= 0: back to .cont12. Otherwise tmp_px = %l6 - stridex, tmp_counter = counter - 0 (the whole remaining count) and counter is forced to 0 in the delay slot of ba. NOTE(review): presumably .begin restarts from tmp_px/tmp_counter - confirm. */ .align 16 1634*25c28e83SPiotr Jasiukajtis.update12: 1635*25c28e83SPiotr Jasiukajtis cmp counter,0 1636*25c28e83SPiotr Jasiukajtis ble .cont12 1637*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (2_1) res += K3; 1638*25c28e83SPiotr Jasiukajtis 1639*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1640*25c28e83SPiotr Jasiukajtis sub counter,0,tmp_counter 1641*25c28e83SPiotr Jasiukajtis 1642*25c28e83SPiotr Jasiukajtis ba .cont12 1643*25c28e83SPiotr Jasiukajtis mov 0,counter 1644*25c28e83SPiotr Jasiukajtis 1645*25c28e83SPiotr Jasiukajtis /* .update13: in-place repair variant with a limit of 0. %l4 = %l6 - stridex is computed first, and the fpadd32 in the delay slot of ble (%f18 = %f44 + DC2) runs on both outcomes. With counter > 0 the word at [%l4+4] replaces %l4; %g1 < 0, or %g1 | %l4 == 0, goes to 1f; %l4 is then reused for the 0x00080000 threshold, and %g1 >= 0x00080000 goes to 2f with [%o3+0x50] preloaded in %f62 (annulled slot, runs only when taken); otherwise %f6 is converted with fxtod, hx is reloaded through tmp7, %o7 (negated), %o2 and %f44 are rebuilt, and the fpadd32 is redone in the delay slot of ba for .cont13. */ .align 16 1646*25c28e83SPiotr Jasiukajtis.update13: 1647*25c28e83SPiotr Jasiukajtis sub %l6,stridex,%l4 1648*25c28e83SPiotr Jasiukajtis cmp counter,0 1649*25c28e83SPiotr Jasiukajtis ble .cont13 1650*25c28e83SPiotr Jasiukajtis fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); 1651*25c28e83SPiotr Jasiukajtis 1652*25c28e83SPiotr Jasiukajtis ld [%l4+4],%l4 1653*25c28e83SPiotr Jasiukajtis cmp %g1,0 1654*25c28e83SPiotr Jasiukajtis bl 1f 1655*25c28e83SPiotr Jasiukajtis 1656*25c28e83SPiotr Jasiukajtis orcc %g1,%l4,%g0 1657*25c28e83SPiotr Jasiukajtis bz 1f 1658*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%l4 1659*25c28e83SPiotr Jasiukajtis 1660*25c28e83SPiotr Jasiukajtis cmp %g1,%l4 1661*25c28e83SPiotr Jasiukajtis bge,a 2f 1662*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f62 1663*25c28e83SPiotr Jasiukajtis 1664*25c28e83SPiotr Jasiukajtis fxtod %f6,%f6 ! res = *(long long*)&res; 1665*25c28e83SPiotr Jasiukajtis st %f6,[%fp+tmp7] 1666*25c28e83SPiotr Jasiukajtis 1667*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f44 ! 
(6_0) res = vis_fand(res,DC0); 1668*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1669*25c28e83SPiotr Jasiukajtis 1670*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; 1671*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (6_1) hx >>= 10; 1672*25c28e83SPiotr Jasiukajtis 1673*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1674*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; 1675*25c28e83SPiotr Jasiukajtis for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1); 1676*25c28e83SPiotr Jasiukajtis 1677*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; 1678*25c28e83SPiotr Jasiukajtis ba .cont13 1679*25c28e83SPiotr Jasiukajtis fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); /* 2: (.update13, %g1 >= 0x00080000) masks %f6 with %f62, converts with fxtod, adds the constant at [%o3+0x58], then rebuilds %g1, %o7, %o2, %f44 and %f18 exactly as the fall-through path does. */ 1680*25c28e83SPiotr Jasiukajtis2: 1681*25c28e83SPiotr Jasiukajtis fand %f6,%f62,%f6 1682*25c28e83SPiotr Jasiukajtis fxtod %f6,%f6 ! res = *(long long*)&res; 1683*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f62 1684*25c28e83SPiotr Jasiukajtis faddd %f6,%f62,%f6 1685*25c28e83SPiotr Jasiukajtis st %f6,[%fp+tmp7] 1686*25c28e83SPiotr Jasiukajtis 1687*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0); 1688*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1689*25c28e83SPiotr Jasiukajtis 1690*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; 1691*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (6_1) hx >>= 10; 1692*25c28e83SPiotr Jasiukajtis for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1); 1693*25c28e83SPiotr Jasiukajtis 1694*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1695*25c28e83SPiotr Jasiukajtis 1696*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; 1697*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; 1698*25c28e83SPiotr Jasiukajtis ba .cont13 1699*25c28e83SPiotr Jasiukajtis fpadd32 %f44,DC2,%f18 ! 
(6_1) res_c = vis_fpadd32(res,DC2); /* 1: (.update13, negative or zero operand) defers the element: tmp_px = %l6 - stridex, tmp_counter = counter - 0, counter = 0 in the delay slot of ba, back to .cont13. */ 1700*25c28e83SPiotr Jasiukajtis1: 1701*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1702*25c28e83SPiotr Jasiukajtis sub counter,0,tmp_counter 1703*25c28e83SPiotr Jasiukajtis 1704*25c28e83SPiotr Jasiukajtis ba .cont13 1705*25c28e83SPiotr Jasiukajtis mov 0,counter 1706*25c28e83SPiotr Jasiukajtis 1707*25c28e83SPiotr Jasiukajtis /* .update14: deferral with a limit of 1. The faddd in the delay slot of ble (%f34 += K3) runs on both outcomes. counter <= 1: back to .cont14. Otherwise tmp_px = %l6 - stridex, tmp_counter = counter - 1 and counter is forced to 1 in the delay slot of ba. NOTE(review): presumably .begin restarts from tmp_px/tmp_counter - confirm. */ .align 16 1708*25c28e83SPiotr Jasiukajtis.update14: 1709*25c28e83SPiotr Jasiukajtis cmp counter,1 1710*25c28e83SPiotr Jasiukajtis ble .cont14 1711*25c28e83SPiotr Jasiukajtis faddd %f34,K3,%f34 ! (3_1) res += K3; 1712*25c28e83SPiotr Jasiukajtis 1713*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1714*25c28e83SPiotr Jasiukajtis sub counter,1,tmp_counter 1715*25c28e83SPiotr Jasiukajtis 1716*25c28e83SPiotr Jasiukajtis ba .cont14 1717*25c28e83SPiotr Jasiukajtis mov 1,counter 1718*25c28e83SPiotr Jasiukajtis 1719*25c28e83SPiotr Jasiukajtis /* .update15: in-place repair variant with a limit of 1. %l2 = %l6 - stridex is computed first, and the fpadd32 in the delay slot of ble (%f18 = %f28 + DC2) runs on both outcomes. With counter > 1 the word at [%l2+4] replaces %l2; %g1 < 0, or %g1 | %l2 == 0, goes to 1f; %l2 is then reused for the 0x00080000 threshold, and %g1 >= 0x00080000 goes to 2f with [%o3+0x50] preloaded in %f62 (annulled slot, runs only when taken); otherwise %f0 is converted with fxtod, hx is reloaded through tmp7, %o7 (negated, then + 1534), %o2 and %f28 are rebuilt, and the fpadd32 is redone in the delay slot of ba for .cont15. */ .align 16 1720*25c28e83SPiotr Jasiukajtis.update15: 1721*25c28e83SPiotr Jasiukajtis sub %l6,stridex,%l2 1722*25c28e83SPiotr Jasiukajtis cmp counter,1 1723*25c28e83SPiotr Jasiukajtis ble .cont15 1724*25c28e83SPiotr Jasiukajtis fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); 1725*25c28e83SPiotr Jasiukajtis 1726*25c28e83SPiotr Jasiukajtis ld [%l2+4],%l2 1727*25c28e83SPiotr Jasiukajtis cmp %g1,0 1728*25c28e83SPiotr Jasiukajtis bl 1f 1729*25c28e83SPiotr Jasiukajtis 1730*25c28e83SPiotr Jasiukajtis orcc %g1,%l2,%g0 1731*25c28e83SPiotr Jasiukajtis bz 1f 1732*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%l2 1733*25c28e83SPiotr Jasiukajtis 1734*25c28e83SPiotr Jasiukajtis cmp %g1,%l2 1735*25c28e83SPiotr Jasiukajtis bge,a 2f 1736*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f62 1737*25c28e83SPiotr Jasiukajtis 1738*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1739*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 1740*25c28e83SPiotr Jasiukajtis 1741*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f18 ! 
(0_0) res = vis_fand(res,DC0); 1742*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1743*25c28e83SPiotr Jasiukajtis 1744*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; 1745*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (0_0) hx >>= 10; 1746*25c28e83SPiotr Jasiukajtis 1747*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1748*25c28e83SPiotr Jasiukajtis for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1); 1749*25c28e83SPiotr Jasiukajtis 1750*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; 1751*25c28e83SPiotr Jasiukajtis 1752*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; 1753*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; 1754*25c28e83SPiotr Jasiukajtis ba .cont15 1755*25c28e83SPiotr Jasiukajtis fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); /* 2: (.update15, %g1 >= 0x00080000) masks %f0 with %f62, converts with fxtod, adds the constant at [%o3+0x58], then rebuilds %g1, %o7, %o2, %f28 and %f18 exactly as the fall-through path does. */ 1756*25c28e83SPiotr Jasiukajtis2: 1757*25c28e83SPiotr Jasiukajtis fand %f0,%f62,%f0 1758*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1759*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f62 1760*25c28e83SPiotr Jasiukajtis faddd %f0,%f62,%f0 1761*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 1762*25c28e83SPiotr Jasiukajtis 1763*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0); 1764*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1765*25c28e83SPiotr Jasiukajtis 1766*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; 1767*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (0_0) hx >>= 10; 1768*25c28e83SPiotr Jasiukajtis for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1); 1769*25c28e83SPiotr Jasiukajtis 1770*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1771*25c28e83SPiotr Jasiukajtis 1772*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; 1773*25c28e83SPiotr Jasiukajtis 1774*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; 1775*25c28e83SPiotr Jasiukajtis add %o7,1534,%o7 ! 
(0_0) iexp += 0x5fe; 1776*25c28e83SPiotr Jasiukajtis ba .cont15 1777*25c28e83SPiotr Jasiukajtis fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); 1778*25c28e83SPiotr Jasiukajtis/* 1: target of the bl/bz tests in .update15 (%g1 < 0, or %g1 | word at [%l2+4] == 0): set tmp_px = %l6 - stridex, tmp_counter = counter - 1, counter = 1 and return to .cont15. */1: 1779*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1780*25c28e83SPiotr Jasiukajtis sub counter,1,tmp_counter 1781*25c28e83SPiotr Jasiukajtis 1782*25c28e83SPiotr Jasiukajtis ba .cont15 1783*25c28e83SPiotr Jasiukajtis mov 1,counter 1784*25c28e83SPiotr Jasiukajtis 1785*25c28e83SPiotr Jasiukajtis .align 16 1786*25c28e83SPiotr Jasiukajtis/* .update16: if counter <= 2, return to .cont16 unchanged (the fand after ble is in the branch delay slot and runs on both paths). Otherwise set tmp_px = %l6 - stridex, tmp_counter = counter - 2, counter = 2 and return to .cont16. */.update16: 1787*25c28e83SPiotr Jasiukajtis cmp counter,2 1788*25c28e83SPiotr Jasiukajtis ble .cont16 1789*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3); 1790*25c28e83SPiotr Jasiukajtis 1791*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1792*25c28e83SPiotr Jasiukajtis sub counter,2,tmp_counter 1793*25c28e83SPiotr Jasiukajtis 1794*25c28e83SPiotr Jasiukajtis ba .cont16 1795*25c28e83SPiotr Jasiukajtis mov 2,counter 1796*25c28e83SPiotr Jasiukajtis 1797*25c28e83SPiotr Jasiukajtis .align 16 1798*25c28e83SPiotr Jasiukajtis/* .update17: %i2 = %l6 - stridex; if counter <= 2, return to .cont17 (the fand in the delay slot runs either way). Otherwise load the word at [%i2+4] and classify on %g1 (presumably hx - confirm): %g1 < 0, or (%g1 | loaded word) == 0, goes to 1f; %g1 >= 0x00080000 goes to 2f, and the annulled delay-slot ldd fetches [%o3+0x50] into %f2 only when that branch is taken. On fall-through, fxtod converts %f6 (read as a 64-bit integer) to double, its high word is passed through [%fp+tmp7] into %g1, and iexp/hx and res (%f44) are rebuilt before returning to .cont17. NOTE(review): looks like rescaling of subnormal inputs - confirm. */.update17: 1799*25c28e83SPiotr Jasiukajtis sub %l6,stridex,%i2 1800*25c28e83SPiotr Jasiukajtis cmp counter,2 1801*25c28e83SPiotr Jasiukajtis ble .cont17 1802*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); 1803*25c28e83SPiotr Jasiukajtis 1804*25c28e83SPiotr Jasiukajtis ld [%i2+4],%i2 1805*25c28e83SPiotr Jasiukajtis cmp %g1,0 1806*25c28e83SPiotr Jasiukajtis bl 1f 1807*25c28e83SPiotr Jasiukajtis 1808*25c28e83SPiotr Jasiukajtis orcc %g1,%i2,%g0 1809*25c28e83SPiotr Jasiukajtis bz 1f 1810*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i2 1811*25c28e83SPiotr Jasiukajtis 1812*25c28e83SPiotr Jasiukajtis cmp %g1,%i2 1813*25c28e83SPiotr Jasiukajtis bge,a 2f 1814*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f2 1815*25c28e83SPiotr Jasiukajtis 1816*25c28e83SPiotr Jasiukajtis fxtod %f6,%f6 ! 
res = *(long long*)&res; 1817*25c28e83SPiotr Jasiukajtis st %f6,[%fp+tmp7] 1818*25c28e83SPiotr Jasiukajtis 1819*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0); 1820*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1821*25c28e83SPiotr Jasiukajtis 1822*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; 1823*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (1_0) hx >>= 10; 1824*25c28e83SPiotr Jasiukajtis 1825*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1826*25c28e83SPiotr Jasiukajtis 1827*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; 1828*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; 1829*25c28e83SPiotr Jasiukajtis ba .cont17 1830*25c28e83SPiotr Jasiukajtis for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1); 1831*25c28e83SPiotr Jasiukajtis/* 2: taken when %g1 >= 0x00080000. %f6 is ANDed with the constant loaded from [%o3+0x50], converted with fxtod, and the constant from [%o3+0x58] is then added (offsets match DC4 and D2ON51 in .CONST_TBL if %o3 is the table base - TODO confirm). hx/iexp/res are then rebuilt as in the fall-through path and control returns to .cont17 with the for in the delay slot. */2: 1832*25c28e83SPiotr Jasiukajtis fand %f6,%f2,%f6 1833*25c28e83SPiotr Jasiukajtis fxtod %f6,%f6 ! res = *(long long*)&res; 1834*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f2 1835*25c28e83SPiotr Jasiukajtis faddd %f6,%f2,%f6 1836*25c28e83SPiotr Jasiukajtis st %f6,[%fp+tmp7] 1837*25c28e83SPiotr Jasiukajtis 1838*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0); 1839*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1840*25c28e83SPiotr Jasiukajtis 1841*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; 1842*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (1_0) hx >>= 10; 1843*25c28e83SPiotr Jasiukajtis 1844*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1845*25c28e83SPiotr Jasiukajtis 1846*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; 1847*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; 1848*25c28e83SPiotr Jasiukajtis ba .cont17 1849*25c28e83SPiotr Jasiukajtis for %f44,DC1,%f44 ! 
(1_0) res = vis_for(res,DC1); 1850*25c28e83SPiotr Jasiukajtis/* 1: target of the bl/bz tests in .update17 (%g1 < 0, or %g1 | word at [%i2+4] == 0): set tmp_px = %l6 - stridex, tmp_counter = counter - 2, counter = 2 and return to .cont17. */1: 1851*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1852*25c28e83SPiotr Jasiukajtis sub counter,2,tmp_counter 1853*25c28e83SPiotr Jasiukajtis 1854*25c28e83SPiotr Jasiukajtis ba .cont17 1855*25c28e83SPiotr Jasiukajtis mov 2,counter 1856*25c28e83SPiotr Jasiukajtis 1857*25c28e83SPiotr Jasiukajtis .align 16 1858*25c28e83SPiotr Jasiukajtis/* .update18: if counter <= 3, return to .cont18 unchanged (the fand after ble is in the branch delay slot and runs on both paths). Otherwise set tmp_px = %l6 - stridex, tmp_counter = counter - 3, counter = 3 and return to .cont18. */.update18: 1859*25c28e83SPiotr Jasiukajtis cmp counter,3 1860*25c28e83SPiotr Jasiukajtis ble .cont18 1861*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3); 1862*25c28e83SPiotr Jasiukajtis 1863*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1864*25c28e83SPiotr Jasiukajtis sub counter,3,tmp_counter 1865*25c28e83SPiotr Jasiukajtis 1866*25c28e83SPiotr Jasiukajtis ba .cont18 1867*25c28e83SPiotr Jasiukajtis mov 3,counter 1868*25c28e83SPiotr Jasiukajtis 1869*25c28e83SPiotr Jasiukajtis .align 16 1870*25c28e83SPiotr Jasiukajtis/* .update19: %i4 = %l6 - stridex; if counter <= 3, return to .cont19 (the fand in the delay slot runs either way). Otherwise load the word at [%i4+4] and classify on %g1 (presumably hx - confirm): %g1 < 0, or (%g1 | loaded word) == 0, goes to 1f; %g1 >= 0x00080000 goes to 2f, and the annulled delay-slot ldd fetches [%o3+0x50] into %f2 only when that branch is taken. On fall-through, fxtod converts %f0 (read as a 64-bit integer) to double, its high word is passed through [%fp+tmp7] into %g1, and iexp/hx and res (%f28) are rebuilt before returning to .cont19. NOTE(review): looks like rescaling of subnormal inputs - confirm. */.update19: 1871*25c28e83SPiotr Jasiukajtis sub %l6,stridex,%i4 1872*25c28e83SPiotr Jasiukajtis cmp counter,3 1873*25c28e83SPiotr Jasiukajtis ble .cont19 1874*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); 1875*25c28e83SPiotr Jasiukajtis 1876*25c28e83SPiotr Jasiukajtis ld [%i4+4],%i4 1877*25c28e83SPiotr Jasiukajtis cmp %g1,0 1878*25c28e83SPiotr Jasiukajtis bl 1f 1879*25c28e83SPiotr Jasiukajtis 1880*25c28e83SPiotr Jasiukajtis orcc %g1,%i4,%g0 1881*25c28e83SPiotr Jasiukajtis bz 1f 1882*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i4 1883*25c28e83SPiotr Jasiukajtis 1884*25c28e83SPiotr Jasiukajtis cmp %g1,%i4 1885*25c28e83SPiotr Jasiukajtis bge,a 2f 1886*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f2 1887*25c28e83SPiotr Jasiukajtis 1888*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1889*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 1890*25c28e83SPiotr Jasiukajtis 1891*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f28 ! 
(2_0) res = vis_fand(res,DC0); 1892*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1893*25c28e83SPiotr Jasiukajtis 1894*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; 1895*25c28e83SPiotr Jasiukajtis 1896*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (2_0) hx >>= 10; 1897*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1898*25c28e83SPiotr Jasiukajtis 1899*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; 1900*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; 1901*25c28e83SPiotr Jasiukajtis ba .cont19 1902*25c28e83SPiotr Jasiukajtis for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1); 1903*25c28e83SPiotr Jasiukajtis/* 2: taken when %g1 >= 0x00080000. %f0 is ANDed with the constant loaded from [%o3+0x50], converted with fxtod, and the constant from [%o3+0x58] is then added (offsets match DC4 and D2ON51 in .CONST_TBL if %o3 is the table base - TODO confirm). hx/iexp/res are then rebuilt as in the fall-through path and control returns to .cont19 with the for in the delay slot. */2: 1904*25c28e83SPiotr Jasiukajtis fand %f0,%f2,%f0 1905*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 1906*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f2 1907*25c28e83SPiotr Jasiukajtis faddd %f0,%f2,%f0 1908*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 1909*25c28e83SPiotr Jasiukajtis 1910*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0); 1911*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1912*25c28e83SPiotr Jasiukajtis 1913*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; 1914*25c28e83SPiotr Jasiukajtis 1915*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (2_0) hx >>= 10; 1916*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1917*25c28e83SPiotr Jasiukajtis 1918*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; 1919*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; 1920*25c28e83SPiotr Jasiukajtis ba .cont19 1921*25c28e83SPiotr Jasiukajtis for %f28,DC1,%f28 ! 
(2_0) res = vis_for(res,DC1); 1922*25c28e83SPiotr Jasiukajtis/* 1: target of the bl/bz tests in .update19 (%g1 < 0, or %g1 | word at [%i4+4] == 0): set tmp_px = %l6 - stridex, tmp_counter = counter - 3, counter = 3 and return to .cont19. */1: 1923*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1924*25c28e83SPiotr Jasiukajtis sub counter,3,tmp_counter 1925*25c28e83SPiotr Jasiukajtis 1926*25c28e83SPiotr Jasiukajtis ba .cont19 1927*25c28e83SPiotr Jasiukajtis mov 3,counter 1928*25c28e83SPiotr Jasiukajtis 1929*25c28e83SPiotr Jasiukajtis .align 16 1930*25c28e83SPiotr Jasiukajtis/* .update20: if counter <= 4, return to .cont20 unchanged (the fand after ble is in the branch delay slot and runs on both paths). Otherwise set tmp_px = %l6 - stridex, tmp_counter = counter - 4, counter = 4 and return to .cont20. */.update20: 1931*25c28e83SPiotr Jasiukajtis cmp counter,4 1932*25c28e83SPiotr Jasiukajtis ble .cont20 1933*25c28e83SPiotr Jasiukajtis fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3); 1934*25c28e83SPiotr Jasiukajtis 1935*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1936*25c28e83SPiotr Jasiukajtis sub counter,4,tmp_counter 1937*25c28e83SPiotr Jasiukajtis 1938*25c28e83SPiotr Jasiukajtis ba .cont20 1939*25c28e83SPiotr Jasiukajtis mov 4,counter 1940*25c28e83SPiotr Jasiukajtis 1941*25c28e83SPiotr Jasiukajtis .align 16 1942*25c28e83SPiotr Jasiukajtis/* .update21: %i5 = %l6 - stridex; if counter <= 4, return to .cont21 (the fand in the delay slot runs either way). Otherwise load the word at [%i5+4] and classify on %g1 (presumably hx - confirm): %g1 < 0, or (%g1 | loaded word) == 0, goes to 1f; %g1 >= 0x00080000 goes to 2f, and the annulled delay-slot ldd fetches [%o3+0x50] into %f34 only when that branch is taken. On fall-through, fxtod converts %f6 (read as a 64-bit integer) to double, its high word is passed through [%fp+tmp7] into %g1, and iexp/hx and res (%f44) are rebuilt before returning to .cont21. NOTE(review): looks like rescaling of subnormal inputs - confirm. */.update21: 1943*25c28e83SPiotr Jasiukajtis sub %l6,stridex,%i5 1944*25c28e83SPiotr Jasiukajtis cmp counter,4 1945*25c28e83SPiotr Jasiukajtis ble .cont21 1946*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); 1947*25c28e83SPiotr Jasiukajtis 1948*25c28e83SPiotr Jasiukajtis ld [%i5+4],%i5 1949*25c28e83SPiotr Jasiukajtis cmp %g1,0 1950*25c28e83SPiotr Jasiukajtis bl 1f 1951*25c28e83SPiotr Jasiukajtis 1952*25c28e83SPiotr Jasiukajtis orcc %g1,%i5,%g0 1953*25c28e83SPiotr Jasiukajtis bz 1f 1954*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i5 1955*25c28e83SPiotr Jasiukajtis 1956*25c28e83SPiotr Jasiukajtis cmp %g1,%i5 1957*25c28e83SPiotr Jasiukajtis bge,a 2f 1958*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f34 1959*25c28e83SPiotr Jasiukajtis 1960*25c28e83SPiotr Jasiukajtis fxtod %f6,%f6 ! res = *(long long*)&res; 1961*25c28e83SPiotr Jasiukajtis st %f6,[%fp+tmp7] 1962*25c28e83SPiotr Jasiukajtis 1963*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f44 ! 
(3_0) res = vis_fand(res,DC0); 1964*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1965*25c28e83SPiotr Jasiukajtis 1966*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; 1967*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (3_0) hx >>= 10; 1968*25c28e83SPiotr Jasiukajtis 1969*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1970*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; 1971*25c28e83SPiotr Jasiukajtis 1972*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; 1973*25c28e83SPiotr Jasiukajtis ba .cont21 1974*25c28e83SPiotr Jasiukajtis for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1); 1975*25c28e83SPiotr Jasiukajtis/* 2: taken when %g1 >= 0x00080000. %f6 is ANDed with the constant loaded from [%o3+0x50], converted with fxtod, and the constant from [%o3+0x58] is then added (offsets match DC4 and D2ON51 in .CONST_TBL if %o3 is the table base - TODO confirm). hx/iexp/res are then rebuilt as in the fall-through path and control returns to .cont21 with the for in the delay slot. */2: 1976*25c28e83SPiotr Jasiukajtis fand %f6,%f34,%f6 1977*25c28e83SPiotr Jasiukajtis fxtod %f6,%f6 ! res = *(long long*)&res; 1978*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f34 1979*25c28e83SPiotr Jasiukajtis faddd %f6,%f34,%f6 1980*25c28e83SPiotr Jasiukajtis st %f6,[%fp+tmp7] 1981*25c28e83SPiotr Jasiukajtis 1982*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0); 1983*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 1984*25c28e83SPiotr Jasiukajtis 1985*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; 1986*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (3_0) hx >>= 10; 1987*25c28e83SPiotr Jasiukajtis 1988*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 1989*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; 1990*25c28e83SPiotr Jasiukajtis 1991*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; 1992*25c28e83SPiotr Jasiukajtis ba .cont21 1993*25c28e83SPiotr Jasiukajtis for %f44,DC1,%f44 ! 
(3_0) res = vis_for(res,DC1); 1994*25c28e83SPiotr Jasiukajtis/* 1: target of the bl/bz tests in .update21 (%g1 < 0, or %g1 | word at [%i5+4] == 0): set tmp_px = %l6 - stridex, tmp_counter = counter - 4, counter = 4 and return to .cont21. */1: 1995*25c28e83SPiotr Jasiukajtis sub %l6,stridex,tmp_px 1996*25c28e83SPiotr Jasiukajtis sub counter,4,tmp_counter 1997*25c28e83SPiotr Jasiukajtis 1998*25c28e83SPiotr Jasiukajtis ba .cont21 1999*25c28e83SPiotr Jasiukajtis mov 4,counter 2000*25c28e83SPiotr Jasiukajtis 2001*25c28e83SPiotr Jasiukajtis .align 16 2002*25c28e83SPiotr Jasiukajtis/* .update22: if counter <= 5, return to .cont22 unchanged (the fmuld after ble is in the branch delay slot and runs on both paths). Otherwise set tmp_px = %i0 - stridex (this handler uses %i0 as its pointer register), tmp_counter = counter - 5, counter = 5 and return to .cont22. */.update22: 2003*25c28e83SPiotr Jasiukajtis cmp counter,5 2004*25c28e83SPiotr Jasiukajtis ble .cont22 2005*25c28e83SPiotr Jasiukajtis fmuld %f62,%f38,%f62 ! (1_0) res *= xx; 2006*25c28e83SPiotr Jasiukajtis 2007*25c28e83SPiotr Jasiukajtis sub %i0,stridex,tmp_px 2008*25c28e83SPiotr Jasiukajtis sub counter,5,tmp_counter 2009*25c28e83SPiotr Jasiukajtis 2010*25c28e83SPiotr Jasiukajtis ba .cont22 2011*25c28e83SPiotr Jasiukajtis mov 5,counter 2012*25c28e83SPiotr Jasiukajtis 2013*25c28e83SPiotr Jasiukajtis .align 16 2014*25c28e83SPiotr Jasiukajtis/* .update23: %l1 = %i0 - stridex; if counter <= 5, return to .cont23 (the fand in the delay slot runs either way). Otherwise load the word at [%l1+4] and classify on %g1 (presumably hx - confirm): %g1 < 0, or (%g1 | loaded word) == 0, goes to 1f; %g1 >= 0x00080000 goes to 2f, and the annulled delay-slot ldd fetches [%o3+0x50] into %f34 only when that branch is taken. On fall-through, fxtod converts %f0 (read as a 64-bit integer) to double, its high word is passed through [%fp+tmp7] into %g1, and iexp/hx and res (%f24) are rebuilt before returning to .cont23. NOTE(review): looks like rescaling of subnormal inputs - confirm. */.update23: 2015*25c28e83SPiotr Jasiukajtis sub %i0,stridex,%l1 2016*25c28e83SPiotr Jasiukajtis cmp counter,5 2017*25c28e83SPiotr Jasiukajtis ble .cont23 2018*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); 2019*25c28e83SPiotr Jasiukajtis 2020*25c28e83SPiotr Jasiukajtis ld [%l1+4],%l1 2021*25c28e83SPiotr Jasiukajtis cmp %g1,0 2022*25c28e83SPiotr Jasiukajtis bl 1f 2023*25c28e83SPiotr Jasiukajtis 2024*25c28e83SPiotr Jasiukajtis orcc %g1,%l1,%g0 2025*25c28e83SPiotr Jasiukajtis bz 1f 2026*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%l1 2027*25c28e83SPiotr Jasiukajtis 2028*25c28e83SPiotr Jasiukajtis cmp %g1,%l1 2029*25c28e83SPiotr Jasiukajtis bge,a 2f 2030*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f34 2031*25c28e83SPiotr Jasiukajtis 2032*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 2033*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 2034*25c28e83SPiotr Jasiukajtis 2035*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f24 ! 
(4_0) res = vis_fand(res,DC0); 2036*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 2037*25c28e83SPiotr Jasiukajtis 2038*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; 2039*25c28e83SPiotr Jasiukajtis 2040*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (4_0) hx >>= 10; 2041*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 2042*25c28e83SPiotr Jasiukajtis 2043*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; 2044*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; 2045*25c28e83SPiotr Jasiukajtis ba .cont23 2046*25c28e83SPiotr Jasiukajtis for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1); 2047*25c28e83SPiotr Jasiukajtis/* 2: taken when %g1 >= 0x00080000. %f0 is ANDed with the constant loaded from [%o3+0x50], converted with fxtod, and the constant from [%o3+0x58] is then added (offsets match DC4 and D2ON51 in .CONST_TBL if %o3 is the table base - TODO confirm). hx/iexp/res are then rebuilt as in the fall-through path and control returns to .cont23 with the for in the delay slot. */2: 2048*25c28e83SPiotr Jasiukajtis fand %f0,%f34,%f0 2049*25c28e83SPiotr Jasiukajtis fxtod %f0,%f0 ! res = *(long long*)&res; 2050*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f34 2051*25c28e83SPiotr Jasiukajtis faddd %f0,%f34,%f0 2052*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp7] 2053*25c28e83SPiotr Jasiukajtis 2054*25c28e83SPiotr Jasiukajtis fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0); 2055*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 2056*25c28e83SPiotr Jasiukajtis 2057*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; 2058*25c28e83SPiotr Jasiukajtis 2059*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (4_0) hx >>= 10; 2060*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 2061*25c28e83SPiotr Jasiukajtis 2062*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; 2063*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; 2064*25c28e83SPiotr Jasiukajtis ba .cont23 2065*25c28e83SPiotr Jasiukajtis for %f24,DC1,%f24 ! 
(4_0) res = vis_for(res,DC1); 2066*25c28e83SPiotr Jasiukajtis/* 1: target of the bl/bz tests in .update23 (%g1 < 0, or %g1 | word at [%l1+4] == 0): set tmp_px = %i0 - stridex, tmp_counter = counter - 5, counter = 5 and return to .cont23. */1: 2067*25c28e83SPiotr Jasiukajtis sub %i0,stridex,tmp_px 2068*25c28e83SPiotr Jasiukajtis sub counter,5,tmp_counter 2069*25c28e83SPiotr Jasiukajtis 2070*25c28e83SPiotr Jasiukajtis ba .cont23 2071*25c28e83SPiotr Jasiukajtis mov 5,counter 2072*25c28e83SPiotr Jasiukajtis 2073*25c28e83SPiotr Jasiukajtis .align 16 2074*25c28e83SPiotr Jasiukajtis/* .update24: if counter <= 6, return to .cont24 unchanged (the fmuld after ble is in the branch delay slot and runs on both paths). Otherwise set tmp_px = %i1 - stridex (this handler uses %i1 as its pointer register), tmp_counter = counter - 6, counter = 6 and return to .cont24. */.update24: 2075*25c28e83SPiotr Jasiukajtis cmp counter,6 2076*25c28e83SPiotr Jasiukajtis ble .cont24 2077*25c28e83SPiotr Jasiukajtis fmuld %f62,%f36,%f62 ! (2_0) res *= xx; 2078*25c28e83SPiotr Jasiukajtis 2079*25c28e83SPiotr Jasiukajtis sub %i1,stridex,tmp_px 2080*25c28e83SPiotr Jasiukajtis sub counter,6,tmp_counter 2081*25c28e83SPiotr Jasiukajtis 2082*25c28e83SPiotr Jasiukajtis ba .cont24 2083*25c28e83SPiotr Jasiukajtis mov 6,counter 2084*25c28e83SPiotr Jasiukajtis 2085*25c28e83SPiotr Jasiukajtis .align 16 2086*25c28e83SPiotr Jasiukajtis/* .update25: %i3 = %i1 - stridex; if counter <= 6, return to .cont25 (the fand in the delay slot runs either way). Otherwise load the word at [%i3+4] and classify on %g1 (presumably hx - confirm): %g1 < 0, or (%g1 | loaded word) == 0, goes to 1f (nop in the bz delay slot). Unlike the other handlers here, this one then recomputes %i3 = %i1 - stridex and reloads the operand from memory into %f10/%f11 before testing %g1 >= 0x00080000, which goes to 2f with the annulled ldd of [%o3+0x50] into %f60. On fall-through, fxtod converts %f10 (read as a 64-bit integer) to double, its high word is passed through [%fp+tmp7] into %g1, and iexp/hx and res (%f28) are rebuilt before returning to .cont25. NOTE(review): looks like rescaling of subnormal inputs - confirm. */.update25: 2087*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%i3 2088*25c28e83SPiotr Jasiukajtis cmp counter,6 2089*25c28e83SPiotr Jasiukajtis ble .cont25 2090*25c28e83SPiotr Jasiukajtis fand %f6,DC0,%f16 ! 
(6_0) res = vis_fand(res,DC0); 2091*25c28e83SPiotr Jasiukajtis 2092*25c28e83SPiotr Jasiukajtis ld [%i3+4],%i3 2093*25c28e83SPiotr Jasiukajtis cmp %g1,0 2094*25c28e83SPiotr Jasiukajtis bl 1f 2095*25c28e83SPiotr Jasiukajtis 2096*25c28e83SPiotr Jasiukajtis orcc %g1,%i3,%g0 2097*25c28e83SPiotr Jasiukajtis bz 1f 2098*25c28e83SPiotr Jasiukajtis nop 2099*25c28e83SPiotr Jasiukajtis 2100*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%i3 2101*25c28e83SPiotr Jasiukajtis ld [%i3],%f10 2102*25c28e83SPiotr Jasiukajtis ld [%i3+4],%f11 2103*25c28e83SPiotr Jasiukajtis 2104*25c28e83SPiotr Jasiukajtis sethi %hi(0x00080000),%i3 2105*25c28e83SPiotr Jasiukajtis 2106*25c28e83SPiotr Jasiukajtis cmp %g1,%i3 2107*25c28e83SPiotr Jasiukajtis bge,a 2f 2108*25c28e83SPiotr Jasiukajtis ldd [%o3+0x50],%f60 2109*25c28e83SPiotr Jasiukajtis 2110*25c28e83SPiotr Jasiukajtis fxtod %f10,%f10 ! res = *(long long*)&res; 2111*25c28e83SPiotr Jasiukajtis st %f10,[%fp+tmp7] 2112*25c28e83SPiotr Jasiukajtis 2113*25c28e83SPiotr Jasiukajtis fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); 2114*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 2115*25c28e83SPiotr Jasiukajtis 2116*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; 2117*25c28e83SPiotr Jasiukajtis 2118*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (5_0) hx >>= 10; 2119*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 2120*25c28e83SPiotr Jasiukajtis 2121*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; 2122*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; 2123*25c28e83SPiotr Jasiukajtis 2124*25c28e83SPiotr Jasiukajtis ba .cont25 2125*25c28e83SPiotr Jasiukajtis for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); 2126*25c28e83SPiotr Jasiukajtis/* 2: taken when %g1 >= 0x00080000. %f10 is ANDed with the constant loaded from [%o3+0x50] and converted with fxtod; the instructions that follow add the constant from [%o3+0x58] and rebuild hx/iexp/res before returning to .cont25 (offsets match DC4 and D2ON51 in .CONST_TBL if %o3 is the table base - TODO confirm). */2: 2127*25c28e83SPiotr Jasiukajtis fand %f10,%f60,%f10 2128*25c28e83SPiotr Jasiukajtis fxtod %f10,%f10 ! 
res = *(long long*)&res; 2129*25c28e83SPiotr Jasiukajtis ldd [%o3+0x58],%f60 2130*25c28e83SPiotr Jasiukajtis faddd %f10,%f60,%f10 2131*25c28e83SPiotr Jasiukajtis st %f10,[%fp+tmp7] 2132*25c28e83SPiotr Jasiukajtis 2133*25c28e83SPiotr Jasiukajtis fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0); 2134*25c28e83SPiotr Jasiukajtis ld [%fp+tmp7],%g1 2135*25c28e83SPiotr Jasiukajtis 2136*25c28e83SPiotr Jasiukajtis sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; 2137*25c28e83SPiotr Jasiukajtis 2138*25c28e83SPiotr Jasiukajtis sra %g1,10,%o2 ! (5_0) hx >>= 10; 2139*25c28e83SPiotr Jasiukajtis sub %o7,537,%o7 2140*25c28e83SPiotr Jasiukajtis 2141*25c28e83SPiotr Jasiukajtis and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; 2142*25c28e83SPiotr Jasiukajtis sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; 2143*25c28e83SPiotr Jasiukajtis 2144*25c28e83SPiotr Jasiukajtis ba .cont25 2145*25c28e83SPiotr Jasiukajtis for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1); 2146*25c28e83SPiotr Jasiukajtis/* 1: target of the bl/bz tests in .update25 (%g1 < 0, or %g1 | word at [%i3+4] == 0): set tmp_px = %i1 - stridex, tmp_counter = counter - 6, counter = 6 and return to .cont25. */1: 2147*25c28e83SPiotr Jasiukajtis sub %i1,stridex,tmp_px 2148*25c28e83SPiotr Jasiukajtis sub counter,6,tmp_counter 2149*25c28e83SPiotr Jasiukajtis 2150*25c28e83SPiotr Jasiukajtis ba .cont25 2151*25c28e83SPiotr Jasiukajtis mov 6,counter 2152*25c28e83SPiotr Jasiukajtis 2153*25c28e83SPiotr Jasiukajtis/* .exit: function epilogue - ret returns to the caller and the restore in its delay slot pops the register window. SET_SIZE(__vrsqrt) is a macro defined outside this file (presumably emitting the symbol size for __vrsqrt - confirm in libm.h). */.exit: 2154*25c28e83SPiotr Jasiukajtis ret 2155*25c28e83SPiotr Jasiukajtis restore 2156*25c28e83SPiotr Jasiukajtis SET_SIZE(__vrsqrt) 2157*25c28e83SPiotr Jasiukajtis 2158