1*25c28e83SPiotr Jasiukajtis/* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis/* 22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23*25c28e83SPiotr Jasiukajtis */ 24*25c28e83SPiotr Jasiukajtis/* 25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 
27*25c28e83SPiotr Jasiukajtis */ 28*25c28e83SPiotr Jasiukajtis 29*25c28e83SPiotr Jasiukajtis .file "__vhypot.S" 30*25c28e83SPiotr Jasiukajtis 31*25c28e83SPiotr Jasiukajtis#include "libm.h" 32*25c28e83SPiotr Jasiukajtis 33*25c28e83SPiotr Jasiukajtis RO_DATA 34*25c28e83SPiotr Jasiukajtis .align 64 35*25c28e83SPiotr Jasiukajtis /* .CONST_TBL: five 8-byte constants, each written as two .word values (first word, then a zero second word). The __vhypot entry code takes the table address via PIC_SET and loads the entries with ldd from offsets 0, 8, 16, 24 and 32 into DC0, DC1, DC2, D2ON28 and DC3 respectively. */ 36*25c28e83SPiotr Jasiukajtis.CONST_TBL: 37*25c28e83SPiotr Jasiukajtis .word 0x7ff00000, 0 ! DC0 38*25c28e83SPiotr Jasiukajtis .word 0x7fe00000, 0 ! DC1 39*25c28e83SPiotr Jasiukajtis .word 0x00100000, 0 ! DC2 40*25c28e83SPiotr Jasiukajtis .word 0x41b00000, 0 ! D2ON28 = 268435456.0 41*25c28e83SPiotr Jasiukajtis .word 0x7fd00000, 0 ! DC3 42*25c28e83SPiotr Jasiukajtis /* Integer register aliases used by the __vhypot code: element counter, saved counter/pointer copies, and the x/y/z strides (the entry code shifts each stride left by 3 before use). */ 43*25c28e83SPiotr Jasiukajtis#define counter %i0 44*25c28e83SPiotr Jasiukajtis#define tmp_counter %l3 45*25c28e83SPiotr Jasiukajtis#define tmp_px %l5 46*25c28e83SPiotr Jasiukajtis#define tmp_py %o7 47*25c28e83SPiotr Jasiukajtis#define stridex %i2 48*25c28e83SPiotr Jasiukajtis#define stridey %i4 49*25c28e83SPiotr Jasiukajtis#define stridez %l0 50*25c28e83SPiotr Jasiukajtis /* Floating-point register aliases that receive the .CONST_TBL entries of the same name; DC0_HI and DC0_LO name the two single-precision halves (%f8, %f9) of the DC0 register pair. */ 51*25c28e83SPiotr Jasiukajtis#define DC0 %f8 52*25c28e83SPiotr Jasiukajtis#define DC0_HI %f8 53*25c28e83SPiotr Jasiukajtis#define DC0_LO %f9 54*25c28e83SPiotr Jasiukajtis#define DC1 %f46 55*25c28e83SPiotr Jasiukajtis#define DC2 %f48 56*25c28e83SPiotr Jasiukajtis#define DC3 %f0 57*25c28e83SPiotr Jasiukajtis#define D2ON28 %f62 58*25c28e83SPiotr Jasiukajtis 59*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 60*25c28e83SPiotr Jasiukajtis! !!!!! algorithm !!!!! 61*25c28e83SPiotr Jasiukajtis! ((float*)&x)[0] = ((float*)px)[0]; 62*25c28e83SPiotr Jasiukajtis! ((float*)&x)[1] = ((float*)px)[1]; 63*25c28e83SPiotr Jasiukajtis! 64*25c28e83SPiotr Jasiukajtis! ((float*)&y)[0] = ((float*)py)[0]; 65*25c28e83SPiotr Jasiukajtis! ((float*)&y)[1] = ((float*)py)[1]; 66*25c28e83SPiotr Jasiukajtis! 67*25c28e83SPiotr Jasiukajtis! x = fabs(x); 68*25c28e83SPiotr Jasiukajtis! 
y = fabs(y); 69*25c28e83SPiotr Jasiukajtis! 70*25c28e83SPiotr Jasiukajtis! c0 = vis_fcmple32(DC1,x); 71*25c28e83SPiotr Jasiukajtis! c2 = vis_fcmple32(DC1,y); 72*25c28e83SPiotr Jasiukajtis! c1 = vis_fcmpgt32(DC2,x); 73*25c28e83SPiotr Jasiukajtis! c3 = vis_fcmpgt32(DC2,y); 74*25c28e83SPiotr Jasiukajtis! 75*25c28e83SPiotr Jasiukajtis! c0 |= c2; 76*25c28e83SPiotr Jasiukajtis! c1 &= c3; 77*25c28e83SPiotr Jasiukajtis! if ( (c0 & 2) != 0 ) 78*25c28e83SPiotr Jasiukajtis! { 79*25c28e83SPiotr Jasiukajtis! lx = ((int*)px)[1]; 80*25c28e83SPiotr Jasiukajtis! ly = ((int*)py)[1]; 81*25c28e83SPiotr Jasiukajtis! hx = *(int*)px; 82*25c28e83SPiotr Jasiukajtis! hy = *(int*)py; 83*25c28e83SPiotr Jasiukajtis! 84*25c28e83SPiotr Jasiukajtis! hx &= 0x7fffffff; 85*25c28e83SPiotr Jasiukajtis! hy &= 0x7fffffff; 86*25c28e83SPiotr Jasiukajtis! 87*25c28e83SPiotr Jasiukajtis! j0 = hx; 88*25c28e83SPiotr Jasiukajtis! if ( j0 < hy ) j0 = hy; 89*25c28e83SPiotr Jasiukajtis! j0 &= 0x7ff00000; 90*25c28e83SPiotr Jasiukajtis! if ( j0 >= 0x7ff00000 ) 91*25c28e83SPiotr Jasiukajtis! { 92*25c28e83SPiotr Jasiukajtis! if ( hx == 0x7ff00000 && lx == 0 ) res = x == y ? y : x; 93*25c28e83SPiotr Jasiukajtis! else if ( hy == 0x7ff00000 && ly == 0 ) res = x == y ? x : y; 94*25c28e83SPiotr Jasiukajtis! else res = x * y; 95*25c28e83SPiotr Jasiukajtis! 96*25c28e83SPiotr Jasiukajtis! ((float*)pz)[0] = ((float*)&res)[0]; 97*25c28e83SPiotr Jasiukajtis! ((float*)pz)[1] = ((float*)&res)[1]; 98*25c28e83SPiotr Jasiukajtis! } 99*25c28e83SPiotr Jasiukajtis! else 100*25c28e83SPiotr Jasiukajtis! { 101*25c28e83SPiotr Jasiukajtis! diff = hy - hx; 102*25c28e83SPiotr Jasiukajtis! j0 = diff >> 31; 103*25c28e83SPiotr Jasiukajtis! if ( ((diff ^ j0) - j0) < 0x03600000 ) 104*25c28e83SPiotr Jasiukajtis! { 105*25c28e83SPiotr Jasiukajtis! x *= D2ONM1022; 106*25c28e83SPiotr Jasiukajtis! y *= D2ONM1022; 107*25c28e83SPiotr Jasiukajtis! 108*25c28e83SPiotr Jasiukajtis! x_hi = ( x + two28 ) - two28; 109*25c28e83SPiotr Jasiukajtis! 
x_lo = x - x_hi; 110*25c28e83SPiotr Jasiukajtis! y_hi = ( y + two28 ) - two28; 111*25c28e83SPiotr Jasiukajtis! y_lo = y - y_hi; 112*25c28e83SPiotr Jasiukajtis! res = (x_hi * x_hi + y_hi * y_hi); 113*25c28e83SPiotr Jasiukajtis! res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 114*25c28e83SPiotr Jasiukajtis! 115*25c28e83SPiotr Jasiukajtis! res = sqrt(res); 116*25c28e83SPiotr Jasiukajtis! 117*25c28e83SPiotr Jasiukajtis! res = D2ONP1022 * res; 118*25c28e83SPiotr Jasiukajtis! ((float*)pz)[0] = ((float*)&res)[0]; 119*25c28e83SPiotr Jasiukajtis! ((float*)pz)[1] = ((float*)&res)[1]; 120*25c28e83SPiotr Jasiukajtis! } 121*25c28e83SPiotr Jasiukajtis! else 122*25c28e83SPiotr Jasiukajtis! { 123*25c28e83SPiotr Jasiukajtis! res = x + y; 124*25c28e83SPiotr Jasiukajtis! ((float*)pz)[0] = ((float*)&res)[0]; 125*25c28e83SPiotr Jasiukajtis! ((float*)pz)[1] = ((float*)&res)[1]; 126*25c28e83SPiotr Jasiukajtis! } 127*25c28e83SPiotr Jasiukajtis! } 128*25c28e83SPiotr Jasiukajtis! px += stridex; 129*25c28e83SPiotr Jasiukajtis! py += stridey; 130*25c28e83SPiotr Jasiukajtis! pz += stridez; 131*25c28e83SPiotr Jasiukajtis! continue; 132*25c28e83SPiotr Jasiukajtis! } 133*25c28e83SPiotr Jasiukajtis! if ( (c1 & 2) != 0 ) 134*25c28e83SPiotr Jasiukajtis! { 135*25c28e83SPiotr Jasiukajtis! x *= D2ONP1022; 136*25c28e83SPiotr Jasiukajtis! y *= D2ONP1022; 137*25c28e83SPiotr Jasiukajtis! 138*25c28e83SPiotr Jasiukajtis! x_hi = ( x + two28 ) - two28; 139*25c28e83SPiotr Jasiukajtis! x_lo = x - x_hi; 140*25c28e83SPiotr Jasiukajtis! y_hi = ( y + two28 ) - two28; 141*25c28e83SPiotr Jasiukajtis! y_lo = y - y_hi; 142*25c28e83SPiotr Jasiukajtis! res = (x_hi * x_hi + y_hi * y_hi); 143*25c28e83SPiotr Jasiukajtis! res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 144*25c28e83SPiotr Jasiukajtis! 145*25c28e83SPiotr Jasiukajtis! res = sqrt(res); 146*25c28e83SPiotr Jasiukajtis! 147*25c28e83SPiotr Jasiukajtis! res = D2ONM1022 * res; 148*25c28e83SPiotr Jasiukajtis! 
((float*)pz)[0] = ((float*)&res)[0]; 149*25c28e83SPiotr Jasiukajtis! ((float*)pz)[1] = ((float*)&res)[1]; 150*25c28e83SPiotr Jasiukajtis! px += stridex; 151*25c28e83SPiotr Jasiukajtis! py += stridey; 152*25c28e83SPiotr Jasiukajtis! pz += stridez; 153*25c28e83SPiotr Jasiukajtis! continue; 154*25c28e83SPiotr Jasiukajtis! } 155*25c28e83SPiotr Jasiukajtis! 156*25c28e83SPiotr Jasiukajtis! dmax = x; 157*25c28e83SPiotr Jasiukajtis! if ( dmax < y ) dmax = y; 158*25c28e83SPiotr Jasiukajtis! 159*25c28e83SPiotr Jasiukajtis! dmax = vis_fand(dmax,DC0); 160*25c28e83SPiotr Jasiukajtis! dnorm = vis_fpsub32(DC1,dmax); 161*25c28e83SPiotr Jasiukajtis! 162*25c28e83SPiotr Jasiukajtis! x *= dnorm; 163*25c28e83SPiotr Jasiukajtis! y *= dnorm; 164*25c28e83SPiotr Jasiukajtis! 165*25c28e83SPiotr Jasiukajtis! x_hi = x + D2ON28; 166*25c28e83SPiotr Jasiukajtis! x_hi -= D2ON28; 167*25c28e83SPiotr Jasiukajtis! x_lo = x - x_hi; 168*25c28e83SPiotr Jasiukajtis! 169*25c28e83SPiotr Jasiukajtis! y_hi = y + D2ON28; 170*25c28e83SPiotr Jasiukajtis! y_hi -= D2ON28; 171*25c28e83SPiotr Jasiukajtis! y_lo = y - y_hi; 172*25c28e83SPiotr Jasiukajtis! 173*25c28e83SPiotr Jasiukajtis! res = x_hi * x_hi; 174*25c28e83SPiotr Jasiukajtis! dtmp1 = x + x_hi; 175*25c28e83SPiotr Jasiukajtis! dtmp0 = y_hi * y_hi; 176*25c28e83SPiotr Jasiukajtis! dtmp2 = y + y_hi; 177*25c28e83SPiotr Jasiukajtis! 178*25c28e83SPiotr Jasiukajtis! res += dtmp0; 179*25c28e83SPiotr Jasiukajtis! dtmp1 *= x_lo; 180*25c28e83SPiotr Jasiukajtis! dtmp2 *= y_lo; 181*25c28e83SPiotr Jasiukajtis! dtmp1 += dtmp2; 182*25c28e83SPiotr Jasiukajtis! res += dtmp1; 183*25c28e83SPiotr Jasiukajtis! 184*25c28e83SPiotr Jasiukajtis! res = sqrt(res); 185*25c28e83SPiotr Jasiukajtis! 186*25c28e83SPiotr Jasiukajtis! res = dmax * res; 187*25c28e83SPiotr Jasiukajtis! ((float*)pz)[0] = ((float*)&res)[0]; 188*25c28e83SPiotr Jasiukajtis! ((float*)pz)[1] = ((float*)&res)[1]; 189*25c28e83SPiotr Jasiukajtis! 190*25c28e83SPiotr Jasiukajtis! 
px += stridex; 191*25c28e83SPiotr Jasiukajtis! py += stridey; 192*25c28e83SPiotr Jasiukajtis! pz += stridez; 193*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 194*25c28e83SPiotr Jasiukajtis 195*25c28e83SPiotr Jasiukajtis ENTRY(__vhypot) 196*25c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME),%sp 197*25c28e83SPiotr Jasiukajtis PIC_SETUP(l7) 198*25c28e83SPiotr Jasiukajtis PIC_SET(l7,.CONST_TBL,o3) 199*25c28e83SPiotr Jasiukajtis wr %g0,0x82,%asi 200*25c28e83SPiotr Jasiukajtis 201*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9 202*25c28e83SPiotr Jasiukajtis ldx [%fp+STACK_BIAS+176],%l0 203*25c28e83SPiotr Jasiukajtis#else 204*25c28e83SPiotr Jasiukajtis ld [%fp+STACK_BIAS+92],%l0 205*25c28e83SPiotr Jasiukajtis#endif 206*25c28e83SPiotr Jasiukajtis ldd [%o3],DC0 207*25c28e83SPiotr Jasiukajtis sll %i2,3,stridex 208*25c28e83SPiotr Jasiukajtis mov %i0,tmp_counter 209*25c28e83SPiotr Jasiukajtis 210*25c28e83SPiotr Jasiukajtis ldd [%o3+8],DC1 211*25c28e83SPiotr Jasiukajtis sll %i4,3,stridey 212*25c28e83SPiotr Jasiukajtis mov %i1,tmp_px 213*25c28e83SPiotr Jasiukajtis 214*25c28e83SPiotr Jasiukajtis ldd [%o3+16],DC2 215*25c28e83SPiotr Jasiukajtis sll %l0,3,stridez 216*25c28e83SPiotr Jasiukajtis mov %i3,tmp_py 217*25c28e83SPiotr Jasiukajtis 218*25c28e83SPiotr Jasiukajtis ldd [%o3+24],D2ON28 219*25c28e83SPiotr Jasiukajtis 220*25c28e83SPiotr Jasiukajtis ldd [%o3+32],DC3 221*25c28e83SPiotr Jasiukajtis 222*25c28e83SPiotr Jasiukajtis.begin: 223*25c28e83SPiotr Jasiukajtis mov tmp_counter,counter 224*25c28e83SPiotr Jasiukajtis mov tmp_px,%i1 225*25c28e83SPiotr Jasiukajtis mov tmp_py,%i3 226*25c28e83SPiotr Jasiukajtis clr tmp_counter 227*25c28e83SPiotr Jasiukajtis.begin1: 228*25c28e83SPiotr Jasiukajtis cmp counter,0 229*25c28e83SPiotr Jasiukajtis ble,pn %icc,.exit 230*25c28e83SPiotr Jasiukajtis nop 231*25c28e83SPiotr Jasiukajtis 232*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%o0 233*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%o5 
234*25c28e83SPiotr Jasiukajtis 235*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%o2 236*25c28e83SPiotr Jasiukajtis add %o5,1023,%o5 237*25c28e83SPiotr Jasiukajtis 238*25c28e83SPiotr Jasiukajtis lda [%i1]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0]; 239*25c28e83SPiotr Jasiukajtis 240*25c28e83SPiotr Jasiukajtis lda [%i1+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1]; 241*25c28e83SPiotr Jasiukajtis add %i1,stridex,%o1 ! px += stridex 242*25c28e83SPiotr Jasiukajtis 243*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f24 ! (1_0) ((float*)&y)[0] = ((float*)py)[0]; 244*25c28e83SPiotr Jasiukajtis sethi %hi(0x00100000),%l7 245*25c28e83SPiotr Jasiukajtis and %o0,%o5,%o0 246*25c28e83SPiotr Jasiukajtis 247*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1]; 248*25c28e83SPiotr Jasiukajtis and %o2,%o5,%o2 249*25c28e83SPiotr Jasiukajtis sethi %hi(0x7fe00000),%l6 250*25c28e83SPiotr Jasiukajtis 251*25c28e83SPiotr Jasiukajtis fabsd %f26,%f36 ! (1_0) x = fabs(x); 252*25c28e83SPiotr Jasiukajtis cmp %o0,%o2 253*25c28e83SPiotr Jasiukajtis mov %o2,%l4 254*25c28e83SPiotr Jasiukajtis 255*25c28e83SPiotr Jasiukajtis fabsd %f24,%f54 ! (1_0) y = fabs(y); 256*25c28e83SPiotr Jasiukajtis add %i3,stridey,%o5 ! py += stridey 257*25c28e83SPiotr Jasiukajtis movg %icc,%o0,%o2 258*25c28e83SPiotr Jasiukajtis lda [%o5]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0]; 259*25c28e83SPiotr Jasiukajtis 260*25c28e83SPiotr Jasiukajtis cmp %o2,%l6 261*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ff00000),%o4 262*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec0 263*25c28e83SPiotr Jasiukajtis lda [%o5+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1]; 264*25c28e83SPiotr Jasiukajtis 265*25c28e83SPiotr Jasiukajtis cmp %o2,%l7 266*25c28e83SPiotr Jasiukajtis bl,pn %icc,.spec1 267*25c28e83SPiotr Jasiukajtis nop 268*25c28e83SPiotr Jasiukajtis lda [%o1]%asi,%f26 ! 
(2_0) ((float*)&x)[0] = ((float*)px)[0]; 269*25c28e83SPiotr Jasiukajtis 270*25c28e83SPiotr Jasiukajtis lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1]; 271*25c28e83SPiotr Jasiukajtis add %i3,stridey,%i3 ! py += stridey 272*25c28e83SPiotr Jasiukajtis 273*25c28e83SPiotr Jasiukajtis fabsd %f28,%f34 ! (2_0) y = fabs(y); 274*25c28e83SPiotr Jasiukajtis 275*25c28e83SPiotr Jasiukajtis fabsd %f26,%f50 ! (2_0) x = fabs(x); 276*25c28e83SPiotr Jasiukajtis 277*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x); 278*25c28e83SPiotr Jasiukajtis 279*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y); 280*25c28e83SPiotr Jasiukajtis 281*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f50,%o4 ! (2_0) c1 = vis_fcmpgt32(DC2,x); 282*25c28e83SPiotr Jasiukajtis 283*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y); 284*25c28e83SPiotr Jasiukajtis 285*25c28e83SPiotr Jasiukajtis or %o3,%o0,%o3 ! (2_0) c0 |= c2; 286*25c28e83SPiotr Jasiukajtis 287*25c28e83SPiotr Jasiukajtis andcc %o3,2,%g0 ! (2_0) c0 & 2 288*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update0 ! (2_0) if ( (c0 & 2) != 0 ) 289*25c28e83SPiotr Jasiukajtis and %o4,%o5,%o4 ! (2_0) c1 &= c3; 290*25c28e83SPiotr Jasiukajtis.cont0: 291*25c28e83SPiotr Jasiukajtis add %i3,stridey,%l4 ! py += stridey 292*25c28e83SPiotr Jasiukajtis andcc %o4,2,%g0 ! (2_0) c1 & 2 293*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update1 ! (2_0) if ( (c1 & 2) != 0 ) 294*25c28e83SPiotr Jasiukajtis fmovd %f36,%f56 ! (1_0) dmax = x; 295*25c28e83SPiotr Jasiukajtis.cont1: 296*25c28e83SPiotr Jasiukajtis lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0]; 297*25c28e83SPiotr Jasiukajtis add %o1,stridex,%l2 ! px += stridex 298*25c28e83SPiotr Jasiukajtis 299*25c28e83SPiotr Jasiukajtis lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1]; 300*25c28e83SPiotr Jasiukajtis 301*25c28e83SPiotr Jasiukajtis lda [%l2]%asi,%f18 ! 
(3_1) ((float*)&x)[0] = ((float*)px)[0]; 302*25c28e83SPiotr Jasiukajtis 303*25c28e83SPiotr Jasiukajtis lda [%l2+4]%asi,%f19 ! (3_1) ((float*)&x)[1] = ((float*)px)[1]; 304*25c28e83SPiotr Jasiukajtis 305*25c28e83SPiotr Jasiukajtis fabsd %f30,%f30 ! (3_1) y = fabs(y); 306*25c28e83SPiotr Jasiukajtis 307*25c28e83SPiotr Jasiukajtis fabsd %f18,%f18 ! (3_1) x = fabs(x); 308*25c28e83SPiotr Jasiukajtis 309*25c28e83SPiotr Jasiukajtis fcmped %fcc2,%f54,%f56 ! (1_1) dmax ? y 310*25c28e83SPiotr Jasiukajtis 311*25c28e83SPiotr Jasiukajtis fmovdg %fcc2,%f54,%f56 ! (1_1) if ( dmax < y ) dmax = y; 312*25c28e83SPiotr Jasiukajtis 313*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f18,%o3 ! (3_1) c0 = vis_fcmple32(DC1,x); 314*25c28e83SPiotr Jasiukajtis 315*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f30,%o0 ! (3_1) c2 = vis_fcmple32(DC1,y); 316*25c28e83SPiotr Jasiukajtis 317*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f18,%o4 ! (3_1) c1 = vis_fcmpgt32(DC2,x); 318*25c28e83SPiotr Jasiukajtis 319*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f30,%o1 ! (3_1) c3 = vis_fcmpgt32(DC2,y); 320*25c28e83SPiotr Jasiukajtis 321*25c28e83SPiotr Jasiukajtis fand %f56,DC0,%f38 ! (1_1) dmax = vis_fand(dmax,DC0); 322*25c28e83SPiotr Jasiukajtis 323*25c28e83SPiotr Jasiukajtis or %o3,%o0,%o3 ! (3_1) c0 |= c2; 324*25c28e83SPiotr Jasiukajtis 325*25c28e83SPiotr Jasiukajtis andcc %o3,2,%g0 ! (3_1) c0 & 2 326*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update2 ! (3_1) if ( (c0 & 2) != 0 ) 327*25c28e83SPiotr Jasiukajtis and %o4,%o1,%o4 ! (3_1) c1 &= c3; 328*25c28e83SPiotr Jasiukajtis.cont2: 329*25c28e83SPiotr Jasiukajtis add %l4,stridey,%i3 ! py += stridey 330*25c28e83SPiotr Jasiukajtis andcc %o4,2,%g0 ! (3_1) c1 & 2 331*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update3 ! (3_1) if ( (c1 & 2) != 0 ) 332*25c28e83SPiotr Jasiukajtis fmovd %f50,%f32 ! (2_1) dmax = x; 333*25c28e83SPiotr Jasiukajtis.cont3: 334*25c28e83SPiotr Jasiukajtis fpsub32 DC1,%f38,%f10 ! 
(1_1) dnorm = vis_fpsub32(DC1,dmax); 335*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f20 ! (0_0) ((float*)&y)[0] = ((float*)py)[0]; 336*25c28e83SPiotr Jasiukajtis 337*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f21 ! (0_0) ((float*)&y)[1] = ((float*)py)[1]; 338*25c28e83SPiotr Jasiukajtis 339*25c28e83SPiotr Jasiukajtis add %l2,stridex,%l1 ! px += stridex 340*25c28e83SPiotr Jasiukajtis 341*25c28e83SPiotr Jasiukajtis fmuld %f36,%f10,%f36 ! (1_1) x *= dnorm; 342*25c28e83SPiotr Jasiukajtis lda [%l1]%asi,%f22 ! (0_0) ((float*)&x)[0] = ((float*)px)[0] 343*25c28e83SPiotr Jasiukajtis 344*25c28e83SPiotr Jasiukajtis lda [%l1+4]%asi,%f23 ! (0_0) ((float*)&x)[1] = ((float*)px)[1]; 345*25c28e83SPiotr Jasiukajtis 346*25c28e83SPiotr Jasiukajtis fmuld %f54,%f10,%f56 ! (1_1) y *= dnorm; 347*25c28e83SPiotr Jasiukajtis fabsd %f20,%f40 ! (0_0) y = fabs(y); 348*25c28e83SPiotr Jasiukajtis 349*25c28e83SPiotr Jasiukajtis fabsd %f22,%f20 ! (0_0) x = fabs(x); 350*25c28e83SPiotr Jasiukajtis 351*25c28e83SPiotr Jasiukajtis fcmped %fcc3,%f34,%f32 ! (2_1) dmax ? y 352*25c28e83SPiotr Jasiukajtis 353*25c28e83SPiotr Jasiukajtis 354*25c28e83SPiotr Jasiukajtis fmovdg %fcc3,%f34,%f32 ! (2_1) if ( dmax < y ) dmax = y; 355*25c28e83SPiotr Jasiukajtis 356*25c28e83SPiotr Jasiukajtis faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28; 357*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f20,%g5 ! (0_0) c0 = vis_fcmple32(DC1,x); 358*25c28e83SPiotr Jasiukajtis 359*25c28e83SPiotr Jasiukajtis faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28; 360*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f40,%o2 ! (0_0) c2 = vis_fcmple32(DC1,y); 361*25c28e83SPiotr Jasiukajtis 362*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f20,%g1 ! (0_0) c1 = vis_fcmpgt32(DC2,x); 363*25c28e83SPiotr Jasiukajtis 364*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f40,%o4 ! (0_0) c3 = vis_fcmpgt32(DC2,y); 365*25c28e83SPiotr Jasiukajtis 366*25c28e83SPiotr Jasiukajtis fand %f32,DC0,%f52 ! 
(2_1) dmax = vis_fand(dmax,DC0); 367*25c28e83SPiotr Jasiukajtis 368*25c28e83SPiotr Jasiukajtis or %g5,%o2,%g5 ! (0_0) c0 |= c2; 369*25c28e83SPiotr Jasiukajtis fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28; 370*25c28e83SPiotr Jasiukajtis 371*25c28e83SPiotr Jasiukajtis andcc %g5,2,%g0 ! (0_0) c0 & 2 372*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update4 ! (0_0) if ( (c0 & 2) != 0 ) 373*25c28e83SPiotr Jasiukajtis fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28; 374*25c28e83SPiotr Jasiukajtis.cont4: 375*25c28e83SPiotr Jasiukajtis and %g1,%o4,%g1 ! (0_0) c1 &= c3; 376*25c28e83SPiotr Jasiukajtis 377*25c28e83SPiotr Jasiukajtis add %i3,stridey,%l2 ! py += stridey 378*25c28e83SPiotr Jasiukajtis andcc %g1,2,%g0 ! (0_0) c1 & 2 379*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update5 ! (0_0) if ( (c1 & 2) != 0 ) 380*25c28e83SPiotr Jasiukajtis fmovd %f18,%f44 ! (3_1) dmax = x; 381*25c28e83SPiotr Jasiukajtis.cont5: 382*25c28e83SPiotr Jasiukajtis fpsub32 DC1,%f52,%f10 ! (2_1) dnorm = vis_fpsub32(DC1,dmax); 383*25c28e83SPiotr Jasiukajtis lda [%l2]%asi,%f24 ! (1_0) ((float*)&y)[0] = ((float*)py)[0]; 384*25c28e83SPiotr Jasiukajtis 385*25c28e83SPiotr Jasiukajtis fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi; 386*25c28e83SPiotr Jasiukajtis lda [%l2+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1]; 387*25c28e83SPiotr Jasiukajtis add %l1,stridex,%l7 ! px += stridex 388*25c28e83SPiotr Jasiukajtis faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi; 389*25c28e83SPiotr Jasiukajtis 390*25c28e83SPiotr Jasiukajtis faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi; 391*25c28e83SPiotr Jasiukajtis lda [%l7]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0]; 392*25c28e83SPiotr Jasiukajtis 393*25c28e83SPiotr Jasiukajtis fmuld %f50,%f10,%f50 ! (2_1) x *= dnorm; 394*25c28e83SPiotr Jasiukajtis fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi; 395*25c28e83SPiotr Jasiukajtis lda [%l7+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1]; 396*25c28e83SPiotr Jasiukajtis 397*25c28e83SPiotr Jasiukajtis fmuld %f22,%f22,%f2 ! 
(1_1) dtmp0 = y_hi * y_hi; 398*25c28e83SPiotr Jasiukajtis fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi; 399*25c28e83SPiotr Jasiukajtis 400*25c28e83SPiotr Jasiukajtis fmuld %f34,%f10,%f34 ! (2_1) y *= dnorm; 401*25c28e83SPiotr Jasiukajtis fabsd %f24,%f54 ! (1_0) y = fabs(y); 402*25c28e83SPiotr Jasiukajtis 403*25c28e83SPiotr Jasiukajtis fabsd %f26,%f36 ! (1_0) x = fabs(x); 404*25c28e83SPiotr Jasiukajtis 405*25c28e83SPiotr Jasiukajtis fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo; 406*25c28e83SPiotr Jasiukajtis fcmped %fcc0,%f30,%f44 ! (3_1) dmax ? y 407*25c28e83SPiotr Jasiukajtis 408*25c28e83SPiotr Jasiukajtis fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo; 409*25c28e83SPiotr Jasiukajtis 410*25c28e83SPiotr Jasiukajtis fmovdg %fcc0,%f30,%f44 ! (3_1) if ( dmax < y ) dmax = y; 411*25c28e83SPiotr Jasiukajtis 412*25c28e83SPiotr Jasiukajtis faddd %f50,D2ON28,%f58 ! (2_1) x_hi = x + D2ON28; 413*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f36,%g1 ! (1_0) c0 = vis_fcmple32(DC1,x); 414*25c28e83SPiotr Jasiukajtis 415*25c28e83SPiotr Jasiukajtis faddd %f34,D2ON28,%f22 ! (2_1) y_hi = y + D2ON28; 416*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f54,%g5 ! (1_0) c2 = vis_fcmple32(DC1,y); 417*25c28e83SPiotr Jasiukajtis 418*25c28e83SPiotr Jasiukajtis faddd %f60,%f2,%f24 ! (1_1) res += dtmp0; 419*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f36,%o5 ! (1_0) c1 = vis_fcmpgt32(DC2,x); 420*25c28e83SPiotr Jasiukajtis 421*25c28e83SPiotr Jasiukajtis faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2; 422*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f54,%o1 ! (1_0) c3 = vis_fcmpgt32(DC2,y); 423*25c28e83SPiotr Jasiukajtis 424*25c28e83SPiotr Jasiukajtis fand %f44,DC0,%f14 ! (3_1) dmax = vis_fand(dmax,DC0); 425*25c28e83SPiotr Jasiukajtis 426*25c28e83SPiotr Jasiukajtis or %g1,%g5,%g1 ! (1_0) c0 |= c2; 427*25c28e83SPiotr Jasiukajtis fsubd %f58,D2ON28,%f44 ! (2_1) x_hi -= D2ON28; 428*25c28e83SPiotr Jasiukajtis 429*25c28e83SPiotr Jasiukajtis andcc %g1,2,%g0 ! (1_0) c0 & 2 430*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update6 ! 
(1_0) if ( (c0 & 2) != 0 ) 431*25c28e83SPiotr Jasiukajtis fsubd %f22,D2ON28,%f58 ! (2_1) y_hi -= D2ON28; 432*25c28e83SPiotr Jasiukajtis.cont6: 433*25c28e83SPiotr Jasiukajtis and %o5,%o1,%o5 ! (1_0) c1 &= c3; 434*25c28e83SPiotr Jasiukajtis faddd %f24,%f28,%f26 ! (1_1) res += dtmp1; 435*25c28e83SPiotr Jasiukajtis 436*25c28e83SPiotr Jasiukajtis add %l2,stridey,%i3 ! py += stridey 437*25c28e83SPiotr Jasiukajtis andcc %o5,2,%g0 ! (1_0) c1 & 2 438*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update7 ! (1_0) if ( (c1 & 2) != 0 ) 439*25c28e83SPiotr Jasiukajtis fmovd %f20,%f4 ! (0_0) dmax = x; 440*25c28e83SPiotr Jasiukajtis.cont7: 441*25c28e83SPiotr Jasiukajtis fpsub32 DC1,%f14,%f10 ! (3_1) dnorm = vis_fpsub32(DC1,dmax); 442*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0]; 443*25c28e83SPiotr Jasiukajtis 444*25c28e83SPiotr Jasiukajtis fmuld %f44,%f44,%f2 ! (2_1) res = x_hi * x_hi; 445*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1]; 446*25c28e83SPiotr Jasiukajtis add %l7,stridex,%o1 ! px += stridex 447*25c28e83SPiotr Jasiukajtis faddd %f34,%f58,%f60 ! (2_1) dtmp2 = y + y_hi; 448*25c28e83SPiotr Jasiukajtis 449*25c28e83SPiotr Jasiukajtis fsqrtd %f26,%f24 ! (1_1) res = sqrt(res); 450*25c28e83SPiotr Jasiukajtis lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0]; 451*25c28e83SPiotr Jasiukajtis faddd %f50,%f44,%f56 ! (2_1) dtmp1 = x + x_hi; 452*25c28e83SPiotr Jasiukajtis 453*25c28e83SPiotr Jasiukajtis fmuld %f18,%f10,%f6 ! (3_1) x *= dnorm; 454*25c28e83SPiotr Jasiukajtis fsubd %f50,%f44,%f18 ! (2_1) x_lo = x - x_hi; 455*25c28e83SPiotr Jasiukajtis lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1]; 456*25c28e83SPiotr Jasiukajtis 457*25c28e83SPiotr Jasiukajtis fmuld %f58,%f58,%f44 ! (2_1) dtmp0 = y_hi * y_hi; 458*25c28e83SPiotr Jasiukajtis fsubd %f34,%f58,%f22 ! (2_1) y_lo = y - y_hi; 459*25c28e83SPiotr Jasiukajtis 460*25c28e83SPiotr Jasiukajtis fmuld %f30,%f10,%f58 ! 
(3_1) y *= dnorm; 461*25c28e83SPiotr Jasiukajtis fabsd %f28,%f34 ! (2_0) y = fabs(y); 462*25c28e83SPiotr Jasiukajtis 463*25c28e83SPiotr Jasiukajtis fabsd %f26,%f50 ! (2_0) x = fabs(x); 464*25c28e83SPiotr Jasiukajtis 465*25c28e83SPiotr Jasiukajtis fmuld %f56,%f18,%f10 ! (2_1) dtmp1 *= x_lo; 466*25c28e83SPiotr Jasiukajtis fcmped %fcc1,%f40,%f4 ! (0_0) dmax ? y 467*25c28e83SPiotr Jasiukajtis 468*25c28e83SPiotr Jasiukajtis fmuld %f60,%f22,%f12 ! (2_1) dtmp2 *= y_lo; 469*25c28e83SPiotr Jasiukajtis 470*25c28e83SPiotr Jasiukajtis fmovdg %fcc1,%f40,%f4 ! (0_0) if ( dmax < y ) dmax = y; 471*25c28e83SPiotr Jasiukajtis 472*25c28e83SPiotr Jasiukajtis faddd %f6,D2ON28,%f56 ! (3_1) x_hi = x + D2ON28; 473*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x); 474*25c28e83SPiotr Jasiukajtis 475*25c28e83SPiotr Jasiukajtis faddd %f58,D2ON28,%f28 ! (3_1) y_hi = y + D2ON28; 476*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y); 477*25c28e83SPiotr Jasiukajtis 478*25c28e83SPiotr Jasiukajtis faddd %f2,%f44,%f30 ! (2_1) res += dtmp0; 479*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f50,%o4 ! (2_0) c1 = vis_fcmpgt32(DC2,x); 480*25c28e83SPiotr Jasiukajtis 481*25c28e83SPiotr Jasiukajtis faddd %f10,%f12,%f26 ! (2_1) dtmp1 += dtmp2; 482*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y); 483*25c28e83SPiotr Jasiukajtis 484*25c28e83SPiotr Jasiukajtis fand %f4,DC0,%f16 ! (0_0) dmax = vis_fand(dmax,DC0); 485*25c28e83SPiotr Jasiukajtis 486*25c28e83SPiotr Jasiukajtis or %o3,%o0,%o3 ! (2_0) c0 |= c2; 487*25c28e83SPiotr Jasiukajtis fsubd %f56,D2ON28,%f18 ! (3_1) x_hi -= D2ON28; 488*25c28e83SPiotr Jasiukajtis 489*25c28e83SPiotr Jasiukajtis andcc %o3,2,%g0 ! (2_0) c0 & 2 490*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update8 ! (2_0) if ( (c0 & 2) != 0 ) 491*25c28e83SPiotr Jasiukajtis fsubd %f28,D2ON28,%f4 ! (3_1) y_hi -= D2ON28; 492*25c28e83SPiotr Jasiukajtis.cont8: 493*25c28e83SPiotr Jasiukajtis and %o4,%o5,%o4 ! 
(2_0) c1 &= c3; 494*25c28e83SPiotr Jasiukajtis faddd %f30,%f26,%f12 ! (2_1) res += dtmp1; 495*25c28e83SPiotr Jasiukajtis 496*25c28e83SPiotr Jasiukajtis add %i3,stridey,%l4 ! py += stridey 497*25c28e83SPiotr Jasiukajtis andcc %o4,2,%g0 ! (2_0) c1 & 2 498*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update9 ! (2_0) if ( (c1 & 2) != 0 ) 499*25c28e83SPiotr Jasiukajtis fmovd %f36,%f56 ! (1_0) dmax = x; 500*25c28e83SPiotr Jasiukajtis.cont9: 501*25c28e83SPiotr Jasiukajtis lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0]; 502*25c28e83SPiotr Jasiukajtis add %o1,stridex,%l2 ! px += stridex 503*25c28e83SPiotr Jasiukajtis fpsub32 DC1,%f16,%f44 ! (0_0) dnorm = vis_fpsub32(DC1,dmax); 504*25c28e83SPiotr Jasiukajtis 505*25c28e83SPiotr Jasiukajtis fmuld %f18,%f18,%f60 ! (3_1) res = x_hi * x_hi; 506*25c28e83SPiotr Jasiukajtis lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1]; 507*25c28e83SPiotr Jasiukajtis faddd %f58,%f4,%f32 ! (3_1) dtmp2 = y + y_hi; 508*25c28e83SPiotr Jasiukajtis 509*25c28e83SPiotr Jasiukajtis fsqrtd %f12,%f12 ! (2_1) res = sqrt(res); 510*25c28e83SPiotr Jasiukajtis faddd %f6,%f18,%f28 ! (3_1) dtmp1 = x + x_hi; 511*25c28e83SPiotr Jasiukajtis 512*25c28e83SPiotr Jasiukajtis cmp counter,4 513*25c28e83SPiotr Jasiukajtis bl,pn %icc,.tail 514*25c28e83SPiotr Jasiukajtis nop 515*25c28e83SPiotr Jasiukajtis 516*25c28e83SPiotr Jasiukajtis ba .main_loop 517*25c28e83SPiotr Jasiukajtis sub counter,4,counter 518*25c28e83SPiotr Jasiukajtis 519*25c28e83SPiotr Jasiukajtis .align 16 520*25c28e83SPiotr Jasiukajtis.main_loop: 521*25c28e83SPiotr Jasiukajtis fmuld %f20,%f44,%f2 ! (0_1) x *= dnorm; 522*25c28e83SPiotr Jasiukajtis fsubd %f6,%f18,%f20 ! (3_2) x_lo = x - x_hi; 523*25c28e83SPiotr Jasiukajtis lda [%l2]%asi,%f18 ! (3_1) ((float*)&x)[0] = ((float*)px)[0]; 524*25c28e83SPiotr Jasiukajtis 525*25c28e83SPiotr Jasiukajtis fmuld %f4,%f4,%f22 ! (3_2) dtmp0 = y_hi * y_hi; 526*25c28e83SPiotr Jasiukajtis lda [%l2+4]%asi,%f19 ! 
(3_1) ((float*)&x)[1] = ((float*)px)[1]; 527*25c28e83SPiotr Jasiukajtis fsubd %f58,%f4,%f58 ! (3_2) y_lo = y - y_hi; 528*25c28e83SPiotr Jasiukajtis 529*25c28e83SPiotr Jasiukajtis fmuld %f40,%f44,%f44 ! (0_1) y *= dnorm; 530*25c28e83SPiotr Jasiukajtis fabsd %f30,%f30 ! (3_1) y = fabs(y); 531*25c28e83SPiotr Jasiukajtis 532*25c28e83SPiotr Jasiukajtis fmuld %f38,%f24,%f10 ! (1_2) res = dmax * res; 533*25c28e83SPiotr Jasiukajtis fabsd %f18,%f18 ! (3_1) x = fabs(x); 534*25c28e83SPiotr Jasiukajtis st %f10,[%i5] ! (1_2) ((float*)pz)[0] = ((float*)&res)[0]; 535*25c28e83SPiotr Jasiukajtis 536*25c28e83SPiotr Jasiukajtis fmuld %f28,%f20,%f28 ! (3_2) dtmp1 *= x_lo; 537*25c28e83SPiotr Jasiukajtis st %f11,[%i5+4] ! (1_2) ((float*)pz)[1] = ((float*)&res)[1]; 538*25c28e83SPiotr Jasiukajtis fcmped %fcc2,%f54,%f56 ! (1_1) dmax ? y 539*25c28e83SPiotr Jasiukajtis 540*25c28e83SPiotr Jasiukajtis fmuld %f32,%f58,%f24 ! (3_2) dtmp2 *= y_lo; 541*25c28e83SPiotr Jasiukajtis 542*25c28e83SPiotr Jasiukajtis fmovdg %fcc2,%f54,%f56 ! (1_1) if ( dmax < y ) dmax = y; 543*25c28e83SPiotr Jasiukajtis 544*25c28e83SPiotr Jasiukajtis faddd %f2,D2ON28,%f10 ! (0_1) x_hi = x + D2ON28; 545*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f18,%o3 ! (3_1) c0 = vis_fcmple32(DC1,x); 546*25c28e83SPiotr Jasiukajtis 547*25c28e83SPiotr Jasiukajtis faddd %f44,D2ON28,%f20 ! (0_1) y_hi = y + D2ON28; 548*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f30,%o0 ! (3_1) c2 = vis_fcmple32(DC1,y); 549*25c28e83SPiotr Jasiukajtis 550*25c28e83SPiotr Jasiukajtis faddd %f60,%f22,%f22 ! (3_2) res += dtmp0; 551*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f18,%o4 ! (3_1) c1 = vis_fcmpgt32(DC2,x); 552*25c28e83SPiotr Jasiukajtis 553*25c28e83SPiotr Jasiukajtis faddd %f28,%f24,%f26 ! (3_2) dtmp1 += dtmp2; 554*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f30,%o1 ! (3_1) c3 = vis_fcmpgt32(DC2,y); 555*25c28e83SPiotr Jasiukajtis 556*25c28e83SPiotr Jasiukajtis fand %f56,DC0,%f38 ! 
(1_1) dmax = vis_fand(dmax,DC0); 557*25c28e83SPiotr Jasiukajtis 558*25c28e83SPiotr Jasiukajtis or %o3,%o0,%o3 ! (3_1) c0 |= c2; 559*25c28e83SPiotr Jasiukajtis fsubd %f10,D2ON28,%f58 ! (0_1) x_hi -= D2ON28; 560*25c28e83SPiotr Jasiukajtis 561*25c28e83SPiotr Jasiukajtis andcc %o3,2,%g0 ! (3_1) c0 & 2 562*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update10 ! (3_1) if ( (c0 & 2) != 0 ) 563*25c28e83SPiotr Jasiukajtis fsubd %f20,D2ON28,%f56 ! (0_1) y_hi -= D2ON28; 564*25c28e83SPiotr Jasiukajtis.cont10: 565*25c28e83SPiotr Jasiukajtis faddd %f22,%f26,%f28 ! (3_2) res += dtmp1; 566*25c28e83SPiotr Jasiukajtis and %o4,%o1,%o4 ! (3_1) c1 &= c3; 567*25c28e83SPiotr Jasiukajtis 568*25c28e83SPiotr Jasiukajtis add %l4,stridey,%i3 ! py += stridey 569*25c28e83SPiotr Jasiukajtis andcc %o4,2,%g0 ! (3_1) c1 & 2 570*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update11 ! (3_1) if ( (c1 & 2) != 0 ) 571*25c28e83SPiotr Jasiukajtis fmovd %f50,%f32 ! (2_1) dmax = x; 572*25c28e83SPiotr Jasiukajtis.cont11: 573*25c28e83SPiotr Jasiukajtis fpsub32 DC1,%f38,%f10 ! (1_1) dnorm = vis_fpsub32(DC1,dmax); 574*25c28e83SPiotr Jasiukajtis add %l2,stridex,%l1 ! px += stridex 575*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f20 ! (0_0) ((float*)&y)[0] = ((float*)py)[0]; 576*25c28e83SPiotr Jasiukajtis 577*25c28e83SPiotr Jasiukajtis fmuld %f58,%f58,%f6 ! (0_1) res = x_hi * x_hi; 578*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f21 ! (0_0) ((float*)&y)[1] = ((float*)py)[1]; 579*25c28e83SPiotr Jasiukajtis add %i5,stridez,%l6 ! pz += stridez 580*25c28e83SPiotr Jasiukajtis faddd %f44,%f56,%f60 ! (0_1) dtmp2 = y + y_hi; 581*25c28e83SPiotr Jasiukajtis 582*25c28e83SPiotr Jasiukajtis fsqrtd %f28,%f4 ! (3_2) res = sqrt(res); 583*25c28e83SPiotr Jasiukajtis lda [%l1]%asi,%f22 ! (0_0) ((float*)&x)[0] = ((float*)px)[0]; 584*25c28e83SPiotr Jasiukajtis faddd %f2,%f58,%f24 ! (0_1) dtmp1 = x + x_hi; 585*25c28e83SPiotr Jasiukajtis 586*25c28e83SPiotr Jasiukajtis fmuld %f36,%f10,%f36 ! 
(1_1) x *= dnorm; 587*25c28e83SPiotr Jasiukajtis fsubd %f2,%f58,%f26 ! (0_1) x_lo = x - x_hi; 588*25c28e83SPiotr Jasiukajtis lda [%l1+4]%asi,%f23 ! (0_0) ((float*)&x)[1] = ((float*)px)[1]; 589*25c28e83SPiotr Jasiukajtis 590*25c28e83SPiotr Jasiukajtis fmuld %f56,%f56,%f28 ! (0_1) dtmp0 = y_hi * y_hi; 591*25c28e83SPiotr Jasiukajtis fsubd %f44,%f56,%f44 ! (0_1) y_lo = y - y_hi; 592*25c28e83SPiotr Jasiukajtis 593*25c28e83SPiotr Jasiukajtis fmuld %f54,%f10,%f56 ! (1_1) y *= dnorm; 594*25c28e83SPiotr Jasiukajtis fabsd %f20,%f40 ! (0_0) y = fabs(y); 595*25c28e83SPiotr Jasiukajtis 596*25c28e83SPiotr Jasiukajtis fmuld %f52,%f12,%f12 ! (2_2) res = dmax * res; 597*25c28e83SPiotr Jasiukajtis fabsd %f22,%f20 ! (0_0) x = fabs(x); 598*25c28e83SPiotr Jasiukajtis st %f12,[%l6] ! (2_2) ((float*)pz)[0] = ((float*)&res)[0]; 599*25c28e83SPiotr Jasiukajtis 600*25c28e83SPiotr Jasiukajtis fmuld %f24,%f26,%f10 ! (0_1) dtmp1 *= x_lo; 601*25c28e83SPiotr Jasiukajtis st %f13,[%l6+4] ! (2_2) ((float*)pz)[1] = ((float*)&res)[1]; 602*25c28e83SPiotr Jasiukajtis fcmped %fcc3,%f34,%f32 ! (2_1) dmax ? y 603*25c28e83SPiotr Jasiukajtis 604*25c28e83SPiotr Jasiukajtis fmuld %f60,%f44,%f12 ! (0_1) dtmp2 *= y_lo; 605*25c28e83SPiotr Jasiukajtis 606*25c28e83SPiotr Jasiukajtis fmovdg %fcc3,%f34,%f32 ! (2_1) if ( dmax < y ) dmax = y; 607*25c28e83SPiotr Jasiukajtis 608*25c28e83SPiotr Jasiukajtis faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28; 609*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f20,%g5 ! (0_0) c0 = vis_fcmple32(DC1,x); 610*25c28e83SPiotr Jasiukajtis 611*25c28e83SPiotr Jasiukajtis faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28; 612*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f40,%o2 ! (0_0) c2 = vis_fcmple32(DC1,y); 613*25c28e83SPiotr Jasiukajtis 614*25c28e83SPiotr Jasiukajtis faddd %f6,%f28,%f24 ! (0_1) res += dtmp0; 615*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f20,%g1 ! (0_0) c1 = vis_fcmpgt32(DC2,x); 616*25c28e83SPiotr Jasiukajtis 617*25c28e83SPiotr Jasiukajtis faddd %f10,%f12,%f26 ! 
(0_1) dtmp1 += dtmp2; 618*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f40,%o4 ! (0_0) c3 = vis_fcmpgt32(DC2,y); 619*25c28e83SPiotr Jasiukajtis 620*25c28e83SPiotr Jasiukajtis fand %f32,DC0,%f52 ! (2_1) dmax = vis_fand(dmax,DC0); 621*25c28e83SPiotr Jasiukajtis 622*25c28e83SPiotr Jasiukajtis or %g5,%o2,%g5 ! (0_0) c0 |= c2; 623*25c28e83SPiotr Jasiukajtis fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28; 624*25c28e83SPiotr Jasiukajtis 625*25c28e83SPiotr Jasiukajtis andcc %g5,2,%g0 ! (0_0) c0 & 2 626*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update12 ! (0_0) if ( (c0 & 2) != 0 ) 627*25c28e83SPiotr Jasiukajtis fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28; 628*25c28e83SPiotr Jasiukajtis.cont12: 629*25c28e83SPiotr Jasiukajtis and %g1,%o4,%g1 ! (0_0) c1 &= c3; 630*25c28e83SPiotr Jasiukajtis faddd %f24,%f26,%f12 ! (0_1) res += dtmp1; 631*25c28e83SPiotr Jasiukajtis 632*25c28e83SPiotr Jasiukajtis add %i3,stridey,%l2 ! py += stridey 633*25c28e83SPiotr Jasiukajtis andcc %g1,2,%g0 ! (0_0) c1 & 2 634*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update13 ! (0_0) if ( (c1 & 2) != 0 ) 635*25c28e83SPiotr Jasiukajtis fmovd %f18,%f44 ! (3_1) dmax = x; 636*25c28e83SPiotr Jasiukajtis.cont13: 637*25c28e83SPiotr Jasiukajtis fpsub32 DC1,%f52,%f10 ! (2_1) dnorm = vis_fpsub32(DC1,dmax); 638*25c28e83SPiotr Jasiukajtis add %l1,stridex,%l7 ! px += stridex 639*25c28e83SPiotr Jasiukajtis lda [%l2]%asi,%f24 ! (1_0) ((float*)&y)[0] = ((float*)py)[0]; 640*25c28e83SPiotr Jasiukajtis 641*25c28e83SPiotr Jasiukajtis fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi; 642*25c28e83SPiotr Jasiukajtis add %l6,stridez,%i5 ! pz += stridez 643*25c28e83SPiotr Jasiukajtis lda [%l2+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1]; 644*25c28e83SPiotr Jasiukajtis faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi; 645*25c28e83SPiotr Jasiukajtis 646*25c28e83SPiotr Jasiukajtis fsqrtd %f12,%f12 ! (0_1) res = sqrt(res); 647*25c28e83SPiotr Jasiukajtis lda [%l7]%asi,%f26 ! 
(1_0) ((float*)&x)[0] = ((float*)px)[0]; 648*25c28e83SPiotr Jasiukajtis faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi; 649*25c28e83SPiotr Jasiukajtis 650*25c28e83SPiotr Jasiukajtis fmuld %f50,%f10,%f50 ! (2_1) x *= dnorm; 651*25c28e83SPiotr Jasiukajtis fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi; 652*25c28e83SPiotr Jasiukajtis lda [%l7+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1]; 653*25c28e83SPiotr Jasiukajtis 654*25c28e83SPiotr Jasiukajtis fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi; 655*25c28e83SPiotr Jasiukajtis fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi; 656*25c28e83SPiotr Jasiukajtis 657*25c28e83SPiotr Jasiukajtis fmuld %f34,%f10,%f34 ! (2_1) y *= dnorm; 658*25c28e83SPiotr Jasiukajtis fabsd %f24,%f54 ! (1_0) y = fabs(y); 659*25c28e83SPiotr Jasiukajtis 660*25c28e83SPiotr Jasiukajtis fmuld %f14,%f4,%f14 ! (3_2) res = dmax * res; 661*25c28e83SPiotr Jasiukajtis fabsd %f26,%f36 ! (1_0) x = fabs(x); 662*25c28e83SPiotr Jasiukajtis st %f14,[%i5] ! (3_2) ((float*)pz)[0] = ((float*)&res)[0]; 663*25c28e83SPiotr Jasiukajtis 664*25c28e83SPiotr Jasiukajtis fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo; 665*25c28e83SPiotr Jasiukajtis st %f15,[%i5+4] ! (3_2) ((float*)pz)[1] = ((float*)&res)[1]; 666*25c28e83SPiotr Jasiukajtis fcmped %fcc0,%f30,%f44 ! (3_1) dmax ? y 667*25c28e83SPiotr Jasiukajtis 668*25c28e83SPiotr Jasiukajtis fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo; 669*25c28e83SPiotr Jasiukajtis 670*25c28e83SPiotr Jasiukajtis fmovdg %fcc0,%f30,%f44 ! (3_1) if ( dmax < y ) dmax = y; 671*25c28e83SPiotr Jasiukajtis 672*25c28e83SPiotr Jasiukajtis faddd %f50,D2ON28,%f58 ! (2_1) x_hi = x + D2ON28; 673*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f36,%g1 ! (1_0) c0 = vis_fcmple32(DC1,x); 674*25c28e83SPiotr Jasiukajtis 675*25c28e83SPiotr Jasiukajtis faddd %f34,D2ON28,%f22 ! (2_1) y_hi = y + D2ON28; 676*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f54,%g5 ! (1_0) c2 = vis_fcmple32(DC1,y); 677*25c28e83SPiotr Jasiukajtis 678*25c28e83SPiotr Jasiukajtis faddd %f60,%f2,%f24 ! 
(1_1) res += dtmp0; 679*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f36,%o5 ! (1_0) c1 = vis_fcmpgt32(DC2,x); 680*25c28e83SPiotr Jasiukajtis 681*25c28e83SPiotr Jasiukajtis faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2; 682*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f54,%o1 ! (1_0) c3 = vis_fcmpgt32(DC2,y); 683*25c28e83SPiotr Jasiukajtis 684*25c28e83SPiotr Jasiukajtis fand %f44,DC0,%f14 ! (3_1) dmax = vis_fand(dmax,DC0); 685*25c28e83SPiotr Jasiukajtis 686*25c28e83SPiotr Jasiukajtis or %g1,%g5,%g1 ! (1_0) c0 |= c2; 687*25c28e83SPiotr Jasiukajtis fsubd %f58,D2ON28,%f44 ! (2_1) x_hi -= D2ON28; 688*25c28e83SPiotr Jasiukajtis 689*25c28e83SPiotr Jasiukajtis andcc %g1,2,%g0 ! (1_0) c0 & 2 690*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update14 ! (1_0) if ( (c0 & 2) != 0 ) 691*25c28e83SPiotr Jasiukajtis fsubd %f22,D2ON28,%f58 ! (2_1) y_hi -= D2ON28; 692*25c28e83SPiotr Jasiukajtis.cont14: 693*25c28e83SPiotr Jasiukajtis and %o5,%o1,%o5 ! (1_0) c1 &= c3; 694*25c28e83SPiotr Jasiukajtis faddd %f24,%f28,%f26 ! (1_1) res += dtmp1; 695*25c28e83SPiotr Jasiukajtis 696*25c28e83SPiotr Jasiukajtis add %l2,stridey,%i3 ! py += stridey 697*25c28e83SPiotr Jasiukajtis andcc %o5,2,%g0 ! (1_0) c1 & 2 698*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update15 ! (1_0) if ( (c1 & 2) != 0 ) 699*25c28e83SPiotr Jasiukajtis fmovd %f20,%f4 ! (0_0) dmax = x; 700*25c28e83SPiotr Jasiukajtis.cont15: 701*25c28e83SPiotr Jasiukajtis fpsub32 DC1,%f14,%f10 ! (3_1) dnorm = vis_fpsub32(DC1,dmax); 702*25c28e83SPiotr Jasiukajtis add %l7,stridex,%o1 ! px += stridex 703*25c28e83SPiotr Jasiukajtis lda [%i3]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0]; 704*25c28e83SPiotr Jasiukajtis 705*25c28e83SPiotr Jasiukajtis fmuld %f44,%f44,%f2 ! (2_1) res = x_hi * x_hi; 706*25c28e83SPiotr Jasiukajtis add %i5,stridez,%g5 ! pz += stridez 707*25c28e83SPiotr Jasiukajtis lda [%i3+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1]; 708*25c28e83SPiotr Jasiukajtis faddd %f34,%f58,%f60 ! 
(2_1) dtmp2 = y + y_hi; 709*25c28e83SPiotr Jasiukajtis 710*25c28e83SPiotr Jasiukajtis fsqrtd %f26,%f24 ! (1_1) res = sqrt(res); 711*25c28e83SPiotr Jasiukajtis lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0]; 712*25c28e83SPiotr Jasiukajtis faddd %f50,%f44,%f56 ! (2_1) dtmp1 = x + x_hi; 713*25c28e83SPiotr Jasiukajtis 714*25c28e83SPiotr Jasiukajtis fmuld %f18,%f10,%f6 ! (3_1) x *= dnorm; 715*25c28e83SPiotr Jasiukajtis fsubd %f50,%f44,%f18 ! (2_1) x_lo = x - x_hi; 716*25c28e83SPiotr Jasiukajtis lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1]; 717*25c28e83SPiotr Jasiukajtis 718*25c28e83SPiotr Jasiukajtis fmuld %f58,%f58,%f44 ! (2_1) dtmp0 = y_hi * y_hi; 719*25c28e83SPiotr Jasiukajtis fsubd %f34,%f58,%f22 ! (2_1) y_lo = y - y_hi; 720*25c28e83SPiotr Jasiukajtis 721*25c28e83SPiotr Jasiukajtis fmuld %f30,%f10,%f58 ! (3_1) y *= dnorm; 722*25c28e83SPiotr Jasiukajtis fabsd %f28,%f34 ! (2_0) y = fabs(y); 723*25c28e83SPiotr Jasiukajtis 724*25c28e83SPiotr Jasiukajtis fmuld %f16,%f12,%f16 ! (0_1) res = dmax * res; 725*25c28e83SPiotr Jasiukajtis fabsd %f26,%f50 ! (2_0) x = fabs(x); 726*25c28e83SPiotr Jasiukajtis st %f16,[%g5] ! (0_1) ((float*)pz)[0] = ((float*)&res)[0]; 727*25c28e83SPiotr Jasiukajtis 728*25c28e83SPiotr Jasiukajtis fmuld %f56,%f18,%f10 ! (2_1) dtmp1 *= x_lo; 729*25c28e83SPiotr Jasiukajtis st %f17,[%g5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res)[1]; 730*25c28e83SPiotr Jasiukajtis fcmped %fcc1,%f40,%f4 ! (0_0) dmax ? y 731*25c28e83SPiotr Jasiukajtis 732*25c28e83SPiotr Jasiukajtis fmuld %f60,%f22,%f12 ! (2_1) dtmp2 *= y_lo; 733*25c28e83SPiotr Jasiukajtis 734*25c28e83SPiotr Jasiukajtis fmovdg %fcc1,%f40,%f4 ! (0_0) if ( dmax < y ) dmax = y; 735*25c28e83SPiotr Jasiukajtis 736*25c28e83SPiotr Jasiukajtis faddd %f6,D2ON28,%f56 ! (3_1) x_hi = x + D2ON28; 737*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x); 738*25c28e83SPiotr Jasiukajtis 739*25c28e83SPiotr Jasiukajtis faddd %f58,D2ON28,%f28 ! 
(3_1) y_hi = y + D2ON28; 740*25c28e83SPiotr Jasiukajtis fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y); 741*25c28e83SPiotr Jasiukajtis 742*25c28e83SPiotr Jasiukajtis faddd %f2,%f44,%f30 ! (2_1) res += dtmp0; 743*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f50,%o4 ! (2_0) c1 = vis_fcmpgt32(DC2,x); 744*25c28e83SPiotr Jasiukajtis 745*25c28e83SPiotr Jasiukajtis faddd %f10,%f12,%f26 ! (2_1) dtmp1 += dtmp2; 746*25c28e83SPiotr Jasiukajtis fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y); 747*25c28e83SPiotr Jasiukajtis 748*25c28e83SPiotr Jasiukajtis fand %f4,DC0,%f16 ! (0_0) dmax = vis_fand(dmax,DC0); 749*25c28e83SPiotr Jasiukajtis 750*25c28e83SPiotr Jasiukajtis or %o3,%o0,%o3 ! (2_0) c0 |= c2; 751*25c28e83SPiotr Jasiukajtis fsubd %f56,D2ON28,%f18 ! (3_1) x_hi -= D2ON28; 752*25c28e83SPiotr Jasiukajtis 753*25c28e83SPiotr Jasiukajtis andcc %o3,2,%g0 ! (2_0) c0 & 2 754*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update16 ! (2_0) if ( (c0 & 2) != 0 ) 755*25c28e83SPiotr Jasiukajtis fsubd %f28,D2ON28,%f4 ! (3_1) y_hi -= D2ON28; 756*25c28e83SPiotr Jasiukajtis.cont16: 757*25c28e83SPiotr Jasiukajtis and %o4,%o5,%o4 ! (2_0) c1 &= c3; 758*25c28e83SPiotr Jasiukajtis faddd %f30,%f26,%f12 ! (2_1) res += dtmp1; 759*25c28e83SPiotr Jasiukajtis 760*25c28e83SPiotr Jasiukajtis add %i3,stridey,%l4 ! py += stridey 761*25c28e83SPiotr Jasiukajtis andcc %o4,2,%g0 ! (2_0) c1 & 2 762*25c28e83SPiotr Jasiukajtis bnz,pn %icc,.update17 ! (2_0) if ( (c1 & 2) != 0 ) 763*25c28e83SPiotr Jasiukajtis fmovd %f36,%f56 ! (1_0) dmax = x; 764*25c28e83SPiotr Jasiukajtis.cont17: 765*25c28e83SPiotr Jasiukajtis lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0]; 766*25c28e83SPiotr Jasiukajtis add %o1,stridex,%l2 ! px += stridex 767*25c28e83SPiotr Jasiukajtis fpsub32 DC1,%f16,%f44 ! (0_0) dnorm = vis_fpsub32(DC1,dmax); 768*25c28e83SPiotr Jasiukajtis 769*25c28e83SPiotr Jasiukajtis fmuld %f18,%f18,%f60 ! (3_1) res = x_hi * x_hi; 770*25c28e83SPiotr Jasiukajtis add %g5,stridez,%i5 ! 
! pz += stridez		(tail of the comment on the preceding `add %g5,stridez,%i5`)
	lda	[%l4+4]%asi,%f31	! (3_0) ((float*)&y)[1] = ((float*)py)[1];
	faddd	%f58,%f4,%f32		! (3_1) dtmp2 = y + y_hi;

	fsqrtd	%f12,%f12		! (2_1) res = sqrt(res);
	subcc	counter,4,counter	! counter -= 4;
	bpos,pt	%icc,.main_loop
	faddd	%f6,%f18,%f28		! (3_1) dtmp1 = x + x_hi;

	! Fell out of the loop: undo the last "counter -= 4".
	add	counter,4,counter

!
! .tail -- drain the software pipeline.
!
! The main loop falls through to here.  Up to three results are still in
! flight; they are completed and stored one at a time, and control goes
! back to .begin as soon as counter goes negative.
!
.tail:
	subcc	counter,1,counter
	bneg,a	.begin			! nothing left to store
	nop

	fsubd	%f6,%f18,%f20		! (3_2) x_lo = x - x_hi;

	fmuld	%f4,%f4,%f22		! (3_2) dtmp0 = y_hi * y_hi;
	fsubd	%f58,%f4,%f58		! (3_2) y_lo = y - y_hi;

	fmuld	%f38,%f24,%f10		! (1_2) res = dmax * res;
	st	%f10,[%i5]		! (1_2) ((float*)pz)[0] = ((float*)&res)[0];

	st	%f11,[%i5+4]		! (1_2) ((float*)pz)[1] = ((float*)&res)[1];

	subcc	counter,1,counter
	! Annulled branch: the delay-slot add (pz += stridez) is executed
	! only when the branch to .begin is taken.
	bneg,a	.begin
	add	%i5,stridez,%i5

	fmuld	%f28,%f20,%f28		! (3_2) dtmp1 *= x_lo;

	fmuld	%f32,%f58,%f24		! (3_2) dtmp2 *= y_lo;

	faddd	%f60,%f22,%f22		! (3_2) res += dtmp0;

	faddd	%f28,%f24,%f26		! (3_2) dtmp1 += dtmp2;

	faddd	%f22,%f26,%f28		! (3_2) res += dtmp1;

	add	%i5,stridez,%l6		! pz += stridez

	fsqrtd	%f28,%f4		! (3_2) res = sqrt(res);
	add	%l2,stridex,%l1		! px += stridex

	fmuld	%f52,%f12,%f12		! (2_2) res = dmax * res;
	st	%f12,[%l6]		! (2_2) ((float*)pz)[0] = ((float*)&res)[0];

	st	%f13,[%l6+4]		! (2_2) ((float*)pz)[1] = ((float*)&res)[1];

	subcc	counter,1,counter
	! Not annulled: %i5 = %l6 + stridez is computed on both paths; the
	! fall-through store below uses it.
	bneg	.begin
	add	%l6,stridez,%i5

	fmuld	%f14,%f4,%f14		! (3_2) res = dmax * res;
	st	%f14,[%i5]		! (3_2) ((float*)pz)[0] = ((float*)&res)[0];

	st	%f15,[%i5+4]		! (3_2) ((float*)pz)[1] = ((float*)&res)[1];

	ba	.begin
	add	%i5,stridez,%i5		! delay slot: pz += stridez

!
! .spec0 -- one element handled outside the pipeline, then back to .begin1.
!
! Reads x in %f26 and y in %f24, px in %i1, py in %i3, pz in %i5, and
! compares against %o4 (0x7ff00000 according to the existing comments).
! %o0, %l4 and %o2 are set up by the caller of this path, which is not
! visible here.
! NOTE(review): the existing comments call %o0 "hy" in the diff
! computation but "hx" in the Inf check below (and %l4 the other way
! round).  The absolute value taken of diff makes the sign irrelevant;
! confirm the naming against the code that sets these registers.
!
! Three outcomes:
!   - j0 >= 0x7ff00000: Inf/NaN handling at the first "1:" below.
!   - |diff| >= 0x03600000 (high words differ by at least 0x036 = 54 in
!     the exponent field): the result is simply x + y.
!   - otherwise: scale by DC2, run the same hi/lo split sum of squares as
!     the main loop, take the square root and scale back by DC3.  (Per
!     the .CONST_TBL comments DC2 is 0x00100000:0 and DC3 is
!     0x7fd00000:0, i.e. 2^-1022 and 2^1022.)  The "(1_1)"/"(1_2)" tags
!     and the "dnorm"/"dmax" names in the comments below are carried over
!     from the main loop.
!
	.align	16
.spec0:
	ld	[%i1+4],%l1		! lx = ((int*)px)[1];
	cmp	%o2,%o4			! j0 ? 0x7ff00000
	bge,pn	%icc,1f			! if ( j0 >= 0x7ff00000 )
	fabsd	%f26,%f26		! x = fabs(x);

	sub	%o0,%l4,%o0		! diff = hy - hx;
	fabsd	%f24,%f24		! y = fabs(y);

	sra	%o0,31,%l4		! j0 = diff >> 31;

	xor	%o0,%l4,%o0		! diff ^ j0

	sethi	%hi(0x03600000),%l1	! (lx in %l1 is no longer needed on this path)
	sub	%o0,%l4,%o0		! (diff ^ j0) - j0

	cmp	%o0,%l1			! ((diff ^ j0) - j0) ? 0x03600000
	! Annulled: the faddd runs only when the branch to 2f is taken.
	bge,a,pn	%icc,2f		! if ( ((diff ^ j0) - j0) >= 0x03600000 )
	faddd	%f26,%f24,%f24		! *pz = x + y

	fmuld	%f26,DC2,%f36		! (1_1) x *= dnorm;

	fmuld	%f24,DC2,%f56		! (1_1) y *= dnorm;

	faddd	%f36,D2ON28,%f58	! (1_1) x_hi = x + D2ON28;

	faddd	%f56,D2ON28,%f22	! (1_1) y_hi = y + D2ON28;

	fsubd	%f58,D2ON28,%f58	! (1_1) x_hi -= D2ON28;

	fsubd	%f22,D2ON28,%f22	! (1_1) y_hi -= D2ON28;

	fmuld	%f58,%f58,%f60		! (1_1) res = x_hi * x_hi;
	faddd	%f56,%f22,%f28		! (1_1) dtmp2 = y + y_hi;

	faddd	%f36,%f58,%f6		! (1_1) dtmp1 = x + x_hi;

	fsubd	%f36,%f58,%f58		! (1_1) x_lo = x - x_hi;

	fmuld	%f22,%f22,%f2		! (1_1) dtmp0 = y_hi * y_hi;
	fsubd	%f56,%f22,%f56		! (1_1) y_lo = y - y_hi;

	fmuld	%f6,%f58,%f10		! (1_1) dtmp1 *= x_lo;

	fmuld	%f28,%f56,%f26		! (1_1) dtmp2 *= y_lo;

	faddd	%f60,%f2,%f24		! (1_1) res += dtmp0;

	faddd	%f10,%f26,%f28		! (1_1) dtmp1 += dtmp2;

	faddd	%f24,%f28,%f26		! (1_1) res += dtmp1;

	fsqrtd	%f26,%f24		! (1_1) res = sqrt(res);

	fmuld	DC3,%f24,%f24		! (1_2) res = dmax * res;
	! Common exit for the two finite cases: store res, advance px/py/pz,
	! consume one element.
2:
	add	%i3,stridey,%i3
	add	%i1,stridex,%i1
	st	%f24,[%i5]		! ((float*)pz)[0] = ((float*)&res)[0];
	st	%f25,[%i5+4]		! ((float*)pz)[1] = ((float*)&res)[1];

	add	%i5,stridez,%i5
	ba	.begin1
	sub	counter,1,counter	! delay slot

	! Inf/NaN handling.  %o0 and %l4 are still unmodified here because
	! the bge above was taken before the diff computation.
1:
	ld	[%i3+4],%l2		! ly = ((int*)py)[1];
	cmp	%o0,%o4			! hx ? 0x7ff00000
	bne,pn	%icc,1f			! if ( hx != 0x7ff00000 )
	fabsd	%f24,%f24		! y = fabs(y);  (delay slot, runs on both paths)

	cmp	%l1,0			! lx ? 0
	be,pn	%icc,2f			! if ( lx == 0 )  -> store +Inf
	nop
1:
	cmp	%l4,%o4			! hy ? 0x7ff00000
	bne,pn	%icc,1f			! if ( hy != 0x7ff00000 )
	nop

	cmp	%l2,0			! ly ? 0
	be,pn	%icc,2f			! if ( ly == 0 )  -> store +Inf
	nop
	! Neither operand matched the hi == 0x7ff00000 && lo == 0 test:
	! the stored result is the product x * y.
1:
	add	%i3,stridey,%i3
	add	%i1,stridex,%i1
	fmuld	%f26,%f24,%f24		! res = x * y;
	st	%f24,[%i5]		! ((float*)pz)[0] = ((float*)&res)[0];

	st	%f25,[%i5+4]		! ((float*)pz)[1] = ((float*)&res)[1];

	add	%i5,stridez,%i5
	ba	.begin1
	sub	counter,1,counter	! delay slot

	! One operand matched the Inf test: store the DC0 register pair.
2:
	add	%i1,stridex,%i1
	add	%i3,stridey,%i3
	st	DC0_HI,[%i5]		! ((int*)pz)[0] = 0x7ff00000;
	st	DC0_LO,[%i5+4]		! ((int*)pz)[1] = 0;
	! NOTE(review): the compare result is not used in this block;
	! presumably it is issued only for its exception side effect -- confirm.
	fcmpd	%f26,%f24		! x ? y

	add	%i5,stridez,%i5
	ba	.begin1
	sub	counter,1,counter	! delay slot

!
! .spec1 -- same computation as the finite path of .spec0 with the two
! scale factors swapped: the inputs are multiplied by DC3 and the square
! root by DC2.  x is read from %f26 and y from %f24; the caller is not
! visible here.
!
	.align	16
.spec1:
	fmuld	%f26,DC3,%f36		! (1_1) x *= dnorm;

	fmuld	%f24,DC3,%f56		! (1_1) y *= dnorm;

	faddd	%f36,D2ON28,%f58	! (1_1) x_hi = x + D2ON28;

	faddd	%f56,D2ON28,%f22	! (1_1) y_hi = y + D2ON28;

	fsubd	%f58,D2ON28,%f58	! (1_1) x_hi -= D2ON28;

	fsubd	%f22,D2ON28,%f22	! (1_1) y_hi -= D2ON28;

	fmuld	%f58,%f58,%f60		! (1_1) res = x_hi * x_hi;
	faddd	%f56,%f22,%f28		! (1_1) dtmp2 = y + y_hi;

	faddd	%f36,%f58,%f6		! (1_1) dtmp1 = x + x_hi;

	fsubd	%f36,%f58,%f58		! (1_1) x_lo = x - x_hi;

	fmuld	%f22,%f22,%f2		! (1_1) dtmp0 = y_hi * y_hi;
	fsubd	%f56,%f22,%f56		! (1_1) y_lo = y - y_hi;

	fmuld	%f6,%f58,%f10		! (1_1) dtmp1 *= x_lo;

	fmuld	%f28,%f56,%f26		! (1_1) dtmp2 *= y_lo;

	faddd	%f60,%f2,%f24		! (1_1) res += dtmp0;

	faddd	%f10,%f26,%f28		! (1_1) dtmp1 += dtmp2;

	faddd	%f24,%f28,%f26		! (1_1) res += dtmp1;

	fsqrtd	%f26,%f24		! (1_1) res = sqrt(res);

	fmuld	DC2,%f24,%f24		! (1_2) res = dmax * res;

	add	%i3,stridey,%i3
	add	%i1,stridex,%i1
	st	%f24,[%i5]		! ((float*)pz)[0] = ((float*)&res)[0];

	! Low word of res; the pz/counter update and the branch back to
	! .begin1 that finish .spec1 follow immediately after this store.
	st	%f25,[%i5+4]		!
! ((float*)pz)[1] = ((float*)&res)[1];	(comment belonging to the preceding `st %f25,[%i5+4]`)
	! End of .spec1: advance pz, consume one element, back to .begin1.
	add	%i5,stridez,%i5
	ba	.begin1
	sub	counter,1,counter	! delay slot

!
! .update0 .. .update17 -- out-of-line handlers for the pipeline's operand
! range checks.
!
! In the main loop the even-numbered handlers .update10/12/14/16 are
! reached from the "c0 & 2" tests (fcmple32 against DC1 on x or y) and the
! odd-numbered ones .update11/13/15/17 from the "c1 & 2" tests (fcmpgt32
! against DC2 on both x and y).  .update0 .. .update9 are branched to from
! code that is not visible here, presumably the matching checks in the
! pipeline prologue -- confirm.
!
! Every handler has the same shape, for a handler-specific constant K:
!
!	fzero	<reg>			! zero the two FP registers that hold
!	cmp	counter,K		!   the slot's x and y
!	ble	.contN			! counter <= K: just resume
!	fzero	<reg>			!   (delay slot, executed on both paths)
!	mov	<px>,tmp_px		! otherwise remember where this element
!	mov	<py>,tmp_py		!   lives,
!	sub	counter,K,tmp_counter	!   how many elements remain after K,
!	ba	.contN			!   and resume with
!	mov	K,counter		!   counter clamped to K (delay slot)
!
! tmp_px / tmp_py / tmp_counter are not read anywhere in this part of the
! file; presumably .begin picks them up to restart at the deferred element
! -- confirm against .begin.
!
! Each handler must come back to the .contN label that directly follows
! the branch which entered it, on BOTH of its exits.
!
	.align	16
.update0:
	fzero	%f50
	cmp	counter,1
	ble	.cont0
	fzero	%f34

	mov	%o1,tmp_px
	mov	%i3,tmp_py

	sub	counter,1,tmp_counter
	ba	.cont0
	mov	1,counter

	.align	16
.update1:
	fzero	%f50
	cmp	counter,1
	ble	.cont1
	fzero	%f34

	mov	%o1,tmp_px
	mov	%i3,tmp_py

	sub	counter,1,tmp_counter
	ba	.cont1
	mov	1,counter

	.align	16
.update2:
	fzero	%f18
	cmp	counter,2
	ble	.cont2
	fzero	%f30

	mov	%l2,tmp_px
	mov	%l4,tmp_py

	sub	counter,2,tmp_counter
	! Fixed: this used to be "ba .cont1", which disagreed with the
	! "ble .cont2" early-out above and with every other handler
	! (.updateN always resumes at .contN).
	ba	.cont2
	mov	2,counter

	.align	16
.update3:
	fzero	%f18
	cmp	counter,2
	ble	.cont3
	fzero	%f30

	mov	%l2,tmp_px
	mov	%l4,tmp_py

	sub	counter,2,tmp_counter
	ba	.cont3
	mov	2,counter

	.align	16
.update4:
	fzero	%f20
	cmp	counter,3
	ble	.cont4
	fzero	%f40

	mov	%l1,tmp_px
	mov	%i3,tmp_py

	sub	counter,3,tmp_counter
	ba	.cont4
	mov	3,counter

	.align	16
.update5:
	fzero	%f20
	cmp	counter,3
	ble	.cont5
	fzero	%f40

	mov	%l1,tmp_px
	mov	%i3,tmp_py

	sub	counter,3,tmp_counter
	ba	.cont5
	mov	3,counter

	.align	16
.update6:
	fzero	%f36
	cmp	counter,4
	ble	.cont6
	fzero	%f54

	mov	%l7,tmp_px
	mov	%l2,tmp_py

	sub	counter,4,tmp_counter
	ba	.cont6
	mov	4,counter

	.align	16
.update7:
	fzero	%f36
	cmp	counter,4
	ble	.cont7
	fzero	%f54

	mov	%l7,tmp_px
	mov	%l2,tmp_py

	sub	counter,4,tmp_counter
	ba	.cont7
	mov	4,counter

	.align	16
.update8:
	fzero	%f50
	cmp	counter,5
	ble	.cont8
	fzero	%f34

	mov	%o1,tmp_px
	mov	%i3,tmp_py

	sub	counter,5,tmp_counter
	ba	.cont8
	mov	5,counter

	.align	16
.update9:
	fzero	%f50
	cmp	counter,5
	ble	.cont9
	fzero	%f34

	mov	%o1,tmp_px
	mov	%i3,tmp_py

	sub	counter,5,tmp_counter
	ba	.cont9
	mov	5,counter


	! Handlers for the checks inside .main_loop.
	.align	16
.update10:
	fzero	%f18			! (3_1) x
	cmp	counter,2
	ble	.cont10
	fzero	%f30			! (3_1) y

	mov	%l2,tmp_px
	mov	%l4,tmp_py

	sub	counter,2,tmp_counter
	ba	.cont10
	mov	2,counter

	.align	16
.update11:
	fzero	%f18			! (3_1) x
	cmp	counter,2
	ble	.cont11
	fzero	%f30			! (3_1) y

	mov	%l2,tmp_px
	mov	%l4,tmp_py

	sub	counter,2,tmp_counter
	ba	.cont11
	mov	2,counter

	.align	16
.update12:
	fzero	%f20			! (0_0) x
	cmp	counter,3
	ble	.cont12
	fzero	%f40			! (0_0) y

	mov	%l1,tmp_px
	mov	%i3,tmp_py

	sub	counter,3,tmp_counter
	ba	.cont12
	mov	3,counter

	.align	16
.update13:
	fzero	%f20			! (0_0) x
	cmp	counter,3
	ble	.cont13
	fzero	%f40			! (0_0) y

	mov	%l1,tmp_px
	mov	%i3,tmp_py

	sub	counter,3,tmp_counter
	ba	.cont13
	mov	3,counter

	.align	16
.update14:
	fzero	%f54			! (1_0) y
	cmp	counter,4
	ble	.cont14
	fzero	%f36			! (1_0) x

	mov	%l7,tmp_px
	mov	%l2,tmp_py

	sub	counter,4,tmp_counter
	ba	.cont14
	mov	4,counter

	.align	16
.update15:
	fzero	%f54			! (1_0) y
	cmp	counter,4
	ble	.cont15
	fzero	%f36			! (1_0) x

	mov	%l7,tmp_px
	mov	%l2,tmp_py

	sub	counter,4,tmp_counter
	ba	.cont15
	mov	4,counter

	.align	16
.update16:
	fzero	%f50			! (2_0) x
	cmp	counter,5
	ble	.cont16
	fzero	%f34			! (2_0) y

	mov	%o1,tmp_px
	mov	%i3,tmp_py

	sub	counter,5,tmp_counter
	ba	.cont16
	mov	5,counter

	.align	16
.update17:
	fzero	%f50			! (2_0) x
	cmp	counter,5
	ble	.cont17
	fzero	%f34			! (2_0) y

	mov	%o1,tmp_px
	mov	%i3,tmp_py

	sub	counter,5,tmp_counter
	ba	.cont17
	mov	5,counter

	! Function epilogue: return to the caller; the delay-slot restore
	! pops the register window.
	.align	16
.exit:
	ret
	restore
	SET_SIZE(__vhypot)
