1*25c28e83SPiotr Jasiukajtis/* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis/* 22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23*25c28e83SPiotr Jasiukajtis */ 24*25c28e83SPiotr Jasiukajtis/* 25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 
27*25c28e83SPiotr Jasiukajtis */ 28*25c28e83SPiotr Jasiukajtis 29*25c28e83SPiotr Jasiukajtis .file "__vatan2f.S" 30*25c28e83SPiotr Jasiukajtis 31*25c28e83SPiotr Jasiukajtis#include "libm.h" 32*25c28e83SPiotr Jasiukajtis 33*25c28e83SPiotr Jasiukajtis RO_DATA 34*25c28e83SPiotr Jasiukajtis .align 64
/*
 * .CONST_TBL: read-only table of 8-byte doubles, each written as two
 * 32-bit .word halves with the high word first.  Byte offsets from
 * .CONST_TBL:
 *
 *	  0 ..  24	-pi/2, pi/2, -pi/2, pi/2
 *	 32 ..  40	-pi, pi
 *	 48 ..  56	-0.0, 0.0
 *	 64 ..  72	-1.0, 1.0
 *	 80 .. 152	polynomial coefficients K0 .. K9
 *	160		pi/4
 *	168		2^(-149) (presumably the C2ONM149 scale factor named
 *			in the algorithm comment -- confirm)
 *
 * The order of the entries is load-bearing: __vatan2f sets cadd_arr to
 * .CONST_TBL+56 and cmul_arr to .CONST_TBL+72, and loads K0..K9 from
 * .CONST_TBL+80 onward.  The main-path loads then index cadd_arr with
 * byte offsets built from -32 (|x| < |y|), -16 (x negative) and
 * -8 (y negative), i.e. 0 down to -56, and cmul_arr with 0 or -8.
 */
35*25c28e83SPiotr Jasiukajtis.CONST_TBL: 36*25c28e83SPiotr Jasiukajtis .word 0xbff921fb, 0x54442d18 ! -M_PI_2 37*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x54442d18 ! M_PI_2 38*25c28e83SPiotr Jasiukajtis .word 0xbff921fb, 0x54442d18 ! -M_PI_2 39*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x54442d18 ! M_PI_2 40*25c28e83SPiotr Jasiukajtis .word 0xc00921fb, 0x54442d18 ! -M_PI 41*25c28e83SPiotr Jasiukajtis .word 0x400921fb, 0x54442d18 ! M_PI 42*25c28e83SPiotr Jasiukajtis .word 0x80000000, 0x00000000 ! -0.0 43*25c28e83SPiotr Jasiukajtis .word 0x00000000, 0x00000000 ! 0.0 44*25c28e83SPiotr Jasiukajtis 45*25c28e83SPiotr Jasiukajtis .word 0xbff00000, 0x00000000 ! -1.0 46*25c28e83SPiotr Jasiukajtis .word 0x3ff00000, 0x00000000 ! 1.0 47*25c28e83SPiotr Jasiukajtis 48*25c28e83SPiotr Jasiukajtis .word 0x3fefffff, 0xfe79bf93 ! K0 = 9.99999997160545464888e-01 49*25c28e83SPiotr Jasiukajtis .word 0xbfd55552, 0xf0db4320 ! K1 = -3.33332762919825514315e-01 50*25c28e83SPiotr Jasiukajtis .word 0x3fc998f8, 0x2493d066 ! K2 = 1.99980752811487135558e-01 51*25c28e83SPiotr Jasiukajtis .word 0xbfc240b8, 0xd994abf9 ! K3 = -1.42600160828209047720e-01 52*25c28e83SPiotr Jasiukajtis .word 0x3fbbfc9e, 0x8c2b0243 ! K4 = 1.09323415013030928421e-01 53*25c28e83SPiotr Jasiukajtis .word 0xbfb56013, 0x64b1cac3 ! K5 = -8.34972496830160174704e-02 54*25c28e83SPiotr Jasiukajtis .word 0x3fad3ad7, 0x9f53e142 ! K6 = 5.70895559303061900411e-02 55*25c28e83SPiotr Jasiukajtis .word 0xbf9f148f, 0x2a829af1 ! K7 = -3.03518647857811706139e-02 56*25c28e83SPiotr Jasiukajtis .word 0x3f857a8c, 0x747ed314 ! K8 = 1.04876492549493055747e-02 57*25c28e83SPiotr Jasiukajtis .word 0xbf5bdf39, 0x729124b6 ! 
K9 = -1.70117006406859722727e-03 58*25c28e83SPiotr Jasiukajtis 59*25c28e83SPiotr Jasiukajtis .word 0x3fe921fb, 0x54442d18 ! M_PI_4 60*25c28e83SPiotr Jasiukajtis .word 0x36a00000, 0x00000000 ! 2^(-149) 61*25c28e83SPiotr Jasiukajtis
/*
 * Register and stack-slot aliases used by __vatan2f.
 *
 * Several aliases reuse incoming-argument registers, so the function
 * prologue copies the incoming value away before the alias is written:
 *   - %i0 is stored to the tmp_counter slot before it becomes cmul_arr;
 *   - %i2 is copied to %o2 before it becomes cadd_arr, and stridey is
 *     then set to %o2 << 2;
 *   - the incoming %i5 is copied to %o4 (later stored to the tmp_pz
 *     slot) before %i5 is overwritten as stridey.
 * stridex is scaled in place (<< 2) and stridez is a value loaded from
 * the caller's frame, also << 2.
 *
 * _0x7fffffff and _0x7f800000 hold those two masks for the whole loop.
 * K0..K9 name the double-precision FP registers loaded once from
 * .CONST_TBL+80 .. +152.
 *
 * tmp_counter/tmp_py/tmp_px/tmp_pz are four 8-byte slots addressed as
 * %fp + STACK_BIAS - 32 .. - 8; tmps (0x20) is the extra frame space the
 * save instruction reserves for them.
 */
62*25c28e83SPiotr Jasiukajtis#define counter %o3 63*25c28e83SPiotr Jasiukajtis#define stridex %i4 64*25c28e83SPiotr Jasiukajtis#define stridey %i5 65*25c28e83SPiotr Jasiukajtis#define stridez %l1 66*25c28e83SPiotr Jasiukajtis#define cmul_arr %i0 67*25c28e83SPiotr Jasiukajtis#define cadd_arr %i2 68*25c28e83SPiotr Jasiukajtis#define _0x7fffffff %l0 69*25c28e83SPiotr Jasiukajtis#define _0x7f800000 %l2 70*25c28e83SPiotr Jasiukajtis 71*25c28e83SPiotr Jasiukajtis#define K0 %f42 72*25c28e83SPiotr Jasiukajtis#define K1 %f44 73*25c28e83SPiotr Jasiukajtis#define K2 %f46 74*25c28e83SPiotr Jasiukajtis#define K3 %f48 75*25c28e83SPiotr Jasiukajtis#define K4 %f50 76*25c28e83SPiotr Jasiukajtis#define K5 %f52 77*25c28e83SPiotr Jasiukajtis#define K6 %f54 78*25c28e83SPiotr Jasiukajtis#define K7 %f56 79*25c28e83SPiotr Jasiukajtis#define K8 %f58 80*25c28e83SPiotr Jasiukajtis#define K9 %f60 81*25c28e83SPiotr Jasiukajtis 82*25c28e83SPiotr Jasiukajtis#define tmp_counter STACK_BIAS-32 83*25c28e83SPiotr Jasiukajtis#define tmp_py STACK_BIAS-24 84*25c28e83SPiotr Jasiukajtis#define tmp_px STACK_BIAS-16 85*25c28e83SPiotr Jasiukajtis#define tmp_pz STACK_BIAS-8 86*25c28e83SPiotr Jasiukajtis 87*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9 88*25c28e83SPiotr Jasiukajtis#define tmps 0x20 89*25c28e83SPiotr Jasiukajtis 90*25c28e83SPiotr Jasiukajtis!-------------------------------------------------------------------- 91*25c28e83SPiotr Jasiukajtis! !!!!! vatan2f algorithm !!!!! 92*25c28e83SPiotr Jasiukajtis! uy0 = *(int*)py; 93*25c28e83SPiotr Jasiukajtis! ux0 = *(int*)px; 94*25c28e83SPiotr Jasiukajtis! ay0 = uy0 & 0x7fffffff; 95*25c28e83SPiotr Jasiukajtis! ax0 = ux0 & 0x7fffffff; 96*25c28e83SPiotr Jasiukajtis! 
if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 ) 97*25c28e83SPiotr Jasiukajtis! { 98*25c28e83SPiotr Jasiukajtis! /* |X| or |Y| = Nan */ 99*25c28e83SPiotr Jasiukajtis! if ( ax0 > 0x7f800000 || ay0 > 0x7f800000 ) 100*25c28e83SPiotr Jasiukajtis! { 101*25c28e83SPiotr Jasiukajtis! ftmp0 = *(float*)&ax0 * *(float*)&ay0; 102*25c28e83SPiotr Jasiukajtis! *pz = ftmp0; 103*25c28e83SPiotr Jasiukajtis! } 104*25c28e83SPiotr Jasiukajtis! signx0 = (unsigned)ux0 >> 30; 105*25c28e83SPiotr Jasiukajtis! signx0 &= 2; 106*25c28e83SPiotr Jasiukajtis! signy0 = uy0 >> 31; 107*25c28e83SPiotr Jasiukajtis! if (ay0 == 0x7f800000) 108*25c28e83SPiotr Jasiukajtis! signx0 = (ax0 == 0x7f800000) ? signx0 + 1 : 2; 109*25c28e83SPiotr Jasiukajtis! else 110*25c28e83SPiotr Jasiukajtis! signx0 += signx0; 111*25c28e83SPiotr Jasiukajtis! res = signx0 * M_PI_4; 112*25c28e83SPiotr Jasiukajtis! signy0 <<= 3; 113*25c28e83SPiotr Jasiukajtis! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0); 114*25c28e83SPiotr Jasiukajtis! res *= dtmp0; 115*25c28e83SPiotr Jasiukajtis! ftmp0 = (float) res; 116*25c28e83SPiotr Jasiukajtis! *pz = ftmp0; 117*25c28e83SPiotr Jasiukajtis! goto next; 118*25c28e83SPiotr Jasiukajtis! } 119*25c28e83SPiotr Jasiukajtis! if ( ax0 == 0 && ay0 == 0 ) 120*25c28e83SPiotr Jasiukajtis! { 121*25c28e83SPiotr Jasiukajtis! signy0 = uy0 >> 28; 122*25c28e83SPiotr Jasiukajtis! signx0 = ux0 >> 27; 123*25c28e83SPiotr Jasiukajtis! ldiff0 = ax0 - ay0; 124*25c28e83SPiotr Jasiukajtis! ldiff0 >>= 31; 125*25c28e83SPiotr Jasiukajtis! signx0 &= -16; 126*25c28e83SPiotr Jasiukajtis! signy0 &= -8; 127*25c28e83SPiotr Jasiukajtis! ldiff0 <<= 5; 128*25c28e83SPiotr Jasiukajtis! signx0 += signy0; 129*25c28e83SPiotr Jasiukajtis! res = *(double*)((char*)(cadd_arr + 7) + ldiff0 + signx0 + signy0); 130*25c28e83SPiotr Jasiukajtis! ftmp0 = (float) res; 131*25c28e83SPiotr Jasiukajtis! *pz = ftmp0; 132*25c28e83SPiotr Jasiukajtis! goto next; 133*25c28e83SPiotr Jasiukajtis! } 134*25c28e83SPiotr Jasiukajtis! 
ldiff0 = ax0 - ay0; 135*25c28e83SPiotr Jasiukajtis! ldiff0 >>= 31; 136*25c28e83SPiotr Jasiukajtis! addrc0 = (char*)px - (char*)py; 137*25c28e83SPiotr Jasiukajtis! addrc0 &= ldiff0; 138*25c28e83SPiotr Jasiukajtis! fy0 = *(float*)((char*)py + addrc0); 139*25c28e83SPiotr Jasiukajtis! fx0 = *(float*)((char*)px - addrc0); 140*25c28e83SPiotr Jasiukajtis! itmp0 = *(int*)&fy0; 141*25c28e83SPiotr Jasiukajtis! if((itmp0 & 0x7fffffff) < 0x00800000) 142*25c28e83SPiotr Jasiukajtis! { 143*25c28e83SPiotr Jasiukajtis! itmp0 >>= 28; 144*25c28e83SPiotr Jasiukajtis! itmp0 &= -8; 145*25c28e83SPiotr Jasiukajtis! fy0 = fabsf(fy0); 146*25c28e83SPiotr Jasiukajtis! dtmp0 = (double) *(int*)&fy0; 147*25c28e83SPiotr Jasiukajtis! dtmp0 *= C2ONM149; 148*25c28e83SPiotr Jasiukajtis! dsign = *(double*)((char*)cmul_arr + itmp0); 149*25c28e83SPiotr Jasiukajtis! dtmp0 *= dsign; 150*25c28e83SPiotr Jasiukajtis! y0 = dtmp0; 151*25c28e83SPiotr Jasiukajtis! } 152*25c28e83SPiotr Jasiukajtis! else 153*25c28e83SPiotr Jasiukajtis! y0 = (double)fy0; 154*25c28e83SPiotr Jasiukajtis! itmp0 = *(int*)&fx0; 155*25c28e83SPiotr Jasiukajtis! if((itmp0 & 0x7fffffff) < 0x00800000) 156*25c28e83SPiotr Jasiukajtis! { 157*25c28e83SPiotr Jasiukajtis! itmp0 >>= 28; 158*25c28e83SPiotr Jasiukajtis! itmp0 &= -8; 159*25c28e83SPiotr Jasiukajtis! fx0 = fabsf(fx0); 160*25c28e83SPiotr Jasiukajtis! dtmp0 = (double) *(int*)&fx0; 161*25c28e83SPiotr Jasiukajtis! dtmp0 *= C2ONM149; 162*25c28e83SPiotr Jasiukajtis! dsign = *(double*)((char*)cmul_arr + itmp0); 163*25c28e83SPiotr Jasiukajtis! dtmp0 *= dsign; 164*25c28e83SPiotr Jasiukajtis! x0 = dtmp0; 165*25c28e83SPiotr Jasiukajtis! } 166*25c28e83SPiotr Jasiukajtis! else 167*25c28e83SPiotr Jasiukajtis! x0 = (double)fx0; 168*25c28e83SPiotr Jasiukajtis! px += stridex; 169*25c28e83SPiotr Jasiukajtis! py += stridey; 170*25c28e83SPiotr Jasiukajtis! x0 = y0 / x0; 171*25c28e83SPiotr Jasiukajtis! x20 = x0 * x0; 172*25c28e83SPiotr Jasiukajtis! dtmp0 = K9 * x20; 173*25c28e83SPiotr Jasiukajtis! 
dtmp0 += K8; 174*25c28e83SPiotr Jasiukajtis! dtmp0 *= x20; 175*25c28e83SPiotr Jasiukajtis! dtmp0 += K7; 176*25c28e83SPiotr Jasiukajtis! dtmp0 *= x20; 177*25c28e83SPiotr Jasiukajtis! dtmp0 += K6; 178*25c28e83SPiotr Jasiukajtis! dtmp0 *= x20; 179*25c28e83SPiotr Jasiukajtis! dtmp0 += K5; 180*25c28e83SPiotr Jasiukajtis! dtmp0 *= x20; 181*25c28e83SPiotr Jasiukajtis! dtmp0 += K4; 182*25c28e83SPiotr Jasiukajtis! dtmp0 *= x20; 183*25c28e83SPiotr Jasiukajtis! dtmp0 += K3; 184*25c28e83SPiotr Jasiukajtis! dtmp0 *= x20; 185*25c28e83SPiotr Jasiukajtis! dtmp0 += K2; 186*25c28e83SPiotr Jasiukajtis! dtmp0 *= x20; 187*25c28e83SPiotr Jasiukajtis! dtmp0 += K1; 188*25c28e83SPiotr Jasiukajtis! dtmp0 *= x20; 189*25c28e83SPiotr Jasiukajtis! dtmp0 += K0; 190*25c28e83SPiotr Jasiukajtis! x0 = dtmp0 * x0; 191*25c28e83SPiotr Jasiukajtis! signy0 = uy0 >> 28; 192*25c28e83SPiotr Jasiukajtis! signy0 &= -8; 193*25c28e83SPiotr Jasiukajtis! signx0 = ux0 >> 27; 194*25c28e83SPiotr Jasiukajtis! signx0 &= -16; 195*25c28e83SPiotr Jasiukajtis! ltmp0 = ldiff0 << 5; 196*25c28e83SPiotr Jasiukajtis! ltmp0 += (char*)cadd_arr; 197*25c28e83SPiotr Jasiukajtis! ltmp0 += signx0; 198*25c28e83SPiotr Jasiukajtis! cadd0 = *(double*)(ltmp0 + signy0); 199*25c28e83SPiotr Jasiukajtis! cmul0_ind = ldiff0 << 3; 200*25c28e83SPiotr Jasiukajtis! cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 201*25c28e83SPiotr Jasiukajtis! dtmp0 = cmul0 * x0; 202*25c28e83SPiotr Jasiukajtis! dtmp0 = cadd0 + dtmp0; 203*25c28e83SPiotr Jasiukajtis! ftmp0 = (float)dtmp0; 204*25c28e83SPiotr Jasiukajtis! *pz = ftmp0; 205*25c28e83SPiotr Jasiukajtis! pz += stridez; 206*25c28e83SPiotr Jasiukajtis! 
207*25c28e83SPiotr Jasiukajtis!-------------------------------------------------------------------- 208*25c28e83SPiotr Jasiukajtis 209*25c28e83SPiotr Jasiukajtis ENTRY(__vatan2f) 210*25c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-tmps,%sp 211*25c28e83SPiotr Jasiukajtis PIC_SETUP(l7) 212*25c28e83SPiotr Jasiukajtis PIC_SET(l7,.CONST_TBL,g5) 213*25c28e83SPiotr Jasiukajtis 214*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9 215*25c28e83SPiotr Jasiukajtis ldx [%fp+STACK_BIAS+176],%l7 216*25c28e83SPiotr Jasiukajtis#else 217*25c28e83SPiotr Jasiukajtis ld [%fp+STACK_BIAS+92],%l7 218*25c28e83SPiotr Jasiukajtis#endif 219*25c28e83SPiotr Jasiukajtis 220*25c28e83SPiotr Jasiukajtis st %i0,[%fp+tmp_counter] 221*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),_0x7fffffff 222*25c28e83SPiotr Jasiukajtis add _0x7fffffff,1023,_0x7fffffff 223*25c28e83SPiotr Jasiukajtis or %g0,%i2,%o2 224*25c28e83SPiotr Jasiukajtis sll %l7,2,stridez 225*25c28e83SPiotr Jasiukajtis 226*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),_0x7f800000 227*25c28e83SPiotr Jasiukajtis mov %g5,%g1 228*25c28e83SPiotr Jasiukajtis 229*25c28e83SPiotr Jasiukajtis or %g0,stridey,%o4 230*25c28e83SPiotr Jasiukajtis add %g1,56,cadd_arr 231*25c28e83SPiotr Jasiukajtis 232*25c28e83SPiotr Jasiukajtis sll %o2,2,stridey 233*25c28e83SPiotr Jasiukajtis add %g1,72,cmul_arr 234*25c28e83SPiotr Jasiukajtis 235*25c28e83SPiotr Jasiukajtis ldd [%g1+80],K0 236*25c28e83SPiotr Jasiukajtis ldd [%g1+80+8],K1 237*25c28e83SPiotr Jasiukajtis ldd [%g1+80+16],K2 238*25c28e83SPiotr Jasiukajtis ldd [%g1+80+24],K3 239*25c28e83SPiotr Jasiukajtis ldd [%g1+80+32],K4 240*25c28e83SPiotr Jasiukajtis ldd [%g1+80+40],K5 241*25c28e83SPiotr Jasiukajtis ldd [%g1+80+48],K6 242*25c28e83SPiotr Jasiukajtis ldd [%g1+80+56],K7 243*25c28e83SPiotr Jasiukajtis ldd [%g1+80+64],K8 244*25c28e83SPiotr Jasiukajtis ldd [%g1+80+72],K9 245*25c28e83SPiotr Jasiukajtis 246*25c28e83SPiotr Jasiukajtis sll stridex,2,stridex 247*25c28e83SPiotr Jasiukajtis 248*25c28e83SPiotr Jasiukajtis 
stx %i1,[%fp+tmp_py] 249*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] 250*25c28e83SPiotr Jasiukajtis.begin: 251*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_counter],counter 252*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_py],%i1 253*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_px],%i3 254*25c28e83SPiotr Jasiukajtis st %g0,[%fp+tmp_counter] 255*25c28e83SPiotr Jasiukajtis.begin1: 256*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 257*25c28e83SPiotr Jasiukajtis bneg,pn %icc,.exit 258*25c28e83SPiotr Jasiukajtis nop 259*25c28e83SPiotr Jasiukajtis 260*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; 261*25c28e83SPiotr Jasiukajtis 262*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; 263*25c28e83SPiotr Jasiukajtis 264*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; 265*25c28e83SPiotr Jasiukajtis 266*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7f800000 267*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec0 268*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; 269*25c28e83SPiotr Jasiukajtis 270*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 271*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec0 272*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 273*25c28e83SPiotr Jasiukajtis 274*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 275*25c28e83SPiotr Jasiukajtis bl,pn %icc,.spec1 276*25c28e83SPiotr Jasiukajtis sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; 277*25c28e83SPiotr Jasiukajtis 278*25c28e83SPiotr Jasiukajtis cmp %l7,%o5 279*25c28e83SPiotr Jasiukajtis bl,pn %icc,.spec1 280*25c28e83SPiotr Jasiukajtis nop 281*25c28e83SPiotr Jasiukajtis 282*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp_pz] 283*25c28e83SPiotr Jasiukajtis sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; 284*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; 285*25c28e83SPiotr Jasiukajtis 286*25c28e83SPiotr Jasiukajtis and %l6,%l7,%o2 ! 
(0_0) addrc0 &= ldiff0; 287*25c28e83SPiotr Jasiukajtis 288*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); 289*25c28e83SPiotr Jasiukajtis sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 290*25c28e83SPiotr Jasiukajtis 291*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); 292*25c28e83SPiotr Jasiukajtis sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; 293*25c28e83SPiotr Jasiukajtis 294*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 295*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 296*25c28e83SPiotr Jasiukajtis 297*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 298*25c28e83SPiotr Jasiukajtis 299*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 300*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 301*25c28e83SPiotr Jasiukajtis 302*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 303*25c28e83SPiotr Jasiukajtis 304*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 305*25c28e83SPiotr Jasiukajtis 306*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (0_0) x0 = (double)fx0; 307*25c28e83SPiotr Jasiukajtis 308*25c28e83SPiotr Jasiukajtis.spec1_cont: 309*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; 310*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (0_0) signx0 &= -16; 311*25c28e83SPiotr Jasiukajtis 312*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (0_0) signy0 &= -8; 313*25c28e83SPiotr Jasiukajtis 314*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; 315*25c28e83SPiotr Jasiukajtis 316*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; 317*25c28e83SPiotr Jasiukajtis 318*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l6 ! 
(1_0) ax0 = ux0 & 0x7fffffff; 319*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 320*25c28e83SPiotr Jasiukajtis 321*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 322*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u0 323*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; 324*25c28e83SPiotr Jasiukajtis.c0: 325*25c28e83SPiotr Jasiukajtis cmp %g1,%o5 326*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u1 327*25c28e83SPiotr Jasiukajtis ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); 328*25c28e83SPiotr Jasiukajtis.c1: 329*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 330*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u2 331*25c28e83SPiotr Jasiukajtis sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; 332*25c28e83SPiotr Jasiukajtis.c2: 333*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7f800000 334*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u3 335*25c28e83SPiotr Jasiukajtis nop 336*25c28e83SPiotr Jasiukajtis.c3: 337*25c28e83SPiotr Jasiukajtis sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; 338*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; 339*25c28e83SPiotr Jasiukajtis 340*25c28e83SPiotr Jasiukajtis and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; 341*25c28e83SPiotr Jasiukajtis 342*25c28e83SPiotr Jasiukajtis lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); 343*25c28e83SPiotr Jasiukajtis sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; 344*25c28e83SPiotr Jasiukajtis 345*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); 346*25c28e83SPiotr Jasiukajtis sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; 347*25c28e83SPiotr Jasiukajtis 348*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 349*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update0 ! (1_0) if ( b0 > 0x7f800000 ) 350*25c28e83SPiotr Jasiukajtis nop 351*25c28e83SPiotr Jasiukajtis.cont0: 352*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 353*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! 
(1_0) y0 = (double)fy0; 354*25c28e83SPiotr Jasiukajtis 355*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 356*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 357*25c28e83SPiotr Jasiukajtis 358*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; 359*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; 360*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (1_0) x0 = (double)fx0; 361*25c28e83SPiotr Jasiukajtis.d0: 362*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (1_0) signx0 &= -16; 363*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (1_0) signy0 &= -8; 364*25c28e83SPiotr Jasiukajtis 365*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; 366*25c28e83SPiotr Jasiukajtis 367*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; 368*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; 369*25c28e83SPiotr Jasiukajtis 370*25c28e83SPiotr Jasiukajtis fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; 371*25c28e83SPiotr Jasiukajtis 372*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; 373*25c28e83SPiotr Jasiukajtis 374*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; 375*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 376*25c28e83SPiotr Jasiukajtis 377*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 378*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u4 379*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; 380*25c28e83SPiotr Jasiukajtis.c4: 381*25c28e83SPiotr Jasiukajtis cmp %g5,%o5 382*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u5 383*25c28e83SPiotr Jasiukajtis fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; 384*25c28e83SPiotr Jasiukajtis.c5: 385*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 386*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u6 387*25c28e83SPiotr Jasiukajtis ldd [%o2+%o4],%f32 ! 
(1_0) cadd0 = *(double*)(ltmp0 + signy0); 388*25c28e83SPiotr Jasiukajtis.c6: 389*25c28e83SPiotr Jasiukajtis cmp %g5,_0x7f800000 390*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u7 391*25c28e83SPiotr Jasiukajtis sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; 392*25c28e83SPiotr Jasiukajtis.c7: 393*25c28e83SPiotr Jasiukajtis sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; 394*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; 395*25c28e83SPiotr Jasiukajtis 396*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; 397*25c28e83SPiotr Jasiukajtis and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; 398*25c28e83SPiotr Jasiukajtis 399*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); 400*25c28e83SPiotr Jasiukajtis sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; 401*25c28e83SPiotr Jasiukajtis 402*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); 403*25c28e83SPiotr Jasiukajtis 404*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 405*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update1 ! (2_0) if ( b0 > 0x7f800000 ) 406*25c28e83SPiotr Jasiukajtis nop 407*25c28e83SPiotr Jasiukajtis.cont1: 408*25c28e83SPiotr Jasiukajtis fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 409*25c28e83SPiotr Jasiukajtis sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 410*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 411*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (2_0) y0 = (double)fy0; 412*25c28e83SPiotr Jasiukajtis 413*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 414*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 415*25c28e83SPiotr Jasiukajtis 416*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (2_0) x0 = (double)fx0; 417*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 418*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! 
(2_0) ltmp0 += (char*)cadd_arr; 419*25c28e83SPiotr Jasiukajtis.d1: 420*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; 421*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (2_0) signx0 &= -16; 422*25c28e83SPiotr Jasiukajtis faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; 423*25c28e83SPiotr Jasiukajtis 424*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; 425*25c28e83SPiotr Jasiukajtis 426*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; 427*25c28e83SPiotr Jasiukajtis 428*25c28e83SPiotr Jasiukajtis fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; 429*25c28e83SPiotr Jasiukajtis 430*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; 431*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (2_0) signy0 &= -8; 432*25c28e83SPiotr Jasiukajtis fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; 433*25c28e83SPiotr Jasiukajtis 434*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; 435*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 436*25c28e83SPiotr Jasiukajtis 437*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 438*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u8 439*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; 440*25c28e83SPiotr Jasiukajtis.c8: 441*25c28e83SPiotr Jasiukajtis cmp %o0,%o5 442*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u9 443*25c28e83SPiotr Jasiukajtis fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; 444*25c28e83SPiotr Jasiukajtis.c9: 445*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 446*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u10 447*25c28e83SPiotr Jasiukajtis faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; 448*25c28e83SPiotr Jasiukajtis.c10: 449*25c28e83SPiotr Jasiukajtis cmp %o0,_0x7f800000 450*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u11 451*25c28e83SPiotr Jasiukajtis ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); 452*25c28e83SPiotr Jasiukajtis.c11: 453*25c28e83SPiotr Jasiukajtis sub %l6,%o0,%o1 ! 
(3_0) ldiff0 = ax0 - ay0; 454*25c28e83SPiotr Jasiukajtis 455*25c28e83SPiotr Jasiukajtis sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; 456*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; 457*25c28e83SPiotr Jasiukajtis 458*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; 459*25c28e83SPiotr Jasiukajtis and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; 460*25c28e83SPiotr Jasiukajtis fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; 461*25c28e83SPiotr Jasiukajtis 462*25c28e83SPiotr Jasiukajtis lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); 463*25c28e83SPiotr Jasiukajtis sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; 464*25c28e83SPiotr Jasiukajtis 465*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); 466*25c28e83SPiotr Jasiukajtis 467*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 468*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update2 ! (3_0) if ( b0 > 0x7f800000 ) 469*25c28e83SPiotr Jasiukajtis nop 470*25c28e83SPiotr Jasiukajtis.cont2: 471*25c28e83SPiotr Jasiukajtis fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 472*25c28e83SPiotr Jasiukajtis sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 473*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 474*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (3_0) y0 = (double)fy0; 475*25c28e83SPiotr Jasiukajtis 476*25c28e83SPiotr Jasiukajtis faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 477*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 478*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 479*25c28e83SPiotr Jasiukajtis 480*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 481*25c28e83SPiotr Jasiukajtis fstod %f1,%f16 ! (3_0) x0 = (double)fx0; 482*25c28e83SPiotr Jasiukajtis.d2: 483*25c28e83SPiotr Jasiukajtis faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; 484*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; 485*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! 
(3_0) signx0 &= -16; 486*25c28e83SPiotr Jasiukajtis 487*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; 488*25c28e83SPiotr Jasiukajtis fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; 489*25c28e83SPiotr Jasiukajtis 490*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; 491*25c28e83SPiotr Jasiukajtis fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; 492*25c28e83SPiotr Jasiukajtis 493*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (3_0) signy0 &= -8; 494*25c28e83SPiotr Jasiukajtis fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; 495*25c28e83SPiotr Jasiukajtis 496*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; 497*25c28e83SPiotr Jasiukajtis fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; 498*25c28e83SPiotr Jasiukajtis 499*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; 500*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 501*25c28e83SPiotr Jasiukajtis faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; 502*25c28e83SPiotr Jasiukajtis 503*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 504*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u12 505*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; 506*25c28e83SPiotr Jasiukajtis.c12: 507*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 508*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u13 509*25c28e83SPiotr Jasiukajtis fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; 510*25c28e83SPiotr Jasiukajtis.c13: 511*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 512*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u14 513*25c28e83SPiotr Jasiukajtis faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; 514*25c28e83SPiotr Jasiukajtis.c14: 515*25c28e83SPiotr Jasiukajtis ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); 516*25c28e83SPiotr Jasiukajtis cmp %l5,_0x7f800000 517*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u15 518*25c28e83SPiotr Jasiukajtis fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; 519*25c28e83SPiotr Jasiukajtis.c15: 520*25c28e83SPiotr Jasiukajtis sub %l6,%l5,%o2 ! 
(4_0) ldiff0 = ax0 - ay0; 521*25c28e83SPiotr Jasiukajtis 522*25c28e83SPiotr Jasiukajtis sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; 523*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; 524*25c28e83SPiotr Jasiukajtis 525*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; 526*25c28e83SPiotr Jasiukajtis and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; 527*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; 528*25c28e83SPiotr Jasiukajtis 529*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); 530*25c28e83SPiotr Jasiukajtis sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; 531*25c28e83SPiotr Jasiukajtis faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; 532*25c28e83SPiotr Jasiukajtis 533*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); 534*25c28e83SPiotr Jasiukajtis 535*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 536*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update3 ! (4_0) if ( b0 > 0x7f800000 ) 537*25c28e83SPiotr Jasiukajtis nop 538*25c28e83SPiotr Jasiukajtis.cont3: 539*25c28e83SPiotr Jasiukajtis fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 540*25c28e83SPiotr Jasiukajtis sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 541*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 542*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (4_0) y0 = (double)fy0; 543*25c28e83SPiotr Jasiukajtis 544*25c28e83SPiotr Jasiukajtis faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 545*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 546*25c28e83SPiotr Jasiukajtis fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 547*25c28e83SPiotr Jasiukajtis 548*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (4_0) x0 = (double)fx0; 549*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 550*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; 551*25c28e83SPiotr Jasiukajtis.d3: 552*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! 
(5_0) uy0 = *(int*)py; 553*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; 554*25c28e83SPiotr Jasiukajtis faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; 555*25c28e83SPiotr Jasiukajtis 556*25c28e83SPiotr Jasiukajtis fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; 557*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (4_0) signx0 &= -16; 558*25c28e83SPiotr Jasiukajtis 559*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px; 560*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0; 561*25c28e83SPiotr Jasiukajtis faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; 562*25c28e83SPiotr Jasiukajtis 563*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (4_1) signy0 &= -8; 564*25c28e83SPiotr Jasiukajtis fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0; 565*25c28e83SPiotr Jasiukajtis 566*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0; 567*25c28e83SPiotr Jasiukajtis fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; 568*25c28e83SPiotr Jasiukajtis 569*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l6 ! (5_1) ax0 = ux0 & 0x7fffffff; 570*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 571*25c28e83SPiotr Jasiukajtis faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; 572*25c28e83SPiotr Jasiukajtis 573*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 574*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u16 575*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff; 576*25c28e83SPiotr Jasiukajtis.c16: 577*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 578*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u17 579*25c28e83SPiotr Jasiukajtis fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; 580*25c28e83SPiotr Jasiukajtis.c17: 581*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 582*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u18 583*25c28e83SPiotr Jasiukajtis fmuld K9,%f24,%f40 ! 
(3_1) dtmp0 = K9 * x20; 584*25c28e83SPiotr Jasiukajtis.c18: 585*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7f800000 586*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u19 587*25c28e83SPiotr Jasiukajtis faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; 588*25c28e83SPiotr Jasiukajtis.c19: 589*25c28e83SPiotr Jasiukajtis ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0); 590*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 591*25c28e83SPiotr Jasiukajtis 592*25c28e83SPiotr Jasiukajtis sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0; 593*25c28e83SPiotr Jasiukajtis 594*25c28e83SPiotr Jasiukajtis sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31; 595*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py; 596*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; 597*25c28e83SPiotr Jasiukajtis 598*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8; 599*25c28e83SPiotr Jasiukajtis and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0; 600*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 601*25c28e83SPiotr Jasiukajtis 602*25c28e83SPiotr Jasiukajtis lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0); 603*25c28e83SPiotr Jasiukajtis sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5; 604*25c28e83SPiotr Jasiukajtis sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0; 605*25c28e83SPiotr Jasiukajtis faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; 606*25c28e83SPiotr Jasiukajtis 607*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f1 ! (5_1) fx0 = *(float*)((char*)px - addrc0); 608*25c28e83SPiotr Jasiukajtis 609*25c28e83SPiotr Jasiukajtis fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; 610*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000 611*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update4 ! (5_1) if ( b0 > 0x7f800000 ) 612*25c28e83SPiotr Jasiukajtis nop 613*25c28e83SPiotr Jasiukajtis.cont4: 614*25c28e83SPiotr Jasiukajtis fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; 615*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! 
(5_1) y0 = (double)fy0; 616*25c28e83SPiotr Jasiukajtis 617*25c28e83SPiotr Jasiukajtis faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; 618*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 619*25c28e83SPiotr Jasiukajtis 620*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 621*25c28e83SPiotr Jasiukajtis sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; 622*25c28e83SPiotr Jasiukajtis fstod %f1,%f2 ! (5_1) x0 = (double)fx0; 623*25c28e83SPiotr Jasiukajtis.d4: 624*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28; 625*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 626*25c28e83SPiotr Jasiukajtis 627*25c28e83SPiotr Jasiukajtis faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7; 628*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27; 629*25c28e83SPiotr Jasiukajtis 630*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; 631*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr; 632*25c28e83SPiotr Jasiukajtis fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; 633*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; 634*25c28e83SPiotr Jasiukajtis 635*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; 636*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (5_1) signx0 &= -16; 637*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0; 638*25c28e83SPiotr Jasiukajtis faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; 639*25c28e83SPiotr Jasiukajtis 640*25c28e83SPiotr Jasiukajtis fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0; 641*25c28e83SPiotr Jasiukajtis 642*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 643*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0; 644*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (5_1) signy0 &= -8; 645*25c28e83SPiotr Jasiukajtis fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20; 646*25c28e83SPiotr Jasiukajtis 647*25c28e83SPiotr Jasiukajtis fmuld %f38,%f12,%f12 ! 
(0_1) x0 = dtmp0 * x0; 648*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; 649*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 650*25c28e83SPiotr Jasiukajtis faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; 651*25c28e83SPiotr Jasiukajtis 652*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; 653*25c28e83SPiotr Jasiukajtis cmp %l7,%o5 654*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u20 655*25c28e83SPiotr Jasiukajtis fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; 656*25c28e83SPiotr Jasiukajtis.c20: 657*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 658*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u21 659*25c28e83SPiotr Jasiukajtis fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20; 660*25c28e83SPiotr Jasiukajtis.c21: 661*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7f800000 662*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u22 663*25c28e83SPiotr Jasiukajtis faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6; 664*25c28e83SPiotr Jasiukajtis.c22: 665*25c28e83SPiotr Jasiukajtis ldd [%o2+%o4],%f36 ! (5_1) cadd0 = *(double*)(ltmp0 + signy0); 666*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 667*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u23 668*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 669*25c28e83SPiotr Jasiukajtis.c23: 670*25c28e83SPiotr Jasiukajtis sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; 671*25c28e83SPiotr Jasiukajtis 672*25c28e83SPiotr Jasiukajtis fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; 673*25c28e83SPiotr Jasiukajtis sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; 674*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; 675*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; 676*25c28e83SPiotr Jasiukajtis 677*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8; 678*25c28e83SPiotr Jasiukajtis and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; 679*25c28e83SPiotr Jasiukajtis fmuld %f20,%f24,%f20 ! 
(3_1) dtmp0 *= x20; 680*25c28e83SPiotr Jasiukajtis 681*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); 682*25c28e83SPiotr Jasiukajtis sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; 683*25c28e83SPiotr Jasiukajtis sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 684*25c28e83SPiotr Jasiukajtis faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; 685*25c28e83SPiotr Jasiukajtis 686*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); 687*25c28e83SPiotr Jasiukajtis sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; 688*25c28e83SPiotr Jasiukajtis 689*25c28e83SPiotr Jasiukajtis fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; 690*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000 691*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update5 ! (0_0) if ( b0 > 0x7f800000 ) 692*25c28e83SPiotr Jasiukajtis faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; 693*25c28e83SPiotr Jasiukajtis.cont5: 694*25c28e83SPiotr Jasiukajtis fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; 695*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 696*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 697*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 698*25c28e83SPiotr Jasiukajtis 699*25c28e83SPiotr Jasiukajtis faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; 700*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 701*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 702*25c28e83SPiotr Jasiukajtis 703*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 704*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 705*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 706*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (0_0) x0 = (double)fx0; 707*25c28e83SPiotr Jasiukajtis.d5: 708*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; 709*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! 
(0_0) signx0 &= -16; 710*25c28e83SPiotr Jasiukajtis faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7; 711*25c28e83SPiotr Jasiukajtis 712*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_pz],%o1 713*25c28e83SPiotr Jasiukajtis fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20; 714*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (0_0) signy0 &= -8; 715*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; 716*25c28e83SPiotr Jasiukajtis 717*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; 718*25c28e83SPiotr Jasiukajtis faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; 719*25c28e83SPiotr Jasiukajtis 720*25c28e83SPiotr Jasiukajtis fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; 721*25c28e83SPiotr Jasiukajtis st %f2,[%o1] ! (0_1) *pz = ftmp0 722*25c28e83SPiotr Jasiukajtis add %o1,stridez,%o2 723*25c28e83SPiotr Jasiukajtis fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0; 724*25c28e83SPiotr Jasiukajtis 725*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 726*25c28e83SPiotr Jasiukajtis bneg,a,pn %icc,.begin 727*25c28e83SPiotr Jasiukajtis or %g0,%o2,%o4 728*25c28e83SPiotr Jasiukajtis 729*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 730*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; 731*25c28e83SPiotr Jasiukajtis fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20; 732*25c28e83SPiotr Jasiukajtis 733*25c28e83SPiotr Jasiukajtis fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; 734*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; 735*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 736*25c28e83SPiotr Jasiukajtis faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4; 737*25c28e83SPiotr Jasiukajtis 738*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; 739*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 740*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u24 741*25c28e83SPiotr Jasiukajtis fmuld %f40,%f16,%f38 ! 
(2_1) dtmp0 *= x20; 742*25c28e83SPiotr Jasiukajtis.c24: 743*25c28e83SPiotr Jasiukajtis cmp %g1,%o5 744*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u25 745*25c28e83SPiotr Jasiukajtis fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20; 746*25c28e83SPiotr Jasiukajtis.c25: 747*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 748*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u26 749*25c28e83SPiotr Jasiukajtis faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6; 750*25c28e83SPiotr Jasiukajtis.c26: 751*25c28e83SPiotr Jasiukajtis ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); 752*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7f800000 753*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u27 754*25c28e83SPiotr Jasiukajtis fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 755*25c28e83SPiotr Jasiukajtis.c27: 756*25c28e83SPiotr Jasiukajtis sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; 757*25c28e83SPiotr Jasiukajtis 758*25c28e83SPiotr Jasiukajtis fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; 759*25c28e83SPiotr Jasiukajtis sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; 760*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; 761*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; 762*25c28e83SPiotr Jasiukajtis 763*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8; 764*25c28e83SPiotr Jasiukajtis and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; 765*25c28e83SPiotr Jasiukajtis fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 766*25c28e83SPiotr Jasiukajtis 767*25c28e83SPiotr Jasiukajtis lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); 768*25c28e83SPiotr Jasiukajtis sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; 769*25c28e83SPiotr Jasiukajtis sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; 770*25c28e83SPiotr Jasiukajtis faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3; 771*25c28e83SPiotr Jasiukajtis 772*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); 773*25c28e83SPiotr Jasiukajtis sll %g1,5,%l6 ! 
(1_0) ltmp0 = ldiff0 << 5; 774*25c28e83SPiotr Jasiukajtis add %o2,stridez,%o1 ! pz += stridez 775*25c28e83SPiotr Jasiukajtis 776*25c28e83SPiotr Jasiukajtis fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; 777*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 778*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update6 ! (1_0) if ( b0 > 0x7f800000 ) 779*25c28e83SPiotr Jasiukajtis faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; 780*25c28e83SPiotr Jasiukajtis.cont6: 781*25c28e83SPiotr Jasiukajtis fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; 782*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 783*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (1_0) y0 = (double)fy0; 784*25c28e83SPiotr Jasiukajtis 785*25c28e83SPiotr Jasiukajtis faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; 786*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 787*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 788*25c28e83SPiotr Jasiukajtis fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 789*25c28e83SPiotr Jasiukajtis 790*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; 791*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; 792*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (1_0) x0 = (double)fx0; 793*25c28e83SPiotr Jasiukajtis.d6: 794*25c28e83SPiotr Jasiukajtis faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7; 795*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (1_0) signx0 &= -16; 796*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (1_0) signy0 &= -8; 797*25c28e83SPiotr Jasiukajtis 798*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; 799*25c28e83SPiotr Jasiukajtis fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20; 800*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; 801*25c28e83SPiotr Jasiukajtis 802*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; 803*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; 804*25c28e83SPiotr Jasiukajtis faddd %f20,K2,%f40 ! 
(3_1) dtmp0 += K2; 805*25c28e83SPiotr Jasiukajtis 806*25c28e83SPiotr Jasiukajtis fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; 807*25c28e83SPiotr Jasiukajtis fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; 808*25c28e83SPiotr Jasiukajtis st %f2,[%o2] ! (1_1) *pz = ftmp0; 809*25c28e83SPiotr Jasiukajtis 810*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 811*25c28e83SPiotr Jasiukajtis bneg,a,pn %icc,.begin 812*25c28e83SPiotr Jasiukajtis or %g0,%o1,%o4 813*25c28e83SPiotr Jasiukajtis 814*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 815*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; 816*25c28e83SPiotr Jasiukajtis fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20; 817*25c28e83SPiotr Jasiukajtis 818*25c28e83SPiotr Jasiukajtis fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; 819*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; 820*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 821*25c28e83SPiotr Jasiukajtis faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4; 822*25c28e83SPiotr Jasiukajtis 823*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; 824*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 825*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u28 826*25c28e83SPiotr Jasiukajtis fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20; 827*25c28e83SPiotr Jasiukajtis.c28: 828*25c28e83SPiotr Jasiukajtis cmp %g5,%o5 829*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u29 830*25c28e83SPiotr Jasiukajtis fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; 831*25c28e83SPiotr Jasiukajtis.c29: 832*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 833*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u30 834*25c28e83SPiotr Jasiukajtis faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6; 835*25c28e83SPiotr Jasiukajtis.c30: 836*25c28e83SPiotr Jasiukajtis ldd [%o2+%o4],%f32 ! 
(1_0) cadd0 = *(double*)(ltmp0 + signy0); 837*25c28e83SPiotr Jasiukajtis cmp %g5,_0x7f800000 838*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u31 839*25c28e83SPiotr Jasiukajtis fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 840*25c28e83SPiotr Jasiukajtis.c31: 841*25c28e83SPiotr Jasiukajtis sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; 842*25c28e83SPiotr Jasiukajtis 843*25c28e83SPiotr Jasiukajtis fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; 844*25c28e83SPiotr Jasiukajtis sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; 845*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; 846*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (3_1) dtmp0 += K1; 847*25c28e83SPiotr Jasiukajtis 848*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; 849*25c28e83SPiotr Jasiukajtis and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; 850*25c28e83SPiotr Jasiukajtis fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20; 851*25c28e83SPiotr Jasiukajtis 852*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); 853*25c28e83SPiotr Jasiukajtis sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; 854*25c28e83SPiotr Jasiukajtis add %o1,stridez,%o2 ! pz += stridez 855*25c28e83SPiotr Jasiukajtis faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3; 856*25c28e83SPiotr Jasiukajtis 857*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); 858*25c28e83SPiotr Jasiukajtis sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3; 859*25c28e83SPiotr Jasiukajtis 860*25c28e83SPiotr Jasiukajtis fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20; 861*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 862*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update7 ! (2_0) if ( b0 > 0x7f800000 ) 863*25c28e83SPiotr Jasiukajtis faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; 864*25c28e83SPiotr Jasiukajtis.cont7: 865*25c28e83SPiotr Jasiukajtis fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 866*25c28e83SPiotr Jasiukajtis sll %g5,5,%l6 ! 
(2_0) ltmp0 = ldiff0 << 5; 867*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 868*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (2_0) y0 = (double)fy0; 869*25c28e83SPiotr Jasiukajtis 870*25c28e83SPiotr Jasiukajtis faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; 871*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 872*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 873*25c28e83SPiotr Jasiukajtis fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 874*25c28e83SPiotr Jasiukajtis 875*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (2_0) x0 = (double)fx0; 876*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 877*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 878*25c28e83SPiotr Jasiukajtis.d7: 879*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; 880*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (2_0) signx0 &= -16; 881*25c28e83SPiotr Jasiukajtis faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; 882*25c28e83SPiotr Jasiukajtis 883*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; 884*25c28e83SPiotr Jasiukajtis fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20; 885*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0; 886*25c28e83SPiotr Jasiukajtis 887*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; 888*25c28e83SPiotr Jasiukajtis faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2; 889*25c28e83SPiotr Jasiukajtis 890*25c28e83SPiotr Jasiukajtis fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; 891*25c28e83SPiotr Jasiukajtis fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; 892*25c28e83SPiotr Jasiukajtis st %f1,[%o1] ! (2_1) *pz = ftmp0; 893*25c28e83SPiotr Jasiukajtis 894*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 895*25c28e83SPiotr Jasiukajtis bneg,a,pn %icc,.begin 896*25c28e83SPiotr Jasiukajtis or %g0,%o2,%o4 897*25c28e83SPiotr Jasiukajtis 898*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o0],%f2 ! 
(3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 899*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; 900*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (2_0) signy0 &= -8; 901*25c28e83SPiotr Jasiukajtis fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; 902*25c28e83SPiotr Jasiukajtis 903*25c28e83SPiotr Jasiukajtis fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0; 904*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; 905*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 906*25c28e83SPiotr Jasiukajtis faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4; 907*25c28e83SPiotr Jasiukajtis 908*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; 909*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 910*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u32 911*25c28e83SPiotr Jasiukajtis fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20; 912*25c28e83SPiotr Jasiukajtis.c32: 913*25c28e83SPiotr Jasiukajtis cmp %o0,%o5 914*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u33 915*25c28e83SPiotr Jasiukajtis fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; 916*25c28e83SPiotr Jasiukajtis.c33: 917*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 918*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u34 919*25c28e83SPiotr Jasiukajtis faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; 920*25c28e83SPiotr Jasiukajtis.c34: 921*25c28e83SPiotr Jasiukajtis ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); 922*25c28e83SPiotr Jasiukajtis cmp %o0,_0x7f800000 923*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u35 924*25c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 925*25c28e83SPiotr Jasiukajtis.c35: 926*25c28e83SPiotr Jasiukajtis sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; 927*25c28e83SPiotr Jasiukajtis 928*25c28e83SPiotr Jasiukajtis fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0; 929*25c28e83SPiotr Jasiukajtis sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; 930*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! 
(3_0) addrc0 = (char*)px - (char*)py; 931*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1; 932*25c28e83SPiotr Jasiukajtis 933*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; 934*25c28e83SPiotr Jasiukajtis and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; 935*25c28e83SPiotr Jasiukajtis fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; 936*25c28e83SPiotr Jasiukajtis 937*25c28e83SPiotr Jasiukajtis lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); 938*25c28e83SPiotr Jasiukajtis sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; 939*25c28e83SPiotr Jasiukajtis add %o2,stridez,%o1 ! pz += stridez 940*25c28e83SPiotr Jasiukajtis faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3; 941*25c28e83SPiotr Jasiukajtis 942*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); 943*25c28e83SPiotr Jasiukajtis sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3; 944*25c28e83SPiotr Jasiukajtis 945*25c28e83SPiotr Jasiukajtis fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20; 946*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 947*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update8 ! (3_0) if ( b0 > 0x7f800000 ) 948*25c28e83SPiotr Jasiukajtis faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0; 949*25c28e83SPiotr Jasiukajtis.cont8: 950*25c28e83SPiotr Jasiukajtis fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 951*25c28e83SPiotr Jasiukajtis sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 952*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 953*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (3_0) y0 = (double)fy0; 954*25c28e83SPiotr Jasiukajtis 955*25c28e83SPiotr Jasiukajtis faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 956*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 957*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 958*25c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 959*25c28e83SPiotr Jasiukajtis 960*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! 
(3_0) signy0 = uy0 >> 28; 961*25c28e83SPiotr Jasiukajtis fstod %f1,%f16 ! (3_0) x0 = (double)fx0; 962*25c28e83SPiotr Jasiukajtis.d8: 963*25c28e83SPiotr Jasiukajtis faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; 964*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; 965*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (3_0) signx0 &= -16; 966*25c28e83SPiotr Jasiukajtis 967*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; 968*25c28e83SPiotr Jasiukajtis fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; 969*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0; 970*25c28e83SPiotr Jasiukajtis 971*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; 972*25c28e83SPiotr Jasiukajtis fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; 973*25c28e83SPiotr Jasiukajtis faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2; 974*25c28e83SPiotr Jasiukajtis 975*25c28e83SPiotr Jasiukajtis fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0; 976*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (3_0) signy0 &= -8; 977*25c28e83SPiotr Jasiukajtis st %f1,[%o2] ! (3_1) *pz = ftmp0; 978*25c28e83SPiotr Jasiukajtis fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; 979*25c28e83SPiotr Jasiukajtis 980*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 981*25c28e83SPiotr Jasiukajtis bneg,a,pn %icc,.begin 982*25c28e83SPiotr Jasiukajtis or %g0,%o1,%o4 983*25c28e83SPiotr Jasiukajtis 984*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 985*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; 986*25c28e83SPiotr Jasiukajtis fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; 987*25c28e83SPiotr Jasiukajtis 988*25c28e83SPiotr Jasiukajtis fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0; 989*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; 990*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 991*25c28e83SPiotr Jasiukajtis faddd %f2,K4,%f2 ! 
(0_0) dtmp0 += K4; 992*25c28e83SPiotr Jasiukajtis 993*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; 994*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 995*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u36 996*25c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20; 997*25c28e83SPiotr Jasiukajtis.c36: 998*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 999*25c28e83SPiotr Jasiukajtis bl,pn %icc,.u37 1000*25c28e83SPiotr Jasiukajtis fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; 1001*25c28e83SPiotr Jasiukajtis.c37: 1002*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 1003*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u38 1004*25c28e83SPiotr Jasiukajtis faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; 1005*25c28e83SPiotr Jasiukajtis.c38: 1006*25c28e83SPiotr Jasiukajtis ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); 1007*25c28e83SPiotr Jasiukajtis cmp %l5,_0x7f800000 1008*25c28e83SPiotr Jasiukajtis bge,pn %icc,.u39 1009*25c28e83SPiotr Jasiukajtis fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; 1010*25c28e83SPiotr Jasiukajtis.c39: 1011*25c28e83SPiotr Jasiukajtis sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; 1012*25c28e83SPiotr Jasiukajtis 1013*25c28e83SPiotr Jasiukajtis fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0; 1014*25c28e83SPiotr Jasiukajtis sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; 1015*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; 1016*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1; 1017*25c28e83SPiotr Jasiukajtis 1018*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; 1019*25c28e83SPiotr Jasiukajtis and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; 1020*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; 1021*25c28e83SPiotr Jasiukajtis 1022*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); 1023*25c28e83SPiotr Jasiukajtis sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; 1024*25c28e83SPiotr Jasiukajtis add %o1,stridez,%o2 ! 
pz += stridez 1025*25c28e83SPiotr Jasiukajtis faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; 1026*25c28e83SPiotr Jasiukajtis 1027*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); 1028*25c28e83SPiotr Jasiukajtis sll %o7,3,%o7 ! (5_1) cmul0_ind = ldiff0 << 3; 1029*25c28e83SPiotr Jasiukajtis 1030*25c28e83SPiotr Jasiukajtis fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20; 1031*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 1032*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update9 ! (4_0) if ( b0 > 0x7f800000 ) 1033*25c28e83SPiotr Jasiukajtis faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0; 1034*25c28e83SPiotr Jasiukajtis.cont9: 1035*25c28e83SPiotr Jasiukajtis fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 1036*25c28e83SPiotr Jasiukajtis sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 1037*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 1038*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (4_0) y0 = (double)fy0; 1039*25c28e83SPiotr Jasiukajtis 1040*25c28e83SPiotr Jasiukajtis faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 1041*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 1042*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 1043*25c28e83SPiotr Jasiukajtis fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 1044*25c28e83SPiotr Jasiukajtis 1045*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (4_0) x0 = (double)fx0; 1046*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; 1047*25c28e83SPiotr Jasiukajtis.d9: 1048*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; 1049*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; 1050*25c28e83SPiotr Jasiukajtis faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; 1051*25c28e83SPiotr Jasiukajtis 1052*25c28e83SPiotr Jasiukajtis fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; 1053*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (4_0) signx0 &= -16; 1054*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! 
(5_1) dtmp0 += K0; 1055*25c28e83SPiotr Jasiukajtis 1056*25c28e83SPiotr Jasiukajtis subcc counter,5,counter 1057*25c28e83SPiotr Jasiukajtis bneg,pn %icc,.tail 1058*25c28e83SPiotr Jasiukajtis nop 1059*25c28e83SPiotr Jasiukajtis 1060*25c28e83SPiotr Jasiukajtis ba .main_loop 1061*25c28e83SPiotr Jasiukajtis nop 1062*25c28e83SPiotr Jasiukajtis 1063*25c28e83SPiotr Jasiukajtis .align 16 1064*25c28e83SPiotr Jasiukajtis.main_loop: 1065*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px; 1066*25c28e83SPiotr Jasiukajtis nop 1067*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0; 1068*25c28e83SPiotr Jasiukajtis faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; 1069*25c28e83SPiotr Jasiukajtis 1070*25c28e83SPiotr Jasiukajtis fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0; 1071*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (4_1) signy0 &= -8; 1072*25c28e83SPiotr Jasiukajtis st %f22,[%o1] ! (4_2) *pz = ftmp0; 1073*25c28e83SPiotr Jasiukajtis fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0; 1074*25c28e83SPiotr Jasiukajtis 1075*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1076*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0; 1077*25c28e83SPiotr Jasiukajtis fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; 1078*25c28e83SPiotr Jasiukajtis 1079*25c28e83SPiotr Jasiukajtis fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0; 1080*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l6 ! (5_1) ax0 = ux0 & 0x7fffffff; 1081*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 1082*25c28e83SPiotr Jasiukajtis faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; 1083*25c28e83SPiotr Jasiukajtis 1084*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff; 1085*25c28e83SPiotr Jasiukajtis fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; 1086*25c28e83SPiotr Jasiukajtis 1087*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 1088*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up0 1089*25c28e83SPiotr Jasiukajtis fmuld K9,%f24,%f40 ! 
(3_1) dtmp0 = K9 * x20; 1090*25c28e83SPiotr Jasiukajtis.co0: 1091*25c28e83SPiotr Jasiukajtis nop 1092*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 1093*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up1 1094*25c28e83SPiotr Jasiukajtis faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; 1095*25c28e83SPiotr Jasiukajtis.co1: 1096*25c28e83SPiotr Jasiukajtis ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0); 1097*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 1098*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up2 1099*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 1100*25c28e83SPiotr Jasiukajtis.co2: 1101*25c28e83SPiotr Jasiukajtis sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0; 1102*25c28e83SPiotr Jasiukajtis cmp %o7,_0x7f800000 1103*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up3 1104*25c28e83SPiotr Jasiukajtis 1105*25c28e83SPiotr Jasiukajtis fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0; 1106*25c28e83SPiotr Jasiukajtis.co3: 1107*25c28e83SPiotr Jasiukajtis sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31; 1108*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py; 1109*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; 1110*25c28e83SPiotr Jasiukajtis 1111*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8; 1112*25c28e83SPiotr Jasiukajtis and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0; 1113*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1114*25c28e83SPiotr Jasiukajtis 1115*25c28e83SPiotr Jasiukajtis lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0); 1116*25c28e83SPiotr Jasiukajtis sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5; 1117*25c28e83SPiotr Jasiukajtis sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0; 1118*25c28e83SPiotr Jasiukajtis faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; 1119*25c28e83SPiotr Jasiukajtis 1120*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (5_1) fx0 = *(float*)((char*)px - addrc0); 1121*25c28e83SPiotr Jasiukajtis 1122*25c28e83SPiotr Jasiukajtis fmuld %f38,%f20,%f38 ! 
(0_1) dtmp0 *= x20; 1123*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000 1124*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update10 ! (5_1) if ( b0 > 0x7f800000 ) 1125*25c28e83SPiotr Jasiukajtis faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0; 1126*25c28e83SPiotr Jasiukajtis.cont10: 1127*25c28e83SPiotr Jasiukajtis fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; 1128*25c28e83SPiotr Jasiukajtis nop 1129*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (5_1) y0 = (double)fy0; 1130*25c28e83SPiotr Jasiukajtis 1131*25c28e83SPiotr Jasiukajtis faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; 1132*25c28e83SPiotr Jasiukajtis add %o2,stridez,%o1 ! pz += stridez 1133*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 1134*25c28e83SPiotr Jasiukajtis 1135*25c28e83SPiotr Jasiukajtis sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; 1136*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 1137*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 1138*25c28e83SPiotr Jasiukajtis.den0: 1139*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28; 1140*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 1141*25c28e83SPiotr Jasiukajtis 1142*25c28e83SPiotr Jasiukajtis faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7; 1143*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27; 1144*25c28e83SPiotr Jasiukajtis 1145*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; 1146*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr; 1147*25c28e83SPiotr Jasiukajtis fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; 1148*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; 1149*25c28e83SPiotr Jasiukajtis 1150*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; 1151*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (5_1) signx0 &= -16; 1152*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0; 1153*25c28e83SPiotr Jasiukajtis faddd %f4,K2,%f40 ! 
(1_1) dtmp0 += K2; 1154*25c28e83SPiotr Jasiukajtis 1155*25c28e83SPiotr Jasiukajtis fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0; 1156*25c28e83SPiotr Jasiukajtis st %f2,[%o2] ! (5_2) *pz = ftmp0; 1157*25c28e83SPiotr Jasiukajtis fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0; 1158*25c28e83SPiotr Jasiukajtis 1159*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1160*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0; 1161*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (5_1) signy0 &= -8; 1162*25c28e83SPiotr Jasiukajtis fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20; 1163*25c28e83SPiotr Jasiukajtis 1164*25c28e83SPiotr Jasiukajtis fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; 1165*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; 1166*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 1167*25c28e83SPiotr Jasiukajtis faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; 1168*25c28e83SPiotr Jasiukajtis 1169*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; 1170*25c28e83SPiotr Jasiukajtis fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; 1171*25c28e83SPiotr Jasiukajtis 1172*25c28e83SPiotr Jasiukajtis cmp %l7,%o5 1173*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up4 1174*25c28e83SPiotr Jasiukajtis fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20; 1175*25c28e83SPiotr Jasiukajtis.co4: 1176*25c28e83SPiotr Jasiukajtis nop 1177*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 1178*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up5 1179*25c28e83SPiotr Jasiukajtis faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6; 1180*25c28e83SPiotr Jasiukajtis.co5: 1181*25c28e83SPiotr Jasiukajtis ldd [%o2+%o4],%f36 ! (5_1) cadd0 = *(double*)(ltmp0 + signy0); 1182*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7f800000 1183*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up6 1184*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1185*25c28e83SPiotr Jasiukajtis.co6: 1186*25c28e83SPiotr Jasiukajtis sub %l6,%l7,%o2 ! 
(0_0) ldiff0 = ax0 - ay0; 1187*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 1188*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up7 1189*25c28e83SPiotr Jasiukajtis 1190*25c28e83SPiotr Jasiukajtis fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; 1191*25c28e83SPiotr Jasiukajtis.co7: 1192*25c28e83SPiotr Jasiukajtis sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; 1193*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; 1194*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; 1195*25c28e83SPiotr Jasiukajtis 1196*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8; 1197*25c28e83SPiotr Jasiukajtis and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; 1198*25c28e83SPiotr Jasiukajtis fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 1199*25c28e83SPiotr Jasiukajtis 1200*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); 1201*25c28e83SPiotr Jasiukajtis sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; 1202*25c28e83SPiotr Jasiukajtis sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 1203*25c28e83SPiotr Jasiukajtis faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; 1204*25c28e83SPiotr Jasiukajtis 1205*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); 1206*25c28e83SPiotr Jasiukajtis sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; 1207*25c28e83SPiotr Jasiukajtis add %o1,stridez,%o2 ! pz += stridez 1208*25c28e83SPiotr Jasiukajtis 1209*25c28e83SPiotr Jasiukajtis fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; 1210*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000 1211*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update11 ! (0_0) if ( b0 > 0x7f800000 ) 1212*25c28e83SPiotr Jasiukajtis faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; 1213*25c28e83SPiotr Jasiukajtis.cont11: 1214*25c28e83SPiotr Jasiukajtis fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; 1215*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 1216*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! 
px += stridex 1217*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 1218*25c28e83SPiotr Jasiukajtis 1219*25c28e83SPiotr Jasiukajtis faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; 1220*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 1221*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1222*25c28e83SPiotr Jasiukajtis 1223*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 1224*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 1225*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 1226*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (0_0) x0 = (double)fx0; 1227*25c28e83SPiotr Jasiukajtis.den1: 1228*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; 1229*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (0_0) signx0 &= -16; 1230*25c28e83SPiotr Jasiukajtis faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7; 1231*25c28e83SPiotr Jasiukajtis 1232*25c28e83SPiotr Jasiukajtis fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20; 1233*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (0_0) signy0 &= -8; 1234*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; 1235*25c28e83SPiotr Jasiukajtis 1236*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; 1237*25c28e83SPiotr Jasiukajtis faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; 1238*25c28e83SPiotr Jasiukajtis 1239*25c28e83SPiotr Jasiukajtis fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; 1240*25c28e83SPiotr Jasiukajtis nop 1241*25c28e83SPiotr Jasiukajtis st %f2,[%o1] ! (0_1) *pz = ftmp0 1242*25c28e83SPiotr Jasiukajtis fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0; 1243*25c28e83SPiotr Jasiukajtis 1244*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1245*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; 1246*25c28e83SPiotr Jasiukajtis fmuld %f34,%f4,%f34 ! 
(4_1) dtmp0 *= x20; 1247*25c28e83SPiotr Jasiukajtis 1248*25c28e83SPiotr Jasiukajtis fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; 1249*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; 1250*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 1251*25c28e83SPiotr Jasiukajtis faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4; 1252*25c28e83SPiotr Jasiukajtis 1253*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; 1254*25c28e83SPiotr Jasiukajtis fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; 1255*25c28e83SPiotr Jasiukajtis 1256*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 1257*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up8 1258*25c28e83SPiotr Jasiukajtis fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20; 1259*25c28e83SPiotr Jasiukajtis.co8: 1260*25c28e83SPiotr Jasiukajtis nop 1261*25c28e83SPiotr Jasiukajtis cmp %g1,%o5 1262*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up9 1263*25c28e83SPiotr Jasiukajtis faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6; 1264*25c28e83SPiotr Jasiukajtis.co9: 1265*25c28e83SPiotr Jasiukajtis ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); 1266*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 1267*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up10 1268*25c28e83SPiotr Jasiukajtis fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 1269*25c28e83SPiotr Jasiukajtis.co10: 1270*25c28e83SPiotr Jasiukajtis sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; 1271*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7f800000 1272*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up11 1273*25c28e83SPiotr Jasiukajtis 1274*25c28e83SPiotr Jasiukajtis fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; 1275*25c28e83SPiotr Jasiukajtis.co11: 1276*25c28e83SPiotr Jasiukajtis sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; 1277*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; 1278*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; 1279*25c28e83SPiotr Jasiukajtis 1280*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! 
(5_1) dtmp0 += K8; 1281*25c28e83SPiotr Jasiukajtis and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; 1282*25c28e83SPiotr Jasiukajtis fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 1283*25c28e83SPiotr Jasiukajtis 1284*25c28e83SPiotr Jasiukajtis lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); 1285*25c28e83SPiotr Jasiukajtis sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; 1286*25c28e83SPiotr Jasiukajtis sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; 1287*25c28e83SPiotr Jasiukajtis faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3; 1288*25c28e83SPiotr Jasiukajtis 1289*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); 1290*25c28e83SPiotr Jasiukajtis sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; 1291*25c28e83SPiotr Jasiukajtis add %o2,stridez,%o1 ! pz += stridez 1292*25c28e83SPiotr Jasiukajtis 1293*25c28e83SPiotr Jasiukajtis fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; 1294*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 1295*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update12 ! (1_0) if ( b0 > 0x7f800000 ) 1296*25c28e83SPiotr Jasiukajtis faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; 1297*25c28e83SPiotr Jasiukajtis.cont12: 1298*25c28e83SPiotr Jasiukajtis fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; 1299*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 1300*25c28e83SPiotr Jasiukajtis nop 1301*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (1_0) y0 = (double)fy0; 1302*25c28e83SPiotr Jasiukajtis 1303*25c28e83SPiotr Jasiukajtis faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; 1304*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 1305*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 1306*25c28e83SPiotr Jasiukajtis fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 1307*25c28e83SPiotr Jasiukajtis 1308*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; 1309*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! 
(1_0) ltmp0 += (char*)cadd_arr; 1310*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (1_0) x0 = (double)fx0; 1311*25c28e83SPiotr Jasiukajtis.den2: 1312*25c28e83SPiotr Jasiukajtis faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7; 1313*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (1_0) signx0 &= -16; 1314*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (1_0) signy0 &= -8; 1315*25c28e83SPiotr Jasiukajtis 1316*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; 1317*25c28e83SPiotr Jasiukajtis fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20; 1318*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; 1319*25c28e83SPiotr Jasiukajtis 1320*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; 1321*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; 1322*25c28e83SPiotr Jasiukajtis faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2; 1323*25c28e83SPiotr Jasiukajtis 1324*25c28e83SPiotr Jasiukajtis fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; 1325*25c28e83SPiotr Jasiukajtis nop 1326*25c28e83SPiotr Jasiukajtis st %f2,[%o2] ! (1_1) *pz = ftmp0; 1327*25c28e83SPiotr Jasiukajtis fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; 1328*25c28e83SPiotr Jasiukajtis 1329*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1330*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; 1331*25c28e83SPiotr Jasiukajtis fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20; 1332*25c28e83SPiotr Jasiukajtis 1333*25c28e83SPiotr Jasiukajtis fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; 1334*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; 1335*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 1336*25c28e83SPiotr Jasiukajtis faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4; 1337*25c28e83SPiotr Jasiukajtis 1338*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; 1339*25c28e83SPiotr Jasiukajtis fmuld %f40,%f24,%f38 ! 
(3_1) dtmp0 *= x20; 1340*25c28e83SPiotr Jasiukajtis 1341*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 1342*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up12 1343*25c28e83SPiotr Jasiukajtis fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; 1344*25c28e83SPiotr Jasiukajtis.co12: 1345*25c28e83SPiotr Jasiukajtis nop 1346*25c28e83SPiotr Jasiukajtis cmp %g5,%o5 1347*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up13 1348*25c28e83SPiotr Jasiukajtis faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6; 1349*25c28e83SPiotr Jasiukajtis.co13: 1350*25c28e83SPiotr Jasiukajtis ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); 1351*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 1352*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up14 1353*25c28e83SPiotr Jasiukajtis fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 1354*25c28e83SPiotr Jasiukajtis.co14: 1355*25c28e83SPiotr Jasiukajtis sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; 1356*25c28e83SPiotr Jasiukajtis cmp %g5,_0x7f800000 1357*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up15 1358*25c28e83SPiotr Jasiukajtis 1359*25c28e83SPiotr Jasiukajtis fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; 1360*25c28e83SPiotr Jasiukajtis.co15: 1361*25c28e83SPiotr Jasiukajtis sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; 1362*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; 1363*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (3_1) dtmp0 += K1; 1364*25c28e83SPiotr Jasiukajtis 1365*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; 1366*25c28e83SPiotr Jasiukajtis and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; 1367*25c28e83SPiotr Jasiukajtis fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20; 1368*25c28e83SPiotr Jasiukajtis 1369*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); 1370*25c28e83SPiotr Jasiukajtis sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; 1371*25c28e83SPiotr Jasiukajtis add %o1,stridez,%o2 ! pz += stridez 1372*25c28e83SPiotr Jasiukajtis faddd %f18,K3,%f18 ! 
(4_1) dtmp0 += K3; 1373*25c28e83SPiotr Jasiukajtis 1374*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); 1375*25c28e83SPiotr Jasiukajtis sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3; 1376*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 1377*25c28e83SPiotr Jasiukajtis 1378*25c28e83SPiotr Jasiukajtis fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20; 1379*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 1380*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update13 ! (2_0) if ( b0 > 0x7f800000 ) 1381*25c28e83SPiotr Jasiukajtis faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; 1382*25c28e83SPiotr Jasiukajtis.cont13: 1383*25c28e83SPiotr Jasiukajtis fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 1384*25c28e83SPiotr Jasiukajtis sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 1385*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 1386*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (2_0) y0 = (double)fy0; 1387*25c28e83SPiotr Jasiukajtis 1388*25c28e83SPiotr Jasiukajtis faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; 1389*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 1390*25c28e83SPiotr Jasiukajtis fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 1391*25c28e83SPiotr Jasiukajtis 1392*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (2_0) x0 = (double)fx0; 1393*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 1394*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 1395*25c28e83SPiotr Jasiukajtis.den3: 1396*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; 1397*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (2_0) signx0 &= -16; 1398*25c28e83SPiotr Jasiukajtis faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; 1399*25c28e83SPiotr Jasiukajtis 1400*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; 1401*25c28e83SPiotr Jasiukajtis fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20; 1402*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! 
(3_1) dtmp0 += K0; 1403*25c28e83SPiotr Jasiukajtis 1404*25c28e83SPiotr Jasiukajtis fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; 1405*25c28e83SPiotr Jasiukajtis faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2; 1406*25c28e83SPiotr Jasiukajtis 1407*25c28e83SPiotr Jasiukajtis fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; 1408*25c28e83SPiotr Jasiukajtis st %f1,[%o1] ! (2_1) *pz = ftmp0; 1409*25c28e83SPiotr Jasiukajtis fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; 1410*25c28e83SPiotr Jasiukajtis 1411*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1412*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; 1413*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (2_0) signy0 &= -8; 1414*25c28e83SPiotr Jasiukajtis fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; 1415*25c28e83SPiotr Jasiukajtis 1416*25c28e83SPiotr Jasiukajtis fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0; 1417*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; 1418*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 1419*25c28e83SPiotr Jasiukajtis faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4; 1420*25c28e83SPiotr Jasiukajtis 1421*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; 1422*25c28e83SPiotr Jasiukajtis fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20; 1423*25c28e83SPiotr Jasiukajtis 1424*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 1425*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up16 1426*25c28e83SPiotr Jasiukajtis fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; 1427*25c28e83SPiotr Jasiukajtis.co16: 1428*25c28e83SPiotr Jasiukajtis nop 1429*25c28e83SPiotr Jasiukajtis cmp %o0,%o5 1430*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up17 1431*25c28e83SPiotr Jasiukajtis faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; 1432*25c28e83SPiotr Jasiukajtis.co17: 1433*25c28e83SPiotr Jasiukajtis ldd [%o1+%o4],%f30 ! 
(2_0) cadd0 = *(double*)(ltmp0 + signy0); 1434*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 1435*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up18 1436*25c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 1437*25c28e83SPiotr Jasiukajtis.co18: 1438*25c28e83SPiotr Jasiukajtis sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; 1439*25c28e83SPiotr Jasiukajtis cmp %o0,_0x7f800000 1440*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up19 1441*25c28e83SPiotr Jasiukajtis 1442*25c28e83SPiotr Jasiukajtis fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0; 1443*25c28e83SPiotr Jasiukajtis.co19: 1444*25c28e83SPiotr Jasiukajtis sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; 1445*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; 1446*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1; 1447*25c28e83SPiotr Jasiukajtis 1448*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; 1449*25c28e83SPiotr Jasiukajtis and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; 1450*25c28e83SPiotr Jasiukajtis fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; 1451*25c28e83SPiotr Jasiukajtis 1452*25c28e83SPiotr Jasiukajtis lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); 1453*25c28e83SPiotr Jasiukajtis sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; 1454*25c28e83SPiotr Jasiukajtis add %o2,stridez,%o1 ! pz += stridez 1455*25c28e83SPiotr Jasiukajtis faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3; 1456*25c28e83SPiotr Jasiukajtis 1457*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); 1458*25c28e83SPiotr Jasiukajtis sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3; 1459*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 1460*25c28e83SPiotr Jasiukajtis 1461*25c28e83SPiotr Jasiukajtis fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20; 1462*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 1463*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update14 ! 
(3_0) if ( b0 > 0x7f800000 ) 1464*25c28e83SPiotr Jasiukajtis faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0; 1465*25c28e83SPiotr Jasiukajtis.cont14: 1466*25c28e83SPiotr Jasiukajtis fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 1467*25c28e83SPiotr Jasiukajtis sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 1468*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 1469*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (3_0) y0 = (double)fy0; 1470*25c28e83SPiotr Jasiukajtis 1471*25c28e83SPiotr Jasiukajtis faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 1472*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 1473*25c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 1474*25c28e83SPiotr Jasiukajtis 1475*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 1476*25c28e83SPiotr Jasiukajtis fstod %f1,%f16 ! (3_0) x0 = (double)fx0; 1477*25c28e83SPiotr Jasiukajtis.den4: 1478*25c28e83SPiotr Jasiukajtis faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; 1479*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; 1480*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (3_0) signx0 &= -16; 1481*25c28e83SPiotr Jasiukajtis 1482*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; 1483*25c28e83SPiotr Jasiukajtis fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; 1484*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0; 1485*25c28e83SPiotr Jasiukajtis 1486*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; 1487*25c28e83SPiotr Jasiukajtis fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; 1488*25c28e83SPiotr Jasiukajtis faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2; 1489*25c28e83SPiotr Jasiukajtis 1490*25c28e83SPiotr Jasiukajtis fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0; 1491*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! (3_0) signy0 &= -8; 1492*25c28e83SPiotr Jasiukajtis st %f1,[%o2] ! (3_1) *pz = ftmp0; 1493*25c28e83SPiotr Jasiukajtis fmuld %f8,%f8,%f16 ! 
(2_0) x20 = x0 * x0; 1494*25c28e83SPiotr Jasiukajtis 1495*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1496*25c28e83SPiotr Jasiukajtis add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; 1497*25c28e83SPiotr Jasiukajtis fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; 1498*25c28e83SPiotr Jasiukajtis 1499*25c28e83SPiotr Jasiukajtis fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0; 1500*25c28e83SPiotr Jasiukajtis and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; 1501*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 1502*25c28e83SPiotr Jasiukajtis faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; 1503*25c28e83SPiotr Jasiukajtis 1504*25c28e83SPiotr Jasiukajtis and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; 1505*25c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20; 1506*25c28e83SPiotr Jasiukajtis 1507*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 1508*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up20 1509*25c28e83SPiotr Jasiukajtis fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; 1510*25c28e83SPiotr Jasiukajtis.co20: 1511*25c28e83SPiotr Jasiukajtis nop 1512*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 1513*25c28e83SPiotr Jasiukajtis bl,pn %icc,.up21 1514*25c28e83SPiotr Jasiukajtis faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; 1515*25c28e83SPiotr Jasiukajtis.co21: 1516*25c28e83SPiotr Jasiukajtis ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); 1517*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 1518*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up22 1519*25c28e83SPiotr Jasiukajtis fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; 1520*25c28e83SPiotr Jasiukajtis.co22: 1521*25c28e83SPiotr Jasiukajtis sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; 1522*25c28e83SPiotr Jasiukajtis cmp %l5,_0x7f800000 1523*25c28e83SPiotr Jasiukajtis bge,pn %icc,.up23 1524*25c28e83SPiotr Jasiukajtis 1525*25c28e83SPiotr Jasiukajtis fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0; 1526*25c28e83SPiotr Jasiukajtis.co23: 1527*25c28e83SPiotr Jasiukajtis sra %o2,31,%l5 ! 
(4_0) ldiff0 >>= 31; 1528*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; 1529*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1; 1530*25c28e83SPiotr Jasiukajtis 1531*25c28e83SPiotr Jasiukajtis faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; 1532*25c28e83SPiotr Jasiukajtis and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; 1533*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; 1534*25c28e83SPiotr Jasiukajtis 1535*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); 1536*25c28e83SPiotr Jasiukajtis sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; 1537*25c28e83SPiotr Jasiukajtis add %o1,stridez,%o2 ! pz += stridez 1538*25c28e83SPiotr Jasiukajtis faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; 1539*25c28e83SPiotr Jasiukajtis 1540*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); 1541*25c28e83SPiotr Jasiukajtis sll %o7,3,%o7 ! (5_1) cmul0_ind = ldiff0 << 3; 1542*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 1543*25c28e83SPiotr Jasiukajtis 1544*25c28e83SPiotr Jasiukajtis fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20; 1545*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 1546*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update15 ! (4_0) if ( b0 > 0x7f800000 ) 1547*25c28e83SPiotr Jasiukajtis faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0; 1548*25c28e83SPiotr Jasiukajtis.cont15: 1549*25c28e83SPiotr Jasiukajtis fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 1550*25c28e83SPiotr Jasiukajtis sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 1551*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 1552*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (4_0) y0 = (double)fy0; 1553*25c28e83SPiotr Jasiukajtis 1554*25c28e83SPiotr Jasiukajtis faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 1555*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 1556*25c28e83SPiotr Jasiukajtis fmuld %f24,%f20,%f24 ! 
(0_0) dtmp0 *= x20; 1557*25c28e83SPiotr Jasiukajtis 1558*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (4_0) x0 = (double)fx0; 1559*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; 1560*25c28e83SPiotr Jasiukajtis.den5: 1561*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; 1562*25c28e83SPiotr Jasiukajtis subcc counter,6,counter ! counter? 1563*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; 1564*25c28e83SPiotr Jasiukajtis faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; 1565*25c28e83SPiotr Jasiukajtis 1566*25c28e83SPiotr Jasiukajtis fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; 1567*25c28e83SPiotr Jasiukajtis and %o5,-16,%o5 ! (4_0) signx0 &= -16; 1568*25c28e83SPiotr Jasiukajtis bpos,pt %icc,.main_loop 1569*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0; 1570*25c28e83SPiotr Jasiukajtis 1571*25c28e83SPiotr Jasiukajtis.tail: 1572*25c28e83SPiotr Jasiukajtis addcc counter,5,counter 1573*25c28e83SPiotr Jasiukajtis bneg,a,pn %icc,.begin 1574*25c28e83SPiotr Jasiukajtis or %g0,%o1,%o4 1575*25c28e83SPiotr Jasiukajtis 1576*25c28e83SPiotr Jasiukajtis faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; 1577*25c28e83SPiotr Jasiukajtis 1578*25c28e83SPiotr Jasiukajtis fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0; 1579*25c28e83SPiotr Jasiukajtis st %f22,[%o1] ! (4_2) *pz = ftmp0; 1580*25c28e83SPiotr Jasiukajtis 1581*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1582*25c28e83SPiotr Jasiukajtis bneg,a,pn %icc,.begin 1583*25c28e83SPiotr Jasiukajtis or %g0,%o2,%o4 1584*25c28e83SPiotr Jasiukajtis 1585*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1586*25c28e83SPiotr Jasiukajtis fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; 1587*25c28e83SPiotr Jasiukajtis 1588*25c28e83SPiotr Jasiukajtis fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0; 1589*25c28e83SPiotr Jasiukajtis faddd %f4,K4,%f4 ! 
(1_1) dtmp0 += K4; 1590*25c28e83SPiotr Jasiukajtis 1591*25c28e83SPiotr Jasiukajtis fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; 1592*25c28e83SPiotr Jasiukajtis 1593*25c28e83SPiotr Jasiukajtis 1594*25c28e83SPiotr Jasiukajtis faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; 1595*25c28e83SPiotr Jasiukajtis 1596*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 1597*25c28e83SPiotr Jasiukajtis 1598*25c28e83SPiotr Jasiukajtis fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0; 1599*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; 1600*25c28e83SPiotr Jasiukajtis 1601*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1602*25c28e83SPiotr Jasiukajtis 1603*25c28e83SPiotr Jasiukajtis faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; 1604*25c28e83SPiotr Jasiukajtis 1605*25c28e83SPiotr Jasiukajtis fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; 1606*25c28e83SPiotr Jasiukajtis faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0; 1607*25c28e83SPiotr Jasiukajtis 1608*25c28e83SPiotr Jasiukajtis faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; 1609*25c28e83SPiotr Jasiukajtis add %o2,stridez,%o1 ! pz += stridez 1610*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 1611*25c28e83SPiotr Jasiukajtis 1612*25c28e83SPiotr Jasiukajtis sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; 1613*25c28e83SPiotr Jasiukajtis 1614*25c28e83SPiotr Jasiukajtis fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; 1615*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; 1616*25c28e83SPiotr Jasiukajtis 1617*25c28e83SPiotr Jasiukajtis faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; 1618*25c28e83SPiotr Jasiukajtis 1619*25c28e83SPiotr Jasiukajtis fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0; 1620*25c28e83SPiotr Jasiukajtis st %f2,[%o2] ! 
(5_2) *pz = ftmp0; 1621*25c28e83SPiotr Jasiukajtis 1622*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1623*25c28e83SPiotr Jasiukajtis bneg,a,pn %icc,.begin 1624*25c28e83SPiotr Jasiukajtis or %g0,%o1,%o4 1625*25c28e83SPiotr Jasiukajtis 1626*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1627*25c28e83SPiotr Jasiukajtis 1628*25c28e83SPiotr Jasiukajtis fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; 1629*25c28e83SPiotr Jasiukajtis faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; 1630*25c28e83SPiotr Jasiukajtis 1631*25c28e83SPiotr Jasiukajtis fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; 1632*25c28e83SPiotr Jasiukajtis 1633*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1634*25c28e83SPiotr Jasiukajtis 1635*25c28e83SPiotr Jasiukajtis fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; 1636*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; 1637*25c28e83SPiotr Jasiukajtis 1638*25c28e83SPiotr Jasiukajtis sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; 1639*25c28e83SPiotr Jasiukajtis faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; 1640*25c28e83SPiotr Jasiukajtis 1641*25c28e83SPiotr Jasiukajtis add %o1,stridez,%o2 ! pz += stridez 1642*25c28e83SPiotr Jasiukajtis 1643*25c28e83SPiotr Jasiukajtis fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; 1644*25c28e83SPiotr Jasiukajtis faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; 1645*25c28e83SPiotr Jasiukajtis 1646*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1647*25c28e83SPiotr Jasiukajtis 1648*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; 1649*25c28e83SPiotr Jasiukajtis 1650*25c28e83SPiotr Jasiukajtis faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; 1651*25c28e83SPiotr Jasiukajtis 1652*25c28e83SPiotr Jasiukajtis fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; 1653*25c28e83SPiotr Jasiukajtis st %f2,[%o1] ! 
(0_1) *pz = ftmp0 1654*25c28e83SPiotr Jasiukajtis 1655*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1656*25c28e83SPiotr Jasiukajtis bneg,a,pn %icc,.begin 1657*25c28e83SPiotr Jasiukajtis or %g0,%o2,%o4 1658*25c28e83SPiotr Jasiukajtis 1659*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1660*25c28e83SPiotr Jasiukajtis 1661*25c28e83SPiotr Jasiukajtis fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; 1662*25c28e83SPiotr Jasiukajtis 1663*25c28e83SPiotr Jasiukajtis fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; 1664*25c28e83SPiotr Jasiukajtis 1665*25c28e83SPiotr Jasiukajtis fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; 1666*25c28e83SPiotr Jasiukajtis faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; 1667*25c28e83SPiotr Jasiukajtis 1668*25c28e83SPiotr Jasiukajtis sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; 1669*25c28e83SPiotr Jasiukajtis 1670*25c28e83SPiotr Jasiukajtis add %o2,stridez,%o1 ! pz += stridez 1671*25c28e83SPiotr Jasiukajtis 1672*25c28e83SPiotr Jasiukajtis fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; 1673*25c28e83SPiotr Jasiukajtis faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; 1674*25c28e83SPiotr Jasiukajtis 1675*25c28e83SPiotr Jasiukajtis faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; 1676*25c28e83SPiotr Jasiukajtis 1677*25c28e83SPiotr Jasiukajtis fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; 1678*25c28e83SPiotr Jasiukajtis st %f2,[%o2] ! (1_1) *pz = ftmp0; 1679*25c28e83SPiotr Jasiukajtis 1680*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1681*25c28e83SPiotr Jasiukajtis bneg,a,pn %icc,.begin 1682*25c28e83SPiotr Jasiukajtis or %g0,%o1,%o4 1683*25c28e83SPiotr Jasiukajtis 1684*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1685*25c28e83SPiotr Jasiukajtis 1686*25c28e83SPiotr Jasiukajtis fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; 1687*25c28e83SPiotr Jasiukajtis 1688*25c28e83SPiotr Jasiukajtis fmuld %f0,%f8,%f8 ! 
(2_1) dtmp0 = cmul0 * x0; 1689*25c28e83SPiotr Jasiukajtis 1690*25c28e83SPiotr Jasiukajtis add %o1,stridez,%o2 ! pz += stridez 1691*25c28e83SPiotr Jasiukajtis 1692*25c28e83SPiotr Jasiukajtis faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; 1693*25c28e83SPiotr Jasiukajtis 1694*25c28e83SPiotr Jasiukajtis fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; 1695*25c28e83SPiotr Jasiukajtis st %f1,[%o1] ! (2_1) *pz = ftmp0; 1696*25c28e83SPiotr Jasiukajtis 1697*25c28e83SPiotr Jasiukajtis ba .begin 1698*25c28e83SPiotr Jasiukajtis or %g0,%o2,%o4 1699*25c28e83SPiotr Jasiukajtis 1700*25c28e83SPiotr Jasiukajtis .align 16 1701*25c28e83SPiotr Jasiukajtis.spec0: ! special-operand path: ax0 or ay0 above 0x7f800000 goes to 2f, otherwise res = signx0 * M_PI_4 * dtmp0 (NOTE(review): the branch into .spec0 is outside this view) 1702*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000 1703*25c28e83SPiotr Jasiukajtis bg 2f ! if ( ax0 > 0x7f800000 ) 1704*25c28e83SPiotr Jasiukajtis srl %l3,30,%l3 ! signx0 = (unsigned)ux0 >> 30; 1705*25c28e83SPiotr Jasiukajtis 1706*25c28e83SPiotr Jasiukajtis cmp %l7,_0x7f800000 ! ay0 ? 0x7f800000 1707*25c28e83SPiotr Jasiukajtis bg 2f ! if ( ay0 > 0x7f800000 ) 1708*25c28e83SPiotr Jasiukajtis and %l3,2,%l3 ! signx0 &= 2; 1709*25c28e83SPiotr Jasiukajtis 1710*25c28e83SPiotr Jasiukajtis sra %l4,31,%l4 ! signy0 = uy0 >> 31; 1711*25c28e83SPiotr Jasiukajtis bne,a 1f ! if (ay0 != 0x7f800000) -- flags are still those of the ay0 compare 1712*25c28e83SPiotr Jasiukajtis add %l3,%l3,%l3 ! signx0 += signx0; (annulled delay slot: runs only when the branch is taken) 1713*25c28e83SPiotr Jasiukajtis 1714*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000 1715*25c28e83SPiotr Jasiukajtis bne,a 1f ! if ( ax0 != 0x7f800000 ) 1716*25c28e83SPiotr Jasiukajtis add %g0,2,%l3 ! signx0 = 2 (annulled delay slot: runs only when the branch is taken) 1717*25c28e83SPiotr Jasiukajtis 1718*25c28e83SPiotr Jasiukajtis add %l3,1,%l3 ! signx0 ++; 1719*25c28e83SPiotr Jasiukajtis1: 1720*25c28e83SPiotr Jasiukajtis sll %l4,3,%l4 ! signy0 <<= 3; 1721*25c28e83SPiotr Jasiukajtis st %l3,[%fp+tmp_pz] ! STORE signx0 1722*25c28e83SPiotr Jasiukajtis 1723*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+88],%f0 ! LOAD M_PI_4 1724*25c28e83SPiotr Jasiukajtis 1725*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%f2 ! 
LOAD signx0 1726*25c28e83SPiotr Jasiukajtis 1727*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%l4],%f4 ! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0); 1728*25c28e83SPiotr Jasiukajtis 1729*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey; 1730*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp1 = (double)signx0; 1731*25c28e83SPiotr Jasiukajtis 1732*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex; 1733*25c28e83SPiotr Jasiukajtis 1734*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f0 ! res = signx0 * M_PI_4; 1735*25c28e83SPiotr Jasiukajtis 1736*25c28e83SPiotr Jasiukajtis fmuld %f0,%f4,%f0 ! res *= dtmp0; 1737*25c28e83SPiotr Jasiukajtis fdtos %f0,%f0 ! ftmp0 = (float) res; 1738*25c28e83SPiotr Jasiukajtis st %f0,[%o4] ! *pz = ftmp0; 1739*25c28e83SPiotr Jasiukajtis 1740*25c28e83SPiotr Jasiukajtis ba .begin1 1741*25c28e83SPiotr Jasiukajtis add %o4,stridez,%o4 ! pz += stridez; 1742*25c28e83SPiotr Jasiukajtis2: ! ax0 or ay0 above 0x7f800000: the stored result is the float product ax0 * ay0 1743*25c28e83SPiotr Jasiukajtis std %l6,[%fp+tmp_pz] ! *(float*)&ax0, *(float*)&ay0 1744*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp_pz],%f0 ! *(float*)&ax0, *(float*)&ay0 1745*25c28e83SPiotr Jasiukajtis 1746*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey; 1747*25c28e83SPiotr Jasiukajtis 1748*25c28e83SPiotr Jasiukajtis fmuls %f0,%f1,%f0 ! ftmp0 = *(float*)&ax0 * *(float*)&ay0; 1749*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex; 1750*25c28e83SPiotr Jasiukajtis st %f0,[%o4] ! *pz = ftmp0; 1751*25c28e83SPiotr Jasiukajtis 1752*25c28e83SPiotr Jasiukajtis ba .begin1 1753*25c28e83SPiotr Jasiukajtis add %o4,stridez,%o4 ! 
pz += stridez; 1754*25c28e83SPiotr Jasiukajtis 1755*25c28e83SPiotr Jasiukajtis .align 16 1756*25c28e83SPiotr Jasiukajtis.spec1: 1757*25c28e83SPiotr Jasiukajtis cmp %l6,0 1758*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 1759*25c28e83SPiotr Jasiukajtis nop 1760*25c28e83SPiotr Jasiukajtis 1761*25c28e83SPiotr Jasiukajtis cmp %l7,0 1762*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 1763*25c28e83SPiotr Jasiukajtis nop 1764*25c28e83SPiotr Jasiukajtis 1765*25c28e83SPiotr Jasiukajtis sra %l4,28,%l4 ! signy0 = uy0 >> 28; (this path runs only when %l6 == 0 and %l7 == 0: res is looked up in cadd_arr from the sign bits and ldiff0) 1766*25c28e83SPiotr Jasiukajtis 1767*25c28e83SPiotr Jasiukajtis sra %l3,27,%l3 ! signx0 = ux0 >> 27; 1768*25c28e83SPiotr Jasiukajtis and %l4,-8,%l4 ! signy0 &= -8; 1769*25c28e83SPiotr Jasiukajtis 1770*25c28e83SPiotr Jasiukajtis sra %o2,31,%o2 ! ldiff0 >>= 31; 1771*25c28e83SPiotr Jasiukajtis and %l3,-16,%l3 ! signx0 &= -16; 1772*25c28e83SPiotr Jasiukajtis 1773*25c28e83SPiotr Jasiukajtis sll %o2,5,%o2 ! ldiff0 <<= 5; 1774*25c28e83SPiotr Jasiukajtis add %l4,%l3,%l3 ! signx0 += signy0; 1775*25c28e83SPiotr Jasiukajtis 1776*25c28e83SPiotr Jasiukajtis add %o2,%l3,%l3 ! signx0 += ldiff0; 1777*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey; 1778*25c28e83SPiotr Jasiukajtis 1779*25c28e83SPiotr Jasiukajtis ldd [cadd_arr+%l3],%f0 ! res = *(double*)((char*)(cadd_arr + 7) + signx0); 1780*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex; 1781*25c28e83SPiotr Jasiukajtis 1782*25c28e83SPiotr Jasiukajtis fdtos %f0,%f0 ! ftmp0 = (float) res; 1783*25c28e83SPiotr Jasiukajtis st %f0,[%o4] ! *pz = ftmp0; 1784*25c28e83SPiotr Jasiukajtis 1785*25c28e83SPiotr Jasiukajtis ba .begin1 1786*25c28e83SPiotr Jasiukajtis add %o4,stridez,%o4 ! pz += stridez; 1787*25c28e83SPiotr Jasiukajtis1: ! %l6 or %l7 is nonzero: reload fy0/fx0, rescale any operand whose magnitude bits are below 0x00800000, then continue at .spec1_cont 1788*25c28e83SPiotr Jasiukajtis stx %o4,[%fp+tmp_pz] ! save pz (%o4 is reused as an address below) 1789*25c28e83SPiotr Jasiukajtis sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; 1790*25c28e83SPiotr Jasiukajtis sub %i3,%i1,%l6 ! 
(0_0) addrc0 = (char*)px - (char*)py; 1791*25c28e83SPiotr Jasiukajtis 1792*25c28e83SPiotr Jasiukajtis and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; 1793*25c28e83SPiotr Jasiukajtis 1794*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); 1795*25c28e83SPiotr Jasiukajtis sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 1796*25c28e83SPiotr Jasiukajtis 1797*25c28e83SPiotr Jasiukajtis lda [%i1+%o2]0x82,%l5 ! (0_0) fy0 = *(float*)((char*)py + addrc0); (same word, integer-register copy) 1798*25c28e83SPiotr Jasiukajtis 1799*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); 1800*25c28e83SPiotr Jasiukajtis sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; 1801*25c28e83SPiotr Jasiukajtis 1802*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%g5 ! (0_0) fx0 = *(float*)((char*)px - addrc0); (same word, integer-register copy) 1803*25c28e83SPiotr Jasiukajtis 1804*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 1805*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 1806*25c28e83SPiotr Jasiukajtis 1807*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 1808*25c28e83SPiotr Jasiukajtis 1809*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 1810*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 1811*25c28e83SPiotr Jasiukajtis 1812*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 1813*25c28e83SPiotr Jasiukajtis 1814*25c28e83SPiotr Jasiukajtis and %l5,_0x7fffffff,%l4 ! itmp0 = fy0 bits & 0x7fffffff; 1815*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%g1 ! %g1 = 0x00800000 1816*25c28e83SPiotr Jasiukajtis 1817*25c28e83SPiotr Jasiukajtis cmp %l4,%g1 ! itmp0 ? 0x00800000 1818*25c28e83SPiotr Jasiukajtis bge,a %icc,1f ! if ( itmp0 >= 0x00800000 ) plain conversion in the annulled delay slot 1819*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 1820*25c28e83SPiotr Jasiukajtis 1821*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 1822*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 1823*25c28e83SPiotr Jasiukajtis sra %l5,28,%l4 ! 
itmp0 >>= 28; 1824*25c28e83SPiotr Jasiukajtis 1825*25c28e83SPiotr Jasiukajtis and %l4,-8,%l4 1826*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 1827*25c28e83SPiotr Jasiukajtis 1828*25c28e83SPiotr Jasiukajtis fmuld %f40,%f0,%f40 ! dtmp0 *= C2ONM149; 1829*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 1830*25c28e83SPiotr Jasiukajtis 1831*25c28e83SPiotr Jasiukajtis fmuld %f40,%f0,%f40 ! dtmp0 *= dsign; 1832*25c28e83SPiotr Jasiukajtis1: 1833*25c28e83SPiotr Jasiukajtis and %g5,_0x7fffffff,%l4 ! itmp0 = fx0 bits & 0x7fffffff; 1834*25c28e83SPiotr Jasiukajtis cmp %l4,%g1 ! itmp0 ? 0x00800000 1835*25c28e83SPiotr Jasiukajtis bge,a %icc,.spec1_cont 1836*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (0_0) x0 = (double)fx0; 1837*25c28e83SPiotr Jasiukajtis 1838*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 1839*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 1840*25c28e83SPiotr Jasiukajtis sra %g5,28,%l4 ! itmp0 >>= 28; 1841*25c28e83SPiotr Jasiukajtis 1842*25c28e83SPiotr Jasiukajtis and %l4,-8,%l4 ! itmp0 &= -8; 1843*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 1844*25c28e83SPiotr Jasiukajtis 1845*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 1846*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 1847*25c28e83SPiotr Jasiukajtis 1848*25c28e83SPiotr Jasiukajtis ba .spec1_cont 1849*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1850*25c28e83SPiotr Jasiukajtis 1851*25c28e83SPiotr Jasiukajtis .align 16 1852*25c28e83SPiotr Jasiukajtis.update0: ! fix-up entry paired with .cont0/.d0. counter <= 0: reload %f2 from cmul_arr, zero %f0, resume at .cont0. Otherwise either save counter/py/px in the frame and set counter = 0 (label 2), or rescale operands whose magnitude bits are below 0x00800000 and resume at .d0 1853*25c28e83SPiotr Jasiukajtis cmp counter,0 ! counter ? 0 1854*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f ! if ( counter > 0 ) 1855*25c28e83SPiotr Jasiukajtis nop 1856*25c28e83SPiotr Jasiukajtis 1857*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 1858*25c28e83SPiotr Jasiukajtis ba .cont0 1859*25c28e83SPiotr Jasiukajtis fzero %f0 1860*25c28e83SPiotr Jasiukajtis1: 1861*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! 
(4_0) b0 ? 0x7f800000 1862*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f 1863*25c28e83SPiotr Jasiukajtis nop 1864*25c28e83SPiotr Jasiukajtis2: 1865*25c28e83SPiotr Jasiukajtis sub counter,0,counter 1866*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1867*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] 1868*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] 1869*25c28e83SPiotr Jasiukajtis 1870*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 1871*25c28e83SPiotr Jasiukajtis or %g0,0,counter 1872*25c28e83SPiotr Jasiukajtis ba .cont0 1873*25c28e83SPiotr Jasiukajtis fzero %f0 1874*25c28e83SPiotr Jasiukajtis1: 1875*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 1876*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 1877*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 ! %o5 = 0x00800000 (plain delay slot: always executed) 1878*25c28e83SPiotr Jasiukajtis 1879*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 1880*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! %l3 and %l4 both have zero magnitude bits: take the save path at 2 1881*25c28e83SPiotr Jasiukajtis nop 1882*25c28e83SPiotr Jasiukajtis1: 1883*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_px] ! spill fy0 bits to the tmp_px frame slot 1884*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_px+4] ! spill fx0 bits next to them 1885*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px],%o4 ! itmp0 = fy0 bits 1886*25c28e83SPiotr Jasiukajtis 1887*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 1888*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 ! ? 0x00800000 1889*25c28e83SPiotr Jasiukajtis bge,a 1f 1890*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 1891*25c28e83SPiotr Jasiukajtis 1892*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 1893*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 1894*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 1895*25c28e83SPiotr Jasiukajtis 1896*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 1897*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 1898*25c28e83SPiotr Jasiukajtis 1899*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! 
dtmp0 *= C2ONM149; 1900*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 1901*25c28e83SPiotr Jasiukajtis 1902*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1903*25c28e83SPiotr Jasiukajtis1: 1904*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 1905*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 1906*25c28e83SPiotr Jasiukajtis 1907*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px+4],%o4 ! itmp0 = fx0 bits 1908*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 1909*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 ! ? 0x00800000 1910*25c28e83SPiotr Jasiukajtis bge,a 1f 1911*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 1912*25c28e83SPiotr Jasiukajtis 1913*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 1914*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 1915*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 1916*25c28e83SPiotr Jasiukajtis 1917*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 1918*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 1919*25c28e83SPiotr Jasiukajtis 1920*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 1921*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 1922*25c28e83SPiotr Jasiukajtis 1923*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1924*25c28e83SPiotr Jasiukajtis1: 1925*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 1926*25c28e83SPiotr Jasiukajtis 1927*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; 1928*25c28e83SPiotr Jasiukajtis ba .d0 ! rejoin at .d0 1929*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! 
(1_0) ltmp0 += (char*)cadd_arr; 1930*25c28e83SPiotr Jasiukajtis 1931*25c28e83SPiotr Jasiukajtis .align 16 1932*25c28e83SPiotr Jasiukajtis.update1: 1933*25c28e83SPiotr Jasiukajtis cmp counter,1 1934*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 1935*25c28e83SPiotr Jasiukajtis nop 1936*25c28e83SPiotr Jasiukajtis 1937*25c28e83SPiotr Jasiukajtis fzero %f0 1938*25c28e83SPiotr Jasiukajtis ba .cont1 1939*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 1940*25c28e83SPiotr Jasiukajtis1: 1941*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 1942*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f ! if ( b0 > 0x7f800000 ) 1943*25c28e83SPiotr Jasiukajtis nop 1944*25c28e83SPiotr Jasiukajtis2: ! .update1 save path: tmp_counter = counter - 1, tmp_py = py, tmp_px = px; then counter = 1 and .cont1 runs with %f0 = 0 and %f2 reloaded from cmul_arr 1945*25c28e83SPiotr Jasiukajtis sub counter,1,counter ! counter -= 1 1946*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] ! save it 1947*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] ! save py 1948*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] ! save px 1949*25c28e83SPiotr Jasiukajtis 1950*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 1951*25c28e83SPiotr Jasiukajtis or %g0,1,counter ! counter = 1 1952*25c28e83SPiotr Jasiukajtis ba .cont1 1953*25c28e83SPiotr Jasiukajtis fzero %f0 1954*25c28e83SPiotr Jasiukajtis1: 1955*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 1956*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 1957*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 ! %o5 = 0x00800000 (plain delay slot: always executed) 1958*25c28e83SPiotr Jasiukajtis 1959*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 1960*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! %l3 and %l4 both have zero magnitude bits: take the save path at 2 1961*25c28e83SPiotr Jasiukajtis nop 1962*25c28e83SPiotr Jasiukajtis1: 1963*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_px] ! spill fy0 bits to the tmp_px frame slot 1964*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_px+4] ! spill fx0 bits next to them 1965*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px],%o4 ! itmp0 = fy0 bits 1966*25c28e83SPiotr Jasiukajtis fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 1967*25c28e83SPiotr Jasiukajtis 1968*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! 
itmp0 & 0x7fffffff 1969*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 1970*25c28e83SPiotr Jasiukajtis bge,a 1f 1971*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 1972*25c28e83SPiotr Jasiukajtis 1973*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 1974*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 1975*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 1976*25c28e83SPiotr Jasiukajtis 1977*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 1978*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 1979*25c28e83SPiotr Jasiukajtis 1980*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 1981*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 1982*25c28e83SPiotr Jasiukajtis 1983*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1984*25c28e83SPiotr Jasiukajtis1: 1985*25c28e83SPiotr Jasiukajtis 1986*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 1987*25c28e83SPiotr Jasiukajtis 1988*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px+4],%o4 ! itmp0 = fx0 bits 1989*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 1990*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 ! ? 0x00800000 1991*25c28e83SPiotr Jasiukajtis bge,a 1f 1992*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 1993*25c28e83SPiotr Jasiukajtis 1994*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 1995*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 1996*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 1997*25c28e83SPiotr Jasiukajtis 1998*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 1999*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2000*25c28e83SPiotr Jasiukajtis 2001*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2002*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! 
dsign = *(double*)((char*)cmul_arr + itmp0); 2003*25c28e83SPiotr Jasiukajtis 2004*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 2005*25c28e83SPiotr Jasiukajtis1: 2006*25c28e83SPiotr Jasiukajtis sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 2007*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 2008*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2009*25c28e83SPiotr Jasiukajtis 2010*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 2011*25c28e83SPiotr Jasiukajtis ba .d1 ! rejoin at .d1 2012*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 2013*25c28e83SPiotr Jasiukajtis 2014*25c28e83SPiotr Jasiukajtis .align 16 2015*25c28e83SPiotr Jasiukajtis.update2: ! fix-up entry paired with .cont2/.d2; here the operand pair lives in %f0/%f1. counter <= 2: reload %f1 from cmul_arr, zero %f0, resume at .cont2. Otherwise either save counter/py/px in the frame and set counter = 2 (label 2), or rescale operands whose magnitude bits are below 0x00800000 and resume at .d2 2016*25c28e83SPiotr Jasiukajtis cmp counter,2 ! counter ? 2 2017*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f ! if ( counter > 2 ) 2018*25c28e83SPiotr Jasiukajtis nop 2019*25c28e83SPiotr Jasiukajtis 2020*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f1 2021*25c28e83SPiotr Jasiukajtis ba .cont2 2022*25c28e83SPiotr Jasiukajtis fzeros %f0 2023*25c28e83SPiotr Jasiukajtis1: 2024*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2025*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f ! if ( b0 > 0x7f800000 ) 2026*25c28e83SPiotr Jasiukajtis nop 2027*25c28e83SPiotr Jasiukajtis2: 2028*25c28e83SPiotr Jasiukajtis sub counter,2,counter ! counter -= 2 2029*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] ! save it 2030*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] ! save py 2031*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] ! save px 2032*25c28e83SPiotr Jasiukajtis 2033*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f1 2034*25c28e83SPiotr Jasiukajtis or %g0,2,counter ! counter = 2 2035*25c28e83SPiotr Jasiukajtis ba .cont2 2036*25c28e83SPiotr Jasiukajtis fzeros %f0 2037*25c28e83SPiotr Jasiukajtis1: 2038*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! 
itmp0 & 0x7fffffff 2039*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2040*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 2041*25c28e83SPiotr Jasiukajtis 2042*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2043*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! %l3 and %l4 both have zero magnitude bits: take the save path at 2 2044*25c28e83SPiotr Jasiukajtis nop 2045*25c28e83SPiotr Jasiukajtis1: 2046*25c28e83SPiotr Jasiukajtis std %f0,[%fp+tmp_px] ! spill fy0 (%f0) and fx0 (%f1) bits to the tmp_px frame slot 2047*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px],%o4 ! itmp0 = fy0 bits 2048*25c28e83SPiotr Jasiukajtis fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 2049*25c28e83SPiotr Jasiukajtis 2050*25c28e83SPiotr Jasiukajtis faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 2051*25c28e83SPiotr Jasiukajtis 2052*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2053*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 ! ? 0x00800000 2054*25c28e83SPiotr Jasiukajtis bge,a 1f 2055*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2056*25c28e83SPiotr Jasiukajtis 2057*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2058*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2059*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2060*25c28e83SPiotr Jasiukajtis 2061*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2062*25c28e83SPiotr Jasiukajtis fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; 2063*25c28e83SPiotr Jasiukajtis 2064*25c28e83SPiotr Jasiukajtis fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; 2065*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2066*25c28e83SPiotr Jasiukajtis 2067*25c28e83SPiotr Jasiukajtis fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; 2068*25c28e83SPiotr Jasiukajtis1: 2069*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 2070*25c28e83SPiotr Jasiukajtis 2071*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px+4],%o4 ! itmp0 = fx0 bits 2072*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! 
itmp0 & 0x7fffffff 2073*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 2074*25c28e83SPiotr Jasiukajtis bge,a 1f 2075*25c28e83SPiotr Jasiukajtis fstod %f1,%f16 ! (5_1) x0 = (double)fx0; 2076*25c28e83SPiotr Jasiukajtis 2077*25c28e83SPiotr Jasiukajtis fabss %f1,%f16 ! fx0 = fabsf(fx0); 2078*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2079*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2080*25c28e83SPiotr Jasiukajtis 2081*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2082*25c28e83SPiotr Jasiukajtis fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; 2083*25c28e83SPiotr Jasiukajtis 2084*25c28e83SPiotr Jasiukajtis fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; 2085*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2086*25c28e83SPiotr Jasiukajtis 2087*25c28e83SPiotr Jasiukajtis fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; 2088*25c28e83SPiotr Jasiukajtis1: 2089*25c28e83SPiotr Jasiukajtis sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 2090*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 2091*25c28e83SPiotr Jasiukajtis 2092*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2093*25c28e83SPiotr Jasiukajtis ba .d2 ! rejoin at .d2 2094*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 2095*25c28e83SPiotr Jasiukajtis 2096*25c28e83SPiotr Jasiukajtis .align 16 2097*25c28e83SPiotr Jasiukajtis.update3: ! fix-up entry paired with .cont3/.d3. counter <= 3: zero %f0, reload %f2 from cmul_arr, resume at .cont3. Otherwise either save counter/py/px in the frame and set counter = 3 (label 2), or rescale operands whose magnitude bits are below 0x00800000 and resume at .d3 2098*25c28e83SPiotr Jasiukajtis cmp counter,3 ! counter ? 3 2099*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f ! if ( counter > 3 ) 2100*25c28e83SPiotr Jasiukajtis nop 2101*25c28e83SPiotr Jasiukajtis 2102*25c28e83SPiotr Jasiukajtis fzero %f0 2103*25c28e83SPiotr Jasiukajtis ba .cont3 2104*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2105*25c28e83SPiotr Jasiukajtis1: 2106*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 
0x7f800000 2107*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f 2108*25c28e83SPiotr Jasiukajtis nop 2109*25c28e83SPiotr Jasiukajtis2: 2110*25c28e83SPiotr Jasiukajtis sub counter,3,counter 2111*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2112*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] 2113*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] 2114*25c28e83SPiotr Jasiukajtis 2115*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2116*25c28e83SPiotr Jasiukajtis or %g0,3,counter 2117*25c28e83SPiotr Jasiukajtis ba .cont3 2118*25c28e83SPiotr Jasiukajtis fzero %f0 2119*25c28e83SPiotr Jasiukajtis1: 2120*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2121*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2122*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 ! %o5 = 0x00800000 (plain delay slot: always executed) 2123*25c28e83SPiotr Jasiukajtis 2124*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2125*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! %l3 and %l4 both have zero magnitude bits: take the save path at 2 2126*25c28e83SPiotr Jasiukajtis nop 2127*25c28e83SPiotr Jasiukajtis1: 2128*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_px] ! spill fy0 bits to the tmp_px frame slot 2129*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_px+4] ! spill fx0 bits next to them 2130*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px],%o4 ! itmp0 = fy0 bits 2131*25c28e83SPiotr Jasiukajtis fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 2132*25c28e83SPiotr Jasiukajtis 2133*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2134*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 ! ? 0x00800000 2135*25c28e83SPiotr Jasiukajtis bge,a 1f 2136*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2137*25c28e83SPiotr Jasiukajtis 2138*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2139*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2140*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2141*25c28e83SPiotr Jasiukajtis 2142*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2143*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2144*25c28e83SPiotr Jasiukajtis 2145*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! 
dtmp0 *= C2ONM149; 2146*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2147*25c28e83SPiotr Jasiukajtis 2148*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 2149*25c28e83SPiotr Jasiukajtis1: 2150*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 2151*25c28e83SPiotr Jasiukajtis faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 2152*25c28e83SPiotr Jasiukajtis fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 2153*25c28e83SPiotr Jasiukajtis 2154*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px+4],%o4 ! itmp0 = fx0 bits 2155*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2156*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 ! ? 0x00800000 2157*25c28e83SPiotr Jasiukajtis bge,a 1f 2158*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2159*25c28e83SPiotr Jasiukajtis 2160*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 2161*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2162*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2163*25c28e83SPiotr Jasiukajtis 2164*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2165*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2166*25c28e83SPiotr Jasiukajtis 2167*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2168*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2169*25c28e83SPiotr Jasiukajtis 2170*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 2171*25c28e83SPiotr Jasiukajtis1: 2172*25c28e83SPiotr Jasiukajtis sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 2173*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 2174*25c28e83SPiotr Jasiukajtis 2175*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2176*25c28e83SPiotr Jasiukajtis ba .d3 ! rejoin at .d3 2177*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! 
(4_0) signy0 = uy0 >> 28; 2178*25c28e83SPiotr Jasiukajtis 2179*25c28e83SPiotr Jasiukajtis .align 16 2180*25c28e83SPiotr Jasiukajtis.update4: 2181*25c28e83SPiotr Jasiukajtis cmp counter,4 2182*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 2183*25c28e83SPiotr Jasiukajtis nop 2184*25c28e83SPiotr Jasiukajtis 2185*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f1 2186*25c28e83SPiotr Jasiukajtis ba .cont4 2187*25c28e83SPiotr Jasiukajtis fzeros %f0 2188*25c28e83SPiotr Jasiukajtis1: 2189*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2190*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f ! if ( b0 > 0x7f800000 ) 2191*25c28e83SPiotr Jasiukajtis nop 2192*25c28e83SPiotr Jasiukajtis2: ! .update4 save path: tmp_counter = counter - 4, tmp_py = py, tmp_px = px; then counter = 4 and .cont4 runs with %f0 = 0 and %f1 reloaded from cmul_arr 2193*25c28e83SPiotr Jasiukajtis sub counter,4,counter ! counter -= 4 2194*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] ! save it 2195*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] ! save py 2196*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] ! save px 2197*25c28e83SPiotr Jasiukajtis 2198*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f1 2199*25c28e83SPiotr Jasiukajtis or %g0,4,counter ! counter = 4 2200*25c28e83SPiotr Jasiukajtis ba .cont4 2201*25c28e83SPiotr Jasiukajtis fzeros %f0 2202*25c28e83SPiotr Jasiukajtis1: 2203*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2204*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2205*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 ! %o5 = 0x00800000 (plain delay slot: always executed) 2206*25c28e83SPiotr Jasiukajtis 2207*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2208*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! %l3 and %l4 both have zero magnitude bits: take the save path at 2 2209*25c28e83SPiotr Jasiukajtis nop 2210*25c28e83SPiotr Jasiukajtis1: 2211*25c28e83SPiotr Jasiukajtis std %f0,[%fp+tmp_px] ! spill fy0 (%f0) and fx0 (%f1) bits to the tmp_px frame slot 2212*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px],%o4 ! itmp0 = fy0 bits 2213*25c28e83SPiotr Jasiukajtis fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; 2214*25c28e83SPiotr Jasiukajtis 2215*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff 2216*25c28e83SPiotr Jasiukajtis cmp %o1,%o5 ! ? 0x00800000 2217*25c28e83SPiotr Jasiukajtis bge,a 1f 2218*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! 
(0_0) y0 = (double)fy0; 2219*25c28e83SPiotr Jasiukajtis 2220*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2221*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2222*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2223*25c28e83SPiotr Jasiukajtis 2224*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2225*25c28e83SPiotr Jasiukajtis fitod %f0,%f14 ! dtmp0 = (double) *(int*)&fy0; 2226*25c28e83SPiotr Jasiukajtis 2227*25c28e83SPiotr Jasiukajtis fmuld %f14,%f40,%f40 ! dtmp0 *= C2ONM149; 2228*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f14 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2229*25c28e83SPiotr Jasiukajtis 2230*25c28e83SPiotr Jasiukajtis fmuld %f14,%f40,%f40 ! dtmp0 *= dsign; 2231*25c28e83SPiotr Jasiukajtis1: ! NOTE(review): the other .updateN paths in view do py += stridey around here; none is visible in .update4 - presumably handled around .d4, confirm 2232*25c28e83SPiotr Jasiukajtis faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; 2233*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 2234*25c28e83SPiotr Jasiukajtis 2235*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px+4],%o4 ! itmp0 = fx0 bits 2236*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff 2237*25c28e83SPiotr Jasiukajtis cmp %o1,%o5 ! ? 0x00800000 2238*25c28e83SPiotr Jasiukajtis bge,a 1f 2239*25c28e83SPiotr Jasiukajtis fstod %f1,%f2 ! (5_1) x0 = (double)fx0; 2240*25c28e83SPiotr Jasiukajtis 2241*25c28e83SPiotr Jasiukajtis fabss %f1,%f22 ! fx0 = fabsf(fx0); 2242*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2243*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2244*25c28e83SPiotr Jasiukajtis 2245*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2246*25c28e83SPiotr Jasiukajtis fitod %f22,%f22 ! dtmp0 = (double) *(int*)&fx0; 2247*25c28e83SPiotr Jasiukajtis 2248*25c28e83SPiotr Jasiukajtis fmuld %f22,%f0,%f22 ! dtmp0 *= C2ONM149; 2249*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2250*25c28e83SPiotr Jasiukajtis 2251*25c28e83SPiotr Jasiukajtis fmuld %f22,%f0,%f2 ! 
dtmp0 *= dsign; 2252*25c28e83SPiotr Jasiukajtis1: 2253*25c28e83SPiotr Jasiukajtis sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; 2254*25c28e83SPiotr Jasiukajtis ba .d4 2255*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2256*25c28e83SPiotr Jasiukajtis 2257*25c28e83SPiotr Jasiukajtis .align 16 2258*25c28e83SPiotr Jasiukajtis.update5: 2259*25c28e83SPiotr Jasiukajtis cmp counter,5 2260*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 2261*25c28e83SPiotr Jasiukajtis nop 2262*25c28e83SPiotr Jasiukajtis 2263*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2264*25c28e83SPiotr Jasiukajtis ba .cont5 2265*25c28e83SPiotr Jasiukajtis fzero %f0 2266*25c28e83SPiotr Jasiukajtis1: 2267*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2268*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f 2269*25c28e83SPiotr Jasiukajtis nop 2270*25c28e83SPiotr Jasiukajtis2: 2271*25c28e83SPiotr Jasiukajtis sub counter,5,counter 2272*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2273*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] 2274*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] 2275*25c28e83SPiotr Jasiukajtis 2276*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2277*25c28e83SPiotr Jasiukajtis or %g0,5,counter 2278*25c28e83SPiotr Jasiukajtis ba .cont5 2279*25c28e83SPiotr Jasiukajtis fzero %f0 2280*25c28e83SPiotr Jasiukajtis1: 2281*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2282*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2283*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 2284*25c28e83SPiotr Jasiukajtis 2285*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2286*25c28e83SPiotr Jasiukajtis be,pn %icc,2b 2287*25c28e83SPiotr Jasiukajtis nop 2288*25c28e83SPiotr Jasiukajtis1: 2289*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_px] 2290*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_px+4] 2291*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px],%o4 2292*25c28e83SPiotr Jasiukajtis fmuld %f40,%f4,%f34 ! 
(4_1) dtmp0 *= x20; 2293*25c28e83SPiotr Jasiukajtis 2294*25c28e83SPiotr Jasiukajtis stx %l5,[%fp+tmp_py] ! park %l5: it is scratch until the ldx below 2295*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2296*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 ! magnitude vs %o5 2297*25c28e83SPiotr Jasiukajtis bge,a 1f ! >=: the annulled-slot fstod is the whole conversion 2298*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2299*25c28e83SPiotr Jasiukajtis 2300*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2301*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2302*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2303*25c28e83SPiotr Jasiukajtis 2304*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2305*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2306*25c28e83SPiotr Jasiukajtis 2307*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2308*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2309*25c28e83SPiotr Jasiukajtis 2310*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 2311*25c28e83SPiotr Jasiukajtis1: 2312*25c28e83SPiotr Jasiukajtis faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; 2313*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 2314*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 2315*25c28e83SPiotr Jasiukajtis 2316*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_px+4],%o4 ! %o4 = second spilled word 2317*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2318*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 2319*25c28e83SPiotr Jasiukajtis bge,a 1f 2320*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2321*25c28e83SPiotr Jasiukajtis 2322*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2323*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2324*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 2325*25c28e83SPiotr Jasiukajtis 2326*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2327*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 !
dtmp0 = (double) *(int*)&fx0; 2328*25c28e83SPiotr Jasiukajtis 2329*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2330*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2331*25c28e83SPiotr Jasiukajtis 2332*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 2333*25c28e83SPiotr Jasiukajtis1: 2334*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_py],%l5 ! restore the parked %l5 2335*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 2336*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2337*25c28e83SPiotr Jasiukajtis 2338*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 2339*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 2340*25c28e83SPiotr Jasiukajtis ba .d5 ! rejoin at .d5 2341*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 2342*25c28e83SPiotr Jasiukajtis 2343*25c28e83SPiotr Jasiukajtis .align 16 ! stub: substitute or rescale the operands in %f0/%f2, then rejoin at .cont6 or .d6 2344*25c28e83SPiotr Jasiukajtis.update6: 2345*25c28e83SPiotr Jasiukajtis cmp counter,5 ! counter <= 5: hand back placeholders via .cont6 2346*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 2347*25c28e83SPiotr Jasiukajtis nop 2348*25c28e83SPiotr Jasiukajtis 2349*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 ! %f2 = first word of cmul_arr 2350*25c28e83SPiotr Jasiukajtis ba .cont6 2351*25c28e83SPiotr Jasiukajtis fzero %f0 ! delay slot: zero %f0:%f1 2352*25c28e83SPiotr Jasiukajtis1: 2353*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ?
0x7f800000 2354*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f ! greater: go test for zero / small magnitudes 2355*25c28e83SPiotr Jasiukajtis nop 2356*25c28e83SPiotr Jasiukajtis2: 2357*25c28e83SPiotr Jasiukajtis sub counter,5,counter ! counter -= 5 2358*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] ! park the reduced count in the frame 2359*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] ! park py 2360*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] ! park px 2361*25c28e83SPiotr Jasiukajtis 2362*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2363*25c28e83SPiotr Jasiukajtis or %g0,5,counter ! counter = 5 2364*25c28e83SPiotr Jasiukajtis ba .cont6 2365*25c28e83SPiotr Jasiukajtis fzero %f0 2366*25c28e83SPiotr Jasiukajtis1: 2367*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2368*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2369*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 ! delay slot: %o5 = 0x00800000 2370*25c28e83SPiotr Jasiukajtis 2371*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2372*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! %l3 and %l4 both have zero magnitude bits: take the 2: path 2373*25c28e83SPiotr Jasiukajtis nop 2374*25c28e83SPiotr Jasiukajtis1: 2375*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_pz] ! spill %f0 and %f2 so their bits can be read as integers 2376*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_pz+4] 2377*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%o4 ! %o4 = bits of %f0 2378*25c28e83SPiotr Jasiukajtis fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; 2379*25c28e83SPiotr Jasiukajtis 2380*25c28e83SPiotr Jasiukajtis stx %l5,[%fp+tmp_px] ! park %l5: it is scratch until the ldx below 2381*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2382*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 ! magnitude vs 0x00800000 2383*25c28e83SPiotr Jasiukajtis bge,a 1f ! >=: the annulled-slot fstod is the whole conversion 2384*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2385*25c28e83SPiotr Jasiukajtis 2386*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2387*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2388*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2389*25c28e83SPiotr Jasiukajtis 2390*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; (0, or -8 when the sign bit was set) 2391*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 !
dtmp0 = (double) *(int*)&fy0; 2392*25c28e83SPiotr Jasiukajtis 2393*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2394*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2395*25c28e83SPiotr Jasiukajtis 2396*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 2397*25c28e83SPiotr Jasiukajtis1: 2398*25c28e83SPiotr Jasiukajtis faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; 2399*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2400*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 2401*25c28e83SPiotr Jasiukajtis fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 2402*25c28e83SPiotr Jasiukajtis 2403*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz+4],%o4 ! %o4 = bits of %f2 as spilled above 2404*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2405*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 2406*25c28e83SPiotr Jasiukajtis bge,a 1f 2407*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2408*25c28e83SPiotr Jasiukajtis 2409*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2410*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2411*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 2412*25c28e83SPiotr Jasiukajtis 2413*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2414*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2415*25c28e83SPiotr Jasiukajtis 2416*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2417*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2418*25c28e83SPiotr Jasiukajtis 2419*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 2420*25c28e83SPiotr Jasiukajtis1: 2421*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_px],%l5 ! restore the parked %l5 2422*25c28e83SPiotr Jasiukajtis 2423*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 2424*25c28e83SPiotr Jasiukajtis 2425*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 !
(1_0) signy0 = uy0 >> 28; 2426*25c28e83SPiotr Jasiukajtis ba .d6 ! rejoin at .d6 2427*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; 2428*25c28e83SPiotr Jasiukajtis 2429*25c28e83SPiotr Jasiukajtis .align 16 ! stub: substitute or rescale the operands in %f0/%f2, then rejoin at .cont7 or .d7 2430*25c28e83SPiotr Jasiukajtis.update7: 2431*25c28e83SPiotr Jasiukajtis cmp counter,5 ! counter <= 5: hand back placeholders via .cont7 2432*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 2433*25c28e83SPiotr Jasiukajtis nop 2434*25c28e83SPiotr Jasiukajtis 2435*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 ! %f2 = first word of cmul_arr 2436*25c28e83SPiotr Jasiukajtis ba .cont7 2437*25c28e83SPiotr Jasiukajtis fzero %f0 ! delay slot: zero %f0:%f1 2438*25c28e83SPiotr Jasiukajtis1: 2439*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2440*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f ! greater: go test for zero / small magnitudes 2441*25c28e83SPiotr Jasiukajtis nop 2442*25c28e83SPiotr Jasiukajtis2: 2443*25c28e83SPiotr Jasiukajtis sub counter,5,counter ! counter -= 5 2444*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] ! park the reduced count in the frame 2445*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] ! park py 2446*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] ! park px 2447*25c28e83SPiotr Jasiukajtis 2448*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2449*25c28e83SPiotr Jasiukajtis or %g0,5,counter ! counter = 5 2450*25c28e83SPiotr Jasiukajtis ba .cont7 2451*25c28e83SPiotr Jasiukajtis fzero %f0 2452*25c28e83SPiotr Jasiukajtis1: 2453*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2454*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2455*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 ! delay slot: %o5 = 0x00800000 2456*25c28e83SPiotr Jasiukajtis 2457*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2458*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! %l3 and %l4 both have zero magnitude bits: take the 2: path 2459*25c28e83SPiotr Jasiukajtis nop 2460*25c28e83SPiotr Jasiukajtis1: 2461*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_pz] ! spill %f0 and %f2 so their bits can be read as integers 2462*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_pz+4] 2463*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%o4 ! %o4 = bits of %f0 2464*25c28e83SPiotr Jasiukajtis fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 2465*25c28e83SPiotr Jasiukajtis 2466*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 !
itmp0 & 0x7fffffff 2467*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 ! magnitude vs %o5 2468*25c28e83SPiotr Jasiukajtis bge,a 1f ! >=: the annulled-slot fstod is the whole conversion 2469*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2470*25c28e83SPiotr Jasiukajtis 2471*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2472*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2473*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2474*25c28e83SPiotr Jasiukajtis 2475*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2476*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2477*25c28e83SPiotr Jasiukajtis 2478*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2479*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2480*25c28e83SPiotr Jasiukajtis 2481*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 2482*25c28e83SPiotr Jasiukajtis1: 2483*25c28e83SPiotr Jasiukajtis faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; 2484*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 2485*25c28e83SPiotr Jasiukajtis fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 2486*25c28e83SPiotr Jasiukajtis 2487*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz+4],%o4 ! %o4 = second spilled word 2488*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2489*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 2490*25c28e83SPiotr Jasiukajtis bge,a 1f 2491*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2492*25c28e83SPiotr Jasiukajtis 2493*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2494*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2495*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 2496*25c28e83SPiotr Jasiukajtis 2497*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2498*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2499*25c28e83SPiotr Jasiukajtis 2500*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 !
dtmp0 *= C2ONM149; 2501*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2502*25c28e83SPiotr Jasiukajtis 2503*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 2504*25c28e83SPiotr Jasiukajtis1: 2505*25c28e83SPiotr Jasiukajtis sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 2506*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 2507*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2508*25c28e83SPiotr Jasiukajtis 2509*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 2510*25c28e83SPiotr Jasiukajtis ba .d7 ! rejoin at .d7 2511*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 2512*25c28e83SPiotr Jasiukajtis 2513*25c28e83SPiotr Jasiukajtis .align 16 ! stub: substitute or rescale the operands in %f0/%f1, then rejoin at .cont8 or .d8 2514*25c28e83SPiotr Jasiukajtis.update8: 2515*25c28e83SPiotr Jasiukajtis cmp counter,5 ! counter <= 5: hand back placeholders via .cont8 2516*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 2517*25c28e83SPiotr Jasiukajtis nop 2518*25c28e83SPiotr Jasiukajtis 2519*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f1 ! %f1 = first word of cmul_arr 2520*25c28e83SPiotr Jasiukajtis ba .cont8 2521*25c28e83SPiotr Jasiukajtis fzeros %f0 ! delay slot: zero %f0 2522*25c28e83SPiotr Jasiukajtis1: 2523*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2524*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f ! greater: go test for zero / small magnitudes 2525*25c28e83SPiotr Jasiukajtis nop 2526*25c28e83SPiotr Jasiukajtis2: 2527*25c28e83SPiotr Jasiukajtis sub counter,5,counter ! counter -= 5 2528*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] ! park the reduced count in the frame 2529*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] ! park py 2530*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] ! park px 2531*25c28e83SPiotr Jasiukajtis 2532*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f1 2533*25c28e83SPiotr Jasiukajtis or %g0,5,counter ! counter = 5 2534*25c28e83SPiotr Jasiukajtis ba .cont8 2535*25c28e83SPiotr Jasiukajtis fzeros %f0 2536*25c28e83SPiotr Jasiukajtis1: 2537*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 !
itmp0 & 0x7fffffff 2538*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2539*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 ! delay slot: %o5 = 0x00800000 2540*25c28e83SPiotr Jasiukajtis 2541*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2542*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! %l3 and %l4 both have zero magnitude bits: take the 2: path 2543*25c28e83SPiotr Jasiukajtis nop 2544*25c28e83SPiotr Jasiukajtis1: 2545*25c28e83SPiotr Jasiukajtis std %f0,[%fp+tmp_pz] ! spill %f0:%f1 so their bits can be read as integers 2546*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%o4 ! %o4 = bits of %f0 2547*25c28e83SPiotr Jasiukajtis fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 2548*25c28e83SPiotr Jasiukajtis 2549*25c28e83SPiotr Jasiukajtis faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 2550*25c28e83SPiotr Jasiukajtis 2551*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2552*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 ! magnitude vs 0x00800000 2553*25c28e83SPiotr Jasiukajtis bge,a 1f ! >=: the annulled-slot fstod is the whole conversion 2554*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2555*25c28e83SPiotr Jasiukajtis 2556*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2557*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2558*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2559*25c28e83SPiotr Jasiukajtis 2560*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; (0, or -8 when the sign bit was set) 2561*25c28e83SPiotr Jasiukajtis fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; 2562*25c28e83SPiotr Jasiukajtis 2563*25c28e83SPiotr Jasiukajtis fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; 2564*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2565*25c28e83SPiotr Jasiukajtis 2566*25c28e83SPiotr Jasiukajtis fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; 2567*25c28e83SPiotr Jasiukajtis1: 2568*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 2569*25c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 2570*25c28e83SPiotr Jasiukajtis 2571*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz+4],%o4 ! %o4 = bits of %f1 2572*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 !
itmp0 & 0x7fffffff 2573*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 2574*25c28e83SPiotr Jasiukajtis bge,a 1f 2575*25c28e83SPiotr Jasiukajtis fstod %f1,%f16 ! (5_1) x0 = (double)fx0; 2576*25c28e83SPiotr Jasiukajtis 2577*25c28e83SPiotr Jasiukajtis fabss %f1,%f16 ! fx0 = fabsf(fx0); 2578*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2579*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2580*25c28e83SPiotr Jasiukajtis 2581*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2582*25c28e83SPiotr Jasiukajtis fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; 2583*25c28e83SPiotr Jasiukajtis 2584*25c28e83SPiotr Jasiukajtis fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; 2585*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2586*25c28e83SPiotr Jasiukajtis 2587*25c28e83SPiotr Jasiukajtis fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; 2588*25c28e83SPiotr Jasiukajtis1: 2589*25c28e83SPiotr Jasiukajtis sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 2590*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 2591*25c28e83SPiotr Jasiukajtis 2592*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2593*25c28e83SPiotr Jasiukajtis ba .d8 ! rejoin at .d8 2594*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 2595*25c28e83SPiotr Jasiukajtis 2596*25c28e83SPiotr Jasiukajtis .align 16 ! stub: substitute or rescale the operands in %f0/%f2, then rejoin at .cont9 or .d9 2597*25c28e83SPiotr Jasiukajtis.update9: 2598*25c28e83SPiotr Jasiukajtis cmp counter,5 ! counter <= 5: hand back placeholders via .cont9 2599*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 2600*25c28e83SPiotr Jasiukajtis nop 2601*25c28e83SPiotr Jasiukajtis 2602*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 ! %f2 = first word of cmul_arr 2603*25c28e83SPiotr Jasiukajtis ba .cont9 2604*25c28e83SPiotr Jasiukajtis fzero %f0 ! delay slot: zero %f0:%f1 2605*25c28e83SPiotr Jasiukajtis1: 2606*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ?
0x7f800000 2607*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f ! greater: go test for zero / small magnitudes 2608*25c28e83SPiotr Jasiukajtis nop 2609*25c28e83SPiotr Jasiukajtis2: 2610*25c28e83SPiotr Jasiukajtis sub counter,5,counter ! counter -= 5 2611*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] ! park the reduced count in the frame 2612*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] ! park py 2613*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] ! park px 2614*25c28e83SPiotr Jasiukajtis 2615*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 ! %f2 = first word of cmul_arr 2616*25c28e83SPiotr Jasiukajtis or %g0,5,counter ! counter = 5 2617*25c28e83SPiotr Jasiukajtis ba .cont9 ! resume at .cont9 with placeholders 2618*25c28e83SPiotr Jasiukajtis fzero %f0 ! delay slot: zero %f0:%f1 2619*25c28e83SPiotr Jasiukajtis1: 2620*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2621*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2622*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 ! delay slot: %o5 = 0x00800000 2623*25c28e83SPiotr Jasiukajtis 2624*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2625*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! %l3 and %l4 both have zero magnitude bits: take the 2: path 2626*25c28e83SPiotr Jasiukajtis nop 2627*25c28e83SPiotr Jasiukajtis1: 2628*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_pz] ! spill %f0 and %f2 so their bits can be read as integers 2629*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_pz+4] 2630*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%o4 ! %o4 = bits of %f0 2631*25c28e83SPiotr Jasiukajtis fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 2632*25c28e83SPiotr Jasiukajtis 2633*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2634*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 ! magnitude vs 0x00800000 2635*25c28e83SPiotr Jasiukajtis bge,a 1f ! >=: the annulled-slot fstod is the whole conversion 2636*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2637*25c28e83SPiotr Jasiukajtis 2638*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2639*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2640*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2641*25c28e83SPiotr Jasiukajtis 2642*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; (0, or -8 when the sign bit was set) 2643*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2644*25c28e83SPiotr Jasiukajtis 2645*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 !
dtmp0 *= C2ONM149; 2646*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2647*25c28e83SPiotr Jasiukajtis 2648*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 2649*25c28e83SPiotr Jasiukajtis1: 2650*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 2651*25c28e83SPiotr Jasiukajtis faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 2652*25c28e83SPiotr Jasiukajtis fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 2653*25c28e83SPiotr Jasiukajtis 2654*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz+4],%o4 ! %o4 = bits of %f2 as spilled above 2655*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2656*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 2657*25c28e83SPiotr Jasiukajtis bge,a 1f 2658*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2659*25c28e83SPiotr Jasiukajtis 2660*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 2661*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2662*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2663*25c28e83SPiotr Jasiukajtis 2664*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2665*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2666*25c28e83SPiotr Jasiukajtis 2667*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2668*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2669*25c28e83SPiotr Jasiukajtis 2670*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 2671*25c28e83SPiotr Jasiukajtis1: 2672*25c28e83SPiotr Jasiukajtis sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 2673*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 2674*25c28e83SPiotr Jasiukajtis 2675*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2676*25c28e83SPiotr Jasiukajtis ba .d9 ! rejoin at .d9 2677*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 !
(4_0) signy0 = uy0 >> 28; 2678*25c28e83SPiotr Jasiukajtis 2679*25c28e83SPiotr Jasiukajtis .align 16 ! stub: substitute or rescale the operands in %f0/%f2, then rejoin at .cont10 or .den0 2680*25c28e83SPiotr Jasiukajtis.update10: 2681*25c28e83SPiotr Jasiukajtis cmp counter,1 ! counter <= 1: hand back placeholders via .cont10 2682*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 2683*25c28e83SPiotr Jasiukajtis nop 2684*25c28e83SPiotr Jasiukajtis 2685*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 ! %f2 = first word of cmul_arr 2686*25c28e83SPiotr Jasiukajtis ba .cont10 2687*25c28e83SPiotr Jasiukajtis fzero %f0 ! delay slot: zero %f0:%f1 2688*25c28e83SPiotr Jasiukajtis1: 2689*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2690*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f ! greater: go test for zero / small magnitudes 2691*25c28e83SPiotr Jasiukajtis nop 2692*25c28e83SPiotr Jasiukajtis2: 2693*25c28e83SPiotr Jasiukajtis sub counter,1,counter ! counter -= 1 2694*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] ! park the reduced count in the frame 2695*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] ! park py 2696*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] ! park px 2697*25c28e83SPiotr Jasiukajtis 2698*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2699*25c28e83SPiotr Jasiukajtis or %g0,1,counter ! counter = 1 2700*25c28e83SPiotr Jasiukajtis ba .cont10 2701*25c28e83SPiotr Jasiukajtis fzero %f0 2702*25c28e83SPiotr Jasiukajtis1: 2703*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2704*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2705*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 ! delay slot: %o5 = 0x00800000 2706*25c28e83SPiotr Jasiukajtis 2707*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2708*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! %l3 and %l4 both have zero magnitude bits: take the 2: path 2709*25c28e83SPiotr Jasiukajtis nop 2710*25c28e83SPiotr Jasiukajtis1: 2711*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_pz] ! spill %f0 and %f2 so their bits can be read as integers 2712*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_pz+4] 2713*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%o1 ! %o1 = bits of %f0 2714*25c28e83SPiotr Jasiukajtis fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; 2715*25c28e83SPiotr Jasiukajtis 2716*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o4 !
itmp0 & 0x7fffffff 2717*25c28e83SPiotr Jasiukajtis cmp %o4,%o5 ! magnitude vs 0x00800000 2718*25c28e83SPiotr Jasiukajtis bge,a 1f ! >=: the annulled-slot fstod is the whole conversion 2719*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (5_1) y0 = (double)fy0; 2720*25c28e83SPiotr Jasiukajtis 2721*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2722*25c28e83SPiotr Jasiukajtis sra %o1,28,%o1 ! itmp0 >>= 28; 2723*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2724*25c28e83SPiotr Jasiukajtis 2725*25c28e83SPiotr Jasiukajtis and %o1,-8,%o1 ! itmp0 &= -8; (0, or -8 when the sign bit was set) 2726*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2727*25c28e83SPiotr Jasiukajtis 2728*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2729*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2730*25c28e83SPiotr Jasiukajtis 2731*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 2732*25c28e83SPiotr Jasiukajtis1: 2733*25c28e83SPiotr Jasiukajtis faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; 2734*25c28e83SPiotr Jasiukajtis fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 2735*25c28e83SPiotr Jasiukajtis 2736*25c28e83SPiotr Jasiukajtis sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; 2737*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2738*25c28e83SPiotr Jasiukajtis 2739*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz+4],%o1 ! %o1 = bits of %f2 as spilled above 2740*25c28e83SPiotr Jasiukajtis and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff 2741*25c28e83SPiotr Jasiukajtis cmp %o4,%o5 2742*25c28e83SPiotr Jasiukajtis bge,a 1f 2743*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2744*25c28e83SPiotr Jasiukajtis 2745*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2746*25c28e83SPiotr Jasiukajtis sra %o1,28,%o1 ! itmp0 >>= 28; 2747*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 2748*25c28e83SPiotr Jasiukajtis 2749*25c28e83SPiotr Jasiukajtis and %o1,-8,%o1 ! itmp0 &= -8; 2750*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 !
dtmp0 = (double) *(int*)&fx0; 2751*25c28e83SPiotr Jasiukajtis 2752*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2753*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2754*25c28e83SPiotr Jasiukajtis 2755*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 2756*25c28e83SPiotr Jasiukajtis1: 2757*25c28e83SPiotr Jasiukajtis ba .den0 ! rejoin at .den0 2758*25c28e83SPiotr Jasiukajtis add %o2,stridez,%o1 ! pz += stridez (sum lands in %o1; %o2 is not modified) 2759*25c28e83SPiotr Jasiukajtis 2760*25c28e83SPiotr Jasiukajtis .align 16 ! stub: substitute or rescale the operands in %f0/%f2, then rejoin at .cont11 or .den1 2761*25c28e83SPiotr Jasiukajtis.update11: 2762*25c28e83SPiotr Jasiukajtis cmp counter,2 ! counter <= 2: hand back placeholders via .cont11 2763*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 2764*25c28e83SPiotr Jasiukajtis nop 2765*25c28e83SPiotr Jasiukajtis 2766*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 ! %f2 = first word of cmul_arr 2767*25c28e83SPiotr Jasiukajtis ba .cont11 2768*25c28e83SPiotr Jasiukajtis fzero %f0 ! delay slot: zero %f0:%f1 2769*25c28e83SPiotr Jasiukajtis1: 2770*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2771*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f ! greater: go test for zero / small magnitudes 2772*25c28e83SPiotr Jasiukajtis nop 2773*25c28e83SPiotr Jasiukajtis2: 2774*25c28e83SPiotr Jasiukajtis sub counter,2,counter ! counter -= 2 2775*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] ! park the reduced count in the frame 2776*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] ! park py 2777*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] ! park px 2778*25c28e83SPiotr Jasiukajtis 2779*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2780*25c28e83SPiotr Jasiukajtis or %g0,2,counter ! counter = 2 2781*25c28e83SPiotr Jasiukajtis ba .cont11 2782*25c28e83SPiotr Jasiukajtis fzero %f0 2783*25c28e83SPiotr Jasiukajtis1: 2784*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2785*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2786*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 ! delay slot: %o5 = 0x00800000 2787*25c28e83SPiotr Jasiukajtis 2788*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 !
itmp0 & 0x7fffffff 2789*25c28e83SPiotr Jasiukajtis be,pn %icc,2b ! zero magnitude bits in %l4 here: take the 2: path 2790*25c28e83SPiotr Jasiukajtis nop 2791*25c28e83SPiotr Jasiukajtis1: 2792*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_pz] ! spill %f0 and %f2 so their bits can be read as integers 2793*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_pz+4] 2794*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%o4 ! %o4 = bits of %f0 2795*25c28e83SPiotr Jasiukajtis fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; 2796*25c28e83SPiotr Jasiukajtis 2797*25c28e83SPiotr Jasiukajtis stx %l5,[%fp+tmp_px] ! park %l5: it is scratch until the ldx below 2798*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2799*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 ! magnitude vs %o5 2800*25c28e83SPiotr Jasiukajtis bge,a 1f ! >=: the annulled-slot fstod is the whole conversion 2801*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2802*25c28e83SPiotr Jasiukajtis 2803*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2804*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2805*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2806*25c28e83SPiotr Jasiukajtis 2807*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2808*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2809*25c28e83SPiotr Jasiukajtis 2810*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2811*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2812*25c28e83SPiotr Jasiukajtis 2813*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 2814*25c28e83SPiotr Jasiukajtis1: 2815*25c28e83SPiotr Jasiukajtis faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; 2816*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 2817*25c28e83SPiotr Jasiukajtis fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 2818*25c28e83SPiotr Jasiukajtis 2819*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz+4],%o4 ! %o4 = bits of %f2 as spilled above 2820*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2821*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 2822*25c28e83SPiotr Jasiukajtis bge,a 1f 2823*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 !
(5_1) x0 = (double)fx0; 2824*25c28e83SPiotr Jasiukajtis 2825*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2826*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2827*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 2828*25c28e83SPiotr Jasiukajtis 2829*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 &= -8; 2830*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2831*25c28e83SPiotr Jasiukajtis 2832*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2833*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2834*25c28e83SPiotr Jasiukajtis 2835*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 2836*25c28e83SPiotr Jasiukajtis1: 2837*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_px],%l5 ! restore the parked %l5 2838*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 2839*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2840*25c28e83SPiotr Jasiukajtis 2841*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 2842*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 2843*25c28e83SPiotr Jasiukajtis ba .den1 ! rejoin at .den1 2844*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 2845*25c28e83SPiotr Jasiukajtis 2846*25c28e83SPiotr Jasiukajtis .align 16 ! stub: substitute or rescale the operands in %f0/%f2, then rejoin (.cont12 on the placeholder paths) 2847*25c28e83SPiotr Jasiukajtis.update12: 2848*25c28e83SPiotr Jasiukajtis cmp counter,3 ! counter <= 3: hand back placeholders via .cont12 2849*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 2850*25c28e83SPiotr Jasiukajtis nop 2851*25c28e83SPiotr Jasiukajtis 2852*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 ! %f2 = first word of cmul_arr 2853*25c28e83SPiotr Jasiukajtis ba .cont12 2854*25c28e83SPiotr Jasiukajtis fzero %f0 ! delay slot: zero %f0:%f1 2855*25c28e83SPiotr Jasiukajtis1: 2856*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ?
0x7f800000 2857*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f 2858*25c28e83SPiotr Jasiukajtis nop 2859*25c28e83SPiotr Jasiukajtis2: 2860*25c28e83SPiotr Jasiukajtis sub counter,3,counter 2861*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2862*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] 2863*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] 2864*25c28e83SPiotr Jasiukajtis 2865*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2866*25c28e83SPiotr Jasiukajtis or %g0,3,counter 2867*25c28e83SPiotr Jasiukajtis ba .cont12 2868*25c28e83SPiotr Jasiukajtis fzero %f0 2869*25c28e83SPiotr Jasiukajtis1: 2870*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2871*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2872*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 2873*25c28e83SPiotr Jasiukajtis 2874*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2875*25c28e83SPiotr Jasiukajtis be,pn %icc,2b 2876*25c28e83SPiotr Jasiukajtis nop 2877*25c28e83SPiotr Jasiukajtis1: 2878*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_pz] 2879*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_pz+4] 2880*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%o4 2881*25c28e83SPiotr Jasiukajtis fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; 2882*25c28e83SPiotr Jasiukajtis 2883*25c28e83SPiotr Jasiukajtis stx %l5,[%fp+tmp_px] 2884*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2885*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 2886*25c28e83SPiotr Jasiukajtis bge,a 1f 2887*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2888*25c28e83SPiotr Jasiukajtis 2889*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2890*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2891*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2892*25c28e83SPiotr Jasiukajtis 2893*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 = -8; 2894*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! 
dtmp0 = (double) *(int*)&fy0; 2895*25c28e83SPiotr Jasiukajtis 2896*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2897*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2898*25c28e83SPiotr Jasiukajtis 2899*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 2900*25c28e83SPiotr Jasiukajtis1: 2901*25c28e83SPiotr Jasiukajtis faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; 2902*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex 2903*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 2904*25c28e83SPiotr Jasiukajtis fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 2905*25c28e83SPiotr Jasiukajtis 2906*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz+4],%o4 2907*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2908*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 2909*25c28e83SPiotr Jasiukajtis bge,a 1f 2910*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2911*25c28e83SPiotr Jasiukajtis 2912*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2913*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2914*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 2915*25c28e83SPiotr Jasiukajtis 2916*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 = -8; 2917*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2918*25c28e83SPiotr Jasiukajtis 2919*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2920*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2921*25c28e83SPiotr Jasiukajtis 2922*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 2923*25c28e83SPiotr Jasiukajtis1: 2924*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_px],%l5 2925*25c28e83SPiotr Jasiukajtis 2926*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 2927*25c28e83SPiotr Jasiukajtis 2928*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! 
(1_0) signy0 = uy0 >> 28; 2929*25c28e83SPiotr Jasiukajtis ba .den2 2930*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; 2931*25c28e83SPiotr Jasiukajtis 2932*25c28e83SPiotr Jasiukajtis .align 16 2933*25c28e83SPiotr Jasiukajtis.update13: 2934*25c28e83SPiotr Jasiukajtis cmp counter,4 2935*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 2936*25c28e83SPiotr Jasiukajtis nop 2937*25c28e83SPiotr Jasiukajtis 2938*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2939*25c28e83SPiotr Jasiukajtis ba .cont13 2940*25c28e83SPiotr Jasiukajtis fzero %f0 2941*25c28e83SPiotr Jasiukajtis1: 2942*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2943*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f 2944*25c28e83SPiotr Jasiukajtis nop 2945*25c28e83SPiotr Jasiukajtis2: 2946*25c28e83SPiotr Jasiukajtis sub counter,4,counter 2947*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2948*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] 2949*25c28e83SPiotr Jasiukajtis sub %i3,stridex,%o5 2950*25c28e83SPiotr Jasiukajtis stx %o5,[%fp+tmp_px] 2951*25c28e83SPiotr Jasiukajtis 2952*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 2953*25c28e83SPiotr Jasiukajtis or %g0,4,counter 2954*25c28e83SPiotr Jasiukajtis ba .cont13 2955*25c28e83SPiotr Jasiukajtis fzero %f0 2956*25c28e83SPiotr Jasiukajtis1: 2957*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2958*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 2959*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 2960*25c28e83SPiotr Jasiukajtis 2961*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2962*25c28e83SPiotr Jasiukajtis be,pn %icc,2b 2963*25c28e83SPiotr Jasiukajtis nop 2964*25c28e83SPiotr Jasiukajtis1: 2965*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_pz] 2966*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_pz+4] 2967*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%o4 2968*25c28e83SPiotr Jasiukajtis fmuld %f40,%f20,%f30 ! 
(0_0) dtmp0 *= x20; 2969*25c28e83SPiotr Jasiukajtis 2970*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2971*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 2972*25c28e83SPiotr Jasiukajtis bge,a 1f 2973*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2974*25c28e83SPiotr Jasiukajtis 2975*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2976*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2977*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 2978*25c28e83SPiotr Jasiukajtis 2979*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 = -8; 2980*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2981*25c28e83SPiotr Jasiukajtis 2982*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2983*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2984*25c28e83SPiotr Jasiukajtis 2985*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 2986*25c28e83SPiotr Jasiukajtis1: 2987*25c28e83SPiotr Jasiukajtis faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; 2988*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 2989*25c28e83SPiotr Jasiukajtis fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 2990*25c28e83SPiotr Jasiukajtis 2991*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz+4],%o4 2992*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2993*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 2994*25c28e83SPiotr Jasiukajtis bge,a 1f 2995*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2996*25c28e83SPiotr Jasiukajtis 2997*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2998*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 2999*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 3000*25c28e83SPiotr Jasiukajtis 3001*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 = -8; 3002*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! 
dtmp0 = (double) *(int*)&fx0; 3003*25c28e83SPiotr Jasiukajtis 3004*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 3005*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 3006*25c28e83SPiotr Jasiukajtis 3007*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 3008*25c28e83SPiotr Jasiukajtis1: 3009*25c28e83SPiotr Jasiukajtis sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 3010*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 3011*25c28e83SPiotr Jasiukajtis 3012*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 3013*25c28e83SPiotr Jasiukajtis ba .den3 3014*25c28e83SPiotr Jasiukajtis add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 3015*25c28e83SPiotr Jasiukajtis 3016*25c28e83SPiotr Jasiukajtis .align 16 3017*25c28e83SPiotr Jasiukajtis.update14: 3018*25c28e83SPiotr Jasiukajtis cmp counter,5 3019*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 3020*25c28e83SPiotr Jasiukajtis nop 3021*25c28e83SPiotr Jasiukajtis 3022*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f1 3023*25c28e83SPiotr Jasiukajtis ba .cont14 3024*25c28e83SPiotr Jasiukajtis fzeros %f0 3025*25c28e83SPiotr Jasiukajtis1: 3026*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 3027*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f 3028*25c28e83SPiotr Jasiukajtis nop 3029*25c28e83SPiotr Jasiukajtis2: 3030*25c28e83SPiotr Jasiukajtis sub counter,5,counter 3031*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3032*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] 3033*25c28e83SPiotr Jasiukajtis sub %i3,stridex,%o5 3034*25c28e83SPiotr Jasiukajtis stx %o5,[%fp+tmp_px] 3035*25c28e83SPiotr Jasiukajtis 3036*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f1 3037*25c28e83SPiotr Jasiukajtis or %g0,5,counter 3038*25c28e83SPiotr Jasiukajtis ba .cont14 3039*25c28e83SPiotr Jasiukajtis fzeros %f0 3040*25c28e83SPiotr Jasiukajtis1: 3041*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! 
itmp0 & 0x7fffffff 3042*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 3043*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 3044*25c28e83SPiotr Jasiukajtis 3045*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 3046*25c28e83SPiotr Jasiukajtis be,pn %icc,2b 3047*25c28e83SPiotr Jasiukajtis nop 3048*25c28e83SPiotr Jasiukajtis1: 3049*25c28e83SPiotr Jasiukajtis std %f0,[%fp+tmp_pz] 3050*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%o4 3051*25c28e83SPiotr Jasiukajtis fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 3052*25c28e83SPiotr Jasiukajtis 3053*25c28e83SPiotr Jasiukajtis faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 3054*25c28e83SPiotr Jasiukajtis 3055*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 3056*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 3057*25c28e83SPiotr Jasiukajtis bge,a 1f 3058*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 3059*25c28e83SPiotr Jasiukajtis 3060*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 3061*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 3062*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 3063*25c28e83SPiotr Jasiukajtis 3064*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 = -8; 3065*25c28e83SPiotr Jasiukajtis fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; 3066*25c28e83SPiotr Jasiukajtis 3067*25c28e83SPiotr Jasiukajtis fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; 3068*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); 3069*25c28e83SPiotr Jasiukajtis 3070*25c28e83SPiotr Jasiukajtis fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; 3071*25c28e83SPiotr Jasiukajtis1: 3072*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 3073*25c28e83SPiotr Jasiukajtis fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 3074*25c28e83SPiotr Jasiukajtis 3075*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz+4],%o4 3076*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! 
itmp0 & 0x7fffffff 3077*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 3078*25c28e83SPiotr Jasiukajtis bge,a 1f 3079*25c28e83SPiotr Jasiukajtis fstod %f1,%f16 ! (5_1) x0 = (double)fx0; 3080*25c28e83SPiotr Jasiukajtis 3081*25c28e83SPiotr Jasiukajtis fabss %f1,%f16 ! fx0 = fabsf(fx0); 3082*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 3083*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 3084*25c28e83SPiotr Jasiukajtis 3085*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 = -8; 3086*25c28e83SPiotr Jasiukajtis fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; 3087*25c28e83SPiotr Jasiukajtis 3088*25c28e83SPiotr Jasiukajtis fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; 3089*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 3090*25c28e83SPiotr Jasiukajtis 3091*25c28e83SPiotr Jasiukajtis fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; 3092*25c28e83SPiotr Jasiukajtis1: 3093*25c28e83SPiotr Jasiukajtis sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 3094*25c28e83SPiotr Jasiukajtis sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 3095*25c28e83SPiotr Jasiukajtis 3096*25c28e83SPiotr Jasiukajtis ba .den4 3097*25c28e83SPiotr Jasiukajtis sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 3098*25c28e83SPiotr Jasiukajtis 3099*25c28e83SPiotr Jasiukajtis .align 16 3100*25c28e83SPiotr Jasiukajtis.update15: 3101*25c28e83SPiotr Jasiukajtis cmp counter,6 3102*25c28e83SPiotr Jasiukajtis bg,pn %icc,1f 3103*25c28e83SPiotr Jasiukajtis nop 3104*25c28e83SPiotr Jasiukajtis 3105*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 3106*25c28e83SPiotr Jasiukajtis ba .cont15 3107*25c28e83SPiotr Jasiukajtis fzero %f0 3108*25c28e83SPiotr Jasiukajtis1: 3109*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (4_0) b0 ? 
0x7f800000 3110*25c28e83SPiotr Jasiukajtis bg,pt %icc,1f 3111*25c28e83SPiotr Jasiukajtis nop 3112*25c28e83SPiotr Jasiukajtis2: 3113*25c28e83SPiotr Jasiukajtis sub counter,6,counter 3114*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3115*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_py] 3116*25c28e83SPiotr Jasiukajtis sub %i3,stridex,%o5 3117*25c28e83SPiotr Jasiukajtis stx %o5,[%fp+tmp_px] 3118*25c28e83SPiotr Jasiukajtis 3119*25c28e83SPiotr Jasiukajtis ld [cmul_arr],%f2 3120*25c28e83SPiotr Jasiukajtis or %g0,6,counter 3121*25c28e83SPiotr Jasiukajtis ba .cont15 3122*25c28e83SPiotr Jasiukajtis fzero %f0 3123*25c28e83SPiotr Jasiukajtis1: 3124*25c28e83SPiotr Jasiukajtis andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 3125*25c28e83SPiotr Jasiukajtis bne,pn %icc,1f 3126*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),%o5 3127*25c28e83SPiotr Jasiukajtis 3128*25c28e83SPiotr Jasiukajtis andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 3129*25c28e83SPiotr Jasiukajtis be,pn %icc,2b 3130*25c28e83SPiotr Jasiukajtis nop 3131*25c28e83SPiotr Jasiukajtis1: 3132*25c28e83SPiotr Jasiukajtis st %f0,[%fp+tmp_pz] 3133*25c28e83SPiotr Jasiukajtis st %f2,[%fp+tmp_pz+4] 3134*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz],%o4 3135*25c28e83SPiotr Jasiukajtis fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 3136*25c28e83SPiotr Jasiukajtis 3137*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 3138*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 3139*25c28e83SPiotr Jasiukajtis bge,a 1f 3140*25c28e83SPiotr Jasiukajtis fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 3141*25c28e83SPiotr Jasiukajtis 3142*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 3143*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 3144*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fy0 = fabsf(fy0); 3145*25c28e83SPiotr Jasiukajtis 3146*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 = -8; 3147*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 ! 
dtmp0 = (double) *(int*)&fy0; 3148*25c28e83SPiotr Jasiukajtis 3149*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 3150*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 3151*25c28e83SPiotr Jasiukajtis 3152*25c28e83SPiotr Jasiukajtis fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 3153*25c28e83SPiotr Jasiukajtis1: 3154*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i1 ! py += stridey 3155*25c28e83SPiotr Jasiukajtis faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 3156*25c28e83SPiotr Jasiukajtis fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 3157*25c28e83SPiotr Jasiukajtis 3158*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_pz+4],%o4 3159*25c28e83SPiotr Jasiukajtis and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 3160*25c28e83SPiotr Jasiukajtis cmp %l6,%o5 3161*25c28e83SPiotr Jasiukajtis bge,a 1f 3162*25c28e83SPiotr Jasiukajtis fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 3163*25c28e83SPiotr Jasiukajtis 3164*25c28e83SPiotr Jasiukajtis fabss %f2,%f2 ! fx0 = fabsf(fx0); 3165*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 3166*25c28e83SPiotr Jasiukajtis sra %o4,28,%o4 ! itmp0 >>= 28; 3167*25c28e83SPiotr Jasiukajtis 3168*25c28e83SPiotr Jasiukajtis and %o4,-8,%o4 ! itmp0 = -8; 3169*25c28e83SPiotr Jasiukajtis fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 3170*25c28e83SPiotr Jasiukajtis 3171*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 3172*25c28e83SPiotr Jasiukajtis ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 3173*25c28e83SPiotr Jasiukajtis 3174*25c28e83SPiotr Jasiukajtis fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 3175*25c28e83SPiotr Jasiukajtis1: 3176*25c28e83SPiotr Jasiukajtis sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 3177*25c28e83SPiotr Jasiukajtis sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 3178*25c28e83SPiotr Jasiukajtis 3179*25c28e83SPiotr Jasiukajtis ba .den5 3180*25c28e83SPiotr Jasiukajtis sra %l4,28,%o4 ! 
(4_0) signy0 = uy0 >> 28; 3181*25c28e83SPiotr Jasiukajtis 3182*25c28e83SPiotr Jasiukajtis .align 16 3183*25c28e83SPiotr Jasiukajtis.u0: 3184*25c28e83SPiotr Jasiukajtis ba .c0 3185*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3186*25c28e83SPiotr Jasiukajtis.u1: 3187*25c28e83SPiotr Jasiukajtis ba .c1 3188*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3189*25c28e83SPiotr Jasiukajtis.u2: 3190*25c28e83SPiotr Jasiukajtis ba .c2 3191*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3192*25c28e83SPiotr Jasiukajtis.u3: 3193*25c28e83SPiotr Jasiukajtis ba .c3 3194*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3195*25c28e83SPiotr Jasiukajtis.u4: 3196*25c28e83SPiotr Jasiukajtis ba .c4 3197*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3198*25c28e83SPiotr Jasiukajtis.u5: 3199*25c28e83SPiotr Jasiukajtis ba .c5 3200*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3201*25c28e83SPiotr Jasiukajtis.u6: 3202*25c28e83SPiotr Jasiukajtis ba .c6 3203*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3204*25c28e83SPiotr Jasiukajtis.u7: 3205*25c28e83SPiotr Jasiukajtis ba .c7 3206*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3207*25c28e83SPiotr Jasiukajtis.u8: 3208*25c28e83SPiotr Jasiukajtis ba .c8 3209*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3210*25c28e83SPiotr Jasiukajtis.u9: 3211*25c28e83SPiotr Jasiukajtis ba .c9 3212*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3213*25c28e83SPiotr Jasiukajtis.u10: 3214*25c28e83SPiotr Jasiukajtis ba .c10 3215*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3216*25c28e83SPiotr Jasiukajtis.u11: 3217*25c28e83SPiotr Jasiukajtis ba .c11 3218*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3219*25c28e83SPiotr Jasiukajtis.u12: 3220*25c28e83SPiotr Jasiukajtis ba .c12 3221*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3222*25c28e83SPiotr Jasiukajtis.u13: 3223*25c28e83SPiotr Jasiukajtis ba .c13 3224*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3225*25c28e83SPiotr Jasiukajtis.u14: 3226*25c28e83SPiotr Jasiukajtis ba 
.c14 3227*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3228*25c28e83SPiotr Jasiukajtis.u15: 3229*25c28e83SPiotr Jasiukajtis ba .c15 3230*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3231*25c28e83SPiotr Jasiukajtis.u16: 3232*25c28e83SPiotr Jasiukajtis ba .c16 3233*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3234*25c28e83SPiotr Jasiukajtis.u17: 3235*25c28e83SPiotr Jasiukajtis ba .c17 3236*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3237*25c28e83SPiotr Jasiukajtis.u18: 3238*25c28e83SPiotr Jasiukajtis ba .c18 3239*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3240*25c28e83SPiotr Jasiukajtis.u19: 3241*25c28e83SPiotr Jasiukajtis ba .c19 3242*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3243*25c28e83SPiotr Jasiukajtis.u20: 3244*25c28e83SPiotr Jasiukajtis ba .c20 3245*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3246*25c28e83SPiotr Jasiukajtis.u21: 3247*25c28e83SPiotr Jasiukajtis ba .c21 3248*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3249*25c28e83SPiotr Jasiukajtis.u22: 3250*25c28e83SPiotr Jasiukajtis ba .c22 3251*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3252*25c28e83SPiotr Jasiukajtis.u23: 3253*25c28e83SPiotr Jasiukajtis ba .c23 3254*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3255*25c28e83SPiotr Jasiukajtis.u24: 3256*25c28e83SPiotr Jasiukajtis ba .c24 3257*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3258*25c28e83SPiotr Jasiukajtis.u25: 3259*25c28e83SPiotr Jasiukajtis ba .c25 3260*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3261*25c28e83SPiotr Jasiukajtis.u26: 3262*25c28e83SPiotr Jasiukajtis ba .c26 3263*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3264*25c28e83SPiotr Jasiukajtis.u27: 3265*25c28e83SPiotr Jasiukajtis ba .c27 3266*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3267*25c28e83SPiotr Jasiukajtis.u28: 3268*25c28e83SPiotr Jasiukajtis ba .c28 3269*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3270*25c28e83SPiotr Jasiukajtis.u29: 3271*25c28e83SPiotr Jasiukajtis ba .c29 
3272*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3273*25c28e83SPiotr Jasiukajtis.u30: 3274*25c28e83SPiotr Jasiukajtis ba .c30 3275*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3276*25c28e83SPiotr Jasiukajtis.u31: 3277*25c28e83SPiotr Jasiukajtis ba .c31 3278*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3279*25c28e83SPiotr Jasiukajtis.u32: 3280*25c28e83SPiotr Jasiukajtis ba .c32 3281*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3282*25c28e83SPiotr Jasiukajtis.u33: 3283*25c28e83SPiotr Jasiukajtis ba .c33 3284*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3285*25c28e83SPiotr Jasiukajtis.u34: 3286*25c28e83SPiotr Jasiukajtis ba .c34 3287*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3288*25c28e83SPiotr Jasiukajtis.u35: 3289*25c28e83SPiotr Jasiukajtis ba .c35 3290*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3291*25c28e83SPiotr Jasiukajtis.u36: 3292*25c28e83SPiotr Jasiukajtis ba .c36 3293*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3294*25c28e83SPiotr Jasiukajtis.u37: 3295*25c28e83SPiotr Jasiukajtis ba .c37 3296*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3297*25c28e83SPiotr Jasiukajtis.u38: 3298*25c28e83SPiotr Jasiukajtis ba .c38 3299*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3300*25c28e83SPiotr Jasiukajtis.u39: 3301*25c28e83SPiotr Jasiukajtis ba .c39 3302*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3303*25c28e83SPiotr Jasiukajtis.up0: 3304*25c28e83SPiotr Jasiukajtis ba .co0 3305*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3306*25c28e83SPiotr Jasiukajtis.up1: 3307*25c28e83SPiotr Jasiukajtis ba .co1 3308*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3309*25c28e83SPiotr Jasiukajtis.up2: 3310*25c28e83SPiotr Jasiukajtis ba .co2 3311*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3312*25c28e83SPiotr Jasiukajtis.up3: 3313*25c28e83SPiotr Jasiukajtis ba .co3 3314*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3315*25c28e83SPiotr Jasiukajtis.up4: 3316*25c28e83SPiotr Jasiukajtis ba .co4 3317*25c28e83SPiotr 
Jasiukajtis or %g0,_0x7fffffff,%o5 3318*25c28e83SPiotr Jasiukajtis.up5: 3319*25c28e83SPiotr Jasiukajtis ba .co5 3320*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3321*25c28e83SPiotr Jasiukajtis.up6: 3322*25c28e83SPiotr Jasiukajtis ba .co6 3323*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3324*25c28e83SPiotr Jasiukajtis.up7: 3325*25c28e83SPiotr Jasiukajtis ba .co7 3326*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3327*25c28e83SPiotr Jasiukajtis.up8: 3328*25c28e83SPiotr Jasiukajtis ba .co8 3329*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3330*25c28e83SPiotr Jasiukajtis.up9: 3331*25c28e83SPiotr Jasiukajtis ba .co9 3332*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3333*25c28e83SPiotr Jasiukajtis.up10: 3334*25c28e83SPiotr Jasiukajtis ba .co10 3335*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3336*25c28e83SPiotr Jasiukajtis.up11: 3337*25c28e83SPiotr Jasiukajtis ba .co11 3338*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3339*25c28e83SPiotr Jasiukajtis.up12: 3340*25c28e83SPiotr Jasiukajtis ba .co12 3341*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3342*25c28e83SPiotr Jasiukajtis.up13: 3343*25c28e83SPiotr Jasiukajtis ba .co13 3344*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3345*25c28e83SPiotr Jasiukajtis.up14: 3346*25c28e83SPiotr Jasiukajtis ba .co14 3347*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3348*25c28e83SPiotr Jasiukajtis.up15: 3349*25c28e83SPiotr Jasiukajtis ba .co15 3350*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3351*25c28e83SPiotr Jasiukajtis.up16: 3352*25c28e83SPiotr Jasiukajtis ba .co16 3353*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3354*25c28e83SPiotr Jasiukajtis.up17: 3355*25c28e83SPiotr Jasiukajtis ba .co17 3356*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3357*25c28e83SPiotr Jasiukajtis.up18: 3358*25c28e83SPiotr Jasiukajtis ba .co18 3359*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3360*25c28e83SPiotr Jasiukajtis.up19: 3361*25c28e83SPiotr Jasiukajtis ba .co19 3362*25c28e83SPiotr 
Jasiukajtis or %g0,_0x7f800000,%o5 3363*25c28e83SPiotr Jasiukajtis.up20: 3364*25c28e83SPiotr Jasiukajtis ba .co20 3365*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3366*25c28e83SPiotr Jasiukajtis.up21: 3367*25c28e83SPiotr Jasiukajtis ba .co21 3368*25c28e83SPiotr Jasiukajtis or %g0,_0x7fffffff,%o5 3369*25c28e83SPiotr Jasiukajtis.up22: 3370*25c28e83SPiotr Jasiukajtis ba .co22 3371*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3372*25c28e83SPiotr Jasiukajtis.up23: 3373*25c28e83SPiotr Jasiukajtis ba .co23 3374*25c28e83SPiotr Jasiukajtis or %g0,_0x7f800000,%o5 3375*25c28e83SPiotr Jasiukajtis.exit: 3376*25c28e83SPiotr Jasiukajtis ret 3377*25c28e83SPiotr Jasiukajtis restore 3378*25c28e83SPiotr Jasiukajtis SET_SIZE(__vatan2f) 3379*25c28e83SPiotr Jasiukajtis 3380