/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vatan2f.S"

#include "libm.h"

	RO_DATA
	.align	64

.CONST_TBL:
	.word	0xbff921fb, 0x54442d18	! -M_PI_2
	.word	0x3ff921fb, 0x54442d18	! M_PI_2
	.word	0xbff921fb, 0x54442d18	! -M_PI_2
	.word	0x3ff921fb, 0x54442d18	! M_PI_2
	.word	0xc00921fb, 0x54442d18	! -M_PI
	.word	0x400921fb, 0x54442d18	! M_PI
	.word	0x80000000, 0x00000000	! -0.0
	.word	0x00000000, 0x00000000	! 0.0
	.word	0xbff00000, 0x00000000	! -1.0
	.word	0x3ff00000, 0x00000000	! 1.0
	.word	0x3fefffff, 0xfe79bf93	! K0 = 9.99999997160545464888e-01
	.word	0xbfd55552, 0xf0db4320	! K1 = -3.33332762919825514315e-01
	.word	0x3fc998f8, 0x2493d066	! K2 = 1.99980752811487135558e-01
	.word	0xbfc240b8, 0xd994abf9	! K3 = -1.42600160828209047720e-01
	.word	0x3fbbfc9e, 0x8c2b0243	! K4 = 1.09323415013030928421e-01
	.word	0xbfb56013, 0x64b1cac3	! K5 = -8.34972496830160174704e-02
	.word	0x3fad3ad7, 0x9f53e142	! K6 = 5.70895559303061900411e-02
	.word	0xbf9f148f, 0x2a829af1	! K7 = -3.03518647857811706139e-02
	.word	0x3f857a8c, 0x747ed314	! K8 = 1.04876492549493055747e-02
	.word	0xbf5bdf39, 0x729124b6	! K9 = -1.70117006406859722727e-03
	.word	0x3fe921fb, 0x54442d18	! M_PI_4
	.word	0x36a00000, 0x00000000	! 2^(-149)

#define counter		%o3
#define stridex		%i4
#define stridey		%i5
#define stridez		%l1
#define cmul_arr	%i0
#define cadd_arr	%i2
#define _0x7fffffff	%l0
#define _0x7f800000	%l2

#define K0		%f42
#define K1		%f44
#define K2		%f46
#define K3		%f48
#define K4		%f50
#define K5		%f52
#define K6		%f54
#define K7		%f56
#define K8		%f58
#define K9		%f60

#define tmp_counter	STACK_BIAS-32
#define tmp_py		STACK_BIAS-24
#define tmp_px		STACK_BIAS-16
#define tmp_pz		STACK_BIAS-8

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x20

!--------------------------------------------------------------------
!	!!!!! vatan2f algorithm !!!!!
!	uy0 = *(int*)py;
!	ux0 = *(int*)px;
!	ay0 = uy0 & 0x7fffffff;
!	ax0 = ux0 & 0x7fffffff;
!	if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 )
!	{
!		/* |X| or |Y| = Nan */
!		if ( ax0 > 0x7f800000 || ay0 > 0x7f800000 )
!		{
!			ftmp0 = *(float*)&ax0 * *(float*)&ay0;
!			*pz = ftmp0;
!		}
!		signx0 = (unsigned)ux0 >> 30;
!		signx0 &= 2;
!		signy0 = uy0 >> 31;
!		if (ay0 == 0x7f800000)
!			signx0 = (ax0 == 0x7f800000) ? signx0 + 1 : 2;
!		else
!			signx0 += signx0;
!		res = signx0 * M_PI_4;
!		signy0 <<= 3;
!		dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0);
!		res *= dtmp0;
!		ftmp0 = (float) res;
!		*pz = ftmp0;
!		goto next;
!	}
!	if ( ax0 == 0 && ay0 == 0 )
!	{
!		signy0 = uy0 >> 28;
!		signx0 = ux0 >> 27;
!		ldiff0 = ax0 - ay0;
!		ldiff0 >>= 31;
!		signx0 &= -16;
!		signy0 &= -8;
!		ldiff0 <<= 5;
!		signx0 += signy0;
!		res = *(double*)((char*)(cadd_arr + 7) + ldiff0 + signx0 + signy0);
!		ftmp0 = (float) res;
!		*pz = ftmp0;
!		goto next;
!	}
!	ldiff0 = ax0 - ay0;
!	ldiff0 >>= 31;
!	addrc0 = (char*)px - (char*)py;
!	addrc0 &= ldiff0;
!	fy0 = *(float*)((char*)py + addrc0);
!	fx0 = *(float*)((char*)px - addrc0);
!	itmp0 = *(int*)&fy0;
!	if((itmp0 & 0x7fffffff) < 0x00800000)
!	{
!		itmp0 >>= 28;
!		itmp0 &= -8;
!		fy0 = fabsf(fy0);
!		dtmp0 = (double) *(int*)&fy0;
!		dtmp0 *= C2ONM149;
!		dsign = *(double*)((char*)cmul_arr + itmp0);
!		dtmp0 *= dsign;
!		y0 = dtmp0;
!	}
!	else
!		y0 = (double)fy0;
!	itmp0 = *(int*)&fx0;
!	if((itmp0 & 0x7fffffff) < 0x00800000)
!	{
!		itmp0 >>= 28;
!		itmp0 &= -8;
!		fx0 = fabsf(fx0);
!		dtmp0 = (double) *(int*)&fx0;
!		dtmp0 *= C2ONM149;
!		dsign = *(double*)((char*)cmul_arr + itmp0);
!		dtmp0 *= dsign;
!		x0 = dtmp0;
!	}
!	else
!		x0 = (double)fx0;
!	px += stridex;
!	py += stridey;
!	x0 = y0 / x0;
!	x20 = x0 * x0;
!	dtmp0 = K9 * x20;
!	dtmp0 += K8;
!	dtmp0 *= x20;
!	dtmp0 += K7;
!	dtmp0 *= x20;
!	dtmp0 += K6;
!	dtmp0 *= x20;
!	dtmp0 += K5;
!	dtmp0 *= x20;
!	dtmp0 += K4;
!	dtmp0 *= x20;
!	dtmp0 += K3;
!	dtmp0 *= x20;
!	dtmp0 += K2;
!	dtmp0 *= x20;
!	dtmp0 += K1;
!	dtmp0 *= x20;
!	dtmp0 += K0;
!	x0 = dtmp0 * x0;
!	signy0 = uy0 >> 28;
!	signy0 &= -8;
!	signx0 = ux0 >> 27;
!	signx0 &= -16;
!	ltmp0 = ldiff0 << 5;
!	ltmp0 += (char*)cadd_arr;
!	ltmp0 += signx0;
!	cadd0 = *(double*)(ltmp0 + signy0);
!	cmul0_ind = ldiff0 << 3;
!	cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
!	dtmp0 = cmul0 * x0;
!	dtmp0 = cadd0 + dtmp0;
!	ftmp0 = (float)dtmp0;
!	*pz = ftmp0;
!	pz += stridez;
!
!--------------------------------------------------------------------

	ENTRY(__vatan2f)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,.CONST_TBL,g5)

#ifdef __sparcv9
	ldx	[%fp+STACK_BIAS+176],%l7
#else
	ld	[%fp+STACK_BIAS+92],%l7
#endif

	st	%i0,[%fp+tmp_counter]
	sethi	%hi(0x7ffffc00),_0x7fffffff
	add	_0x7fffffff,1023,_0x7fffffff
	or	%g0,%i2,%o2
	sll	%l7,2,stridez

	sethi	%hi(0x7f800000),_0x7f800000
	mov	%g5,%g1

	or	%g0,stridey,%o4
	add	%g1,56,cadd_arr

	sll	%o2,2,stridey
	add	%g1,72,cmul_arr

	ldd	[%g1+80],K0
	ldd	[%g1+80+8],K1
	ldd	[%g1+80+16],K2
	ldd	[%g1+80+24],K3
	ldd	[%g1+80+32],K4
	ldd	[%g1+80+40],K5
	ldd	[%g1+80+48],K6
	ldd	[%g1+80+56],K7
	ldd	[%g1+80+64],K8
	ldd	[%g1+80+72],K9

	sll	stridex,2,stridex
	stx	%i1,[%fp+tmp_py]
	stx	%i3,[%fp+tmp_px]

.begin:
	ld	[%fp+tmp_counter],counter
	ldx	[%fp+tmp_py],%i1
	ldx	[%fp+tmp_px],%i3
	st	%g0,[%fp+tmp_counter]
.begin1:
	subcc	counter,1,counter
	bneg,pn	%icc,.exit
	nop

	lda	[%i1]0x82,%l4		! (0_0) uy0 = *(int*)py;
	lda	[%i3]0x82,%l3		! (0_0) ux0 = *(int*)px;

	and	%l4,_0x7fffffff,%l7	! (0_0) ay0 = uy0 & 0x7fffffff;
	cmp	%l7,_0x7f800000
	bge,pn	%icc,.spec0
	and	%l3,_0x7fffffff,%l6	! (0_0) ax0 = ux0 & 0x7fffffff;

	cmp	%l6,_0x7f800000
	bge,pn	%icc,.spec0
	sethi	%hi(0x00800000),%o5

	cmp	%l6,%o5
	bl,pn	%icc,.spec1
	sub	%l6,%l7,%o2		! (0_0) ldiff0 = ax0 - ay0;

	cmp	%l7,%o5
	bl,pn	%icc,.spec1
	nop

	stx	%o4,[%fp+tmp_pz]
	sra	%o2,31,%l7		! (0_0) ldiff0 >>= 31;
	sub	%i3,%i1,%l6		! (0_0) addrc0 = (char*)px - (char*)py;

	and	%l6,%l7,%o2		! (0_0) addrc0 &= ldiff0;

	lda	[%i1+%o2]0x82,%f0	! (0_0) fy0 = *(float*)((char*)py + addrc0);
	sub	%i3,%o2,%o4		! (0_0) (char*)px - addrc0

	lda	[%o4]0x82,%f2		! (0_0) fx0 = *(float*)((char*)px - addrc0);
	sll	%l7,5,%l6		! (0_0) ltmp0 = ldiff0 << 5;
	sra	%l3,27,%o5		! (0_0) signx0 = ux0 >> 27;

	add	%i1,stridey,%i1		! py += stridey
	add	%i3,stridex,%i3		! px += stridex

	lda	[%i1]0x82,%l3		! (1_0) uy0 = *(int*)py;
	sra	%l4,28,%o4		! (0_0) signy0 = uy0 >> 28;
	add	%l6,cadd_arr,%l6	! (0_0) ltmp0 += (char*)cadd_arr;

	fstod	%f0,%f40	!
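!--------------------------------------------------------------------
! Editor's note (not part of the original source): the scalar C sketch
! below summarizes what the table-driven reconstruction described above
! computes for one element.  The cadd/cmul values are the ones selected
! from cadd_arr/cmul_arr by ldiff0 and the sign bits of x and y.  The
! real code evaluates the odd minimax polynomial with coefficients
! K0..K9 from .CONST_TBL instead of calling atan(), and handles
! NaN/Inf, (0,0) and subnormal inputs in the separate paths shown
! elsewhere; the function and variable names here are illustrative.
!
!	#include <math.h>
!
!	static float
!	vatan2f_one(float y, float x)
!	{
!		double cadd, cmul, t;
!
!		if (fabsf(x) >= fabsf(y)) {	/* ldiff0 == 0 */
!			t = (double)y / (double)x;
!			cmul = 1.0;
!			cadd = signbit(x) ?
!			    copysign(M_PI, y) : copysign(0.0, y);
!		} else {			/* ldiff0 == -1 */
!			t = (double)x / (double)y;
!			cmul = -1.0;
!			cadd = copysign(M_PI_2, y);
!		}
!		return ((float)(cadd + cmul * atan(t)));
!	}
!--------------------------------------------------------------------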
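!--------------------------------------------------------------------
! Editor's note (not part of the original source): when an input has a
! zero biased exponent (|bits| < 0x00800000, i.e. zero or subnormal),
! the algorithm above avoids converting it with fstod.  A single-
! precision subnormal equals its mantissa bits times 2^-149, so the bit
! pattern of fabsf(input) is converted as an integer and scaled by the
! last .CONST_TBL entry (C2ONM149 = 2^-149), with the sign restored
! from cmul_arr.  A rough scalar C equivalent (illustrative names):
!
!	int iy = *(int *)&fy0;			/* raw bits */
!	double y0 = (double)(iy & 0x7fffffff)	/* exact int->double */
!	    * 0x1p-149 * (iy < 0 ? -1.0 : 1.0);	/* scale, restore sign */
!--------------------------------------------------------------------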
(0_0) y0 = (double)fy0; fstod %f2,%f2 ! (0_0) x0 = (double)fx0; .spec1_cont: lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; and %o5,-16,%o5 ! (0_0) signx0 &= -16; and %o4,-8,%o4 ! (0_0) signy0 &= -8; fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 cmp %l6,%o5 bl,pn %icc,.u0 and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; .c0: cmp %g1,%o5 bl,pn %icc,.u1 ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); .c1: cmp %l6,_0x7f800000 bge,pn %icc,.u2 sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; .c2: cmp %g1,_0x7f800000 bge,pn %icc,.u3 nop .c3: sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 bge,pn %icc,.update0 ! (1_0) if ( b0 > 0x7f800000 ) nop .cont0: add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (1_0) y0 = (double)fy0; sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; fstod %f2,%f2 ! (1_0) x0 = (double)fx0; .d0: and %o5,-16,%o5 ! (1_0) signx0 &= -16; and %o4,-8,%o4 ! (1_0) signy0 &= -8; lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 cmp %l6,%o5 bl,pn %icc,.u4 and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; .c4: cmp %g5,%o5 bl,pn %icc,.u5 fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; .c5: cmp %l6,_0x7f800000 bge,pn %icc,.u6 ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); .c6: cmp %g5,_0x7f800000 bge,pn %icc,.u7 sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; .c7: sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 bge,pn %icc,.update1 ! (2_0) if ( b0 > 0x7f800000 ) nop .cont1: fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (2_0) y0 = (double)fy0; sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex fstod %f2,%f2 ! (2_0) x0 = (double)fx0; sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; .d1: lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; and %o5,-16,%o5 ! (2_0) signx0 &= -16; faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; and %o4,-8,%o4 ! (2_0) signy0 &= -8; fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 cmp %l6,%o5 bl,pn %icc,.u8 and %l3,_0x7fffffff,%o0 ! 
(3_0) ay0 = uy0 & 0x7fffffff; .c8: cmp %o0,%o5 bl,pn %icc,.u9 fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; .c9: cmp %l6,_0x7f800000 bge,pn %icc,.u10 faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; .c10: cmp %o0,_0x7f800000 bge,pn %icc,.u11 ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); .c11: sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 bge,pn %icc,.update2 ! (3_0) if ( b0 > 0x7f800000 ) nop .cont2: fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (3_0) y0 = (double)fy0; faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; fstod %f1,%f16 ! (3_0) x0 = (double)fx0; .d2: faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; and %o5,-16,%o5 ! (3_0) signx0 &= -16; lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; and %o4,-8,%o4 ! (3_0) signy0 &= -8; fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; cmp %l6,%o5 bl,pn %icc,.u12 and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; .c12: cmp %l5,%o5 bl,pn %icc,.u13 fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; .c13: cmp %l6,_0x7f800000 bge,pn %icc,.u14 faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; .c14: ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); cmp %l5,_0x7f800000 bge,pn %icc,.u15 fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; .c15: sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bge,pn %icc,.update3 ! (4_0) if ( b0 > 0x7f800000 ) nop .cont3: fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (4_0) y0 = (double)fy0; faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; add %i3,stridex,%i3 ! px += stridex fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; fstod %f2,%f2 ! (4_0) x0 = (double)fx0; sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; .d3: lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; and %o5,-16,%o5 ! (4_0) signx0 &= -16; lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px; fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0; faddd %f24,K2,%f40 ! 
(0_1) dtmp0 += K2; and %o4,-8,%o4 ! (4_1) signy0 &= -8; fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0; add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0; fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; and %l4,_0x7fffffff,%l6 ! (5_1) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; cmp %l6,%o5 bl,pn %icc,.u16 and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff; .c16: cmp %o7,%o5 bl,pn %icc,.u17 fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; .c17: cmp %l6,_0x7f800000 bge,pn %icc,.u18 fmuld K9,%f24,%f40 ! (3_1) dtmp0 = K9 * x20; .c18: cmp %o7,_0x7f800000 bge,pn %icc,.u19 faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; .c19: ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0); fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0; sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8; and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0; fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0); sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5; sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0; faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; lda [%o4]0x82,%f1 ! (5_1) fx0 = *(float*)((char*)px - addrc0); fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000 bge,pn %icc,.update4 ! (5_1) if ( b0 > 0x7f800000 ) nop .cont4: fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; fstod %f0,%f40 ! (5_1) y0 = (double)fy0; faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; add %i3,stridex,%i3 ! px += stridex sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; fstod %f1,%f2 ! (5_1) x0 = (double)fx0; .d4: sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28; add %i1,stridey,%i1 ! py += stridey faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7; sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27; lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr; fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; and %o5,-16,%o5 ! (5_1) signx0 &= -16; fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0; faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0; ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0; and %o4,-8,%o4 ! (5_1) signy0 &= -8; fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20; fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; cmp %l7,%o5 bl,pn %icc,.u20 fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; .c20: cmp %l6,%o5 bl,pn %icc,.u21 fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20; .c21: cmp %l7,_0x7f800000 bge,pn %icc,.u22 faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6; .c22: ldd [%o2+%o4],%f36 ! (5_1) cadd0 = *(double*)(ltmp0 + signy0); cmp %l6,_0x7f800000 bge,pn %icc,.u23 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; .c23: sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8; and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); sll %g1,3,%g1 ! 
(1_1) cmul0_ind = ldiff0 << 3; sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000 bge,pn %icc,.update5 ! (0_0) if ( b0 > 0x7f800000 ) faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; .cont5: fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex fstod %f0,%f40 ! (0_0) y0 = (double)fy0; faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; add %i1,stridey,%i1 ! py += stridey fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; fstod %f2,%f2 ! (0_0) x0 = (double)fx0; .d5: lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; and %o5,-16,%o5 ! (0_0) signx0 &= -16; faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7; ldx [%fp+tmp_pz],%o1 fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20; and %o4,-8,%o4 ! (0_0) signy0 &= -8; faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; st %f2,[%o1] ! (0_1) *pz = ftmp0 add %o1,stridez,%o2 fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0; subcc counter,1,counter bneg,a,pn %icc,.begin or %g0,%o2,%o4 ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20; fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4; and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; cmp %l6,%o5 bl,pn %icc,.u24 fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; .c24: cmp %g1,%o5 bl,pn %icc,.u25 fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20; .c25: cmp %l6,_0x7f800000 bge,pn %icc,.u26 faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6; .c26: ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); cmp %g1,_0x7f800000 bge,pn %icc,.u27 fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; .c27: sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8; and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3; lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; add %o2,stridez,%o1 ! pz += stridez fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 bge,pn %icc,.update6 ! (1_0) if ( b0 > 0x7f800000 ) faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; .cont6: fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (1_0) y0 = (double)fy0; faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; fstod %f2,%f2 ! (1_0) x0 = (double)fx0; .d6: faddd %f32,K7,%f32 ! 
(5_1) dtmp0 += K7; and %o5,-16,%o5 ! (1_0) signx0 &= -16; and %o4,-8,%o4 ! (1_0) signy0 &= -8; lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20; faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2; fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; st %f2,[%o2] ! (1_1) *pz = ftmp0; subcc counter,1,counter bneg,a,pn %icc,.begin or %g0,%o1,%o4 ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20; fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4; and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; cmp %l6,%o5 bl,pn %icc,.u28 fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20; .c28: cmp %g5,%o5 bl,pn %icc,.u29 fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; .c29: cmp %l6,_0x7f800000 bge,pn %icc,.u30 faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6; .c30: ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); cmp %g5,_0x7f800000 bge,pn %icc,.u31 fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; .c31: sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (3_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20; lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; add %o1,stridez,%o2 ! pz += stridez faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3; lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3; fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 bge,pn %icc,.update7 ! (2_0) if ( b0 > 0x7f800000 ) faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; .cont7: fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (2_0) y0 = (double)fy0; faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; fstod %f2,%f2 ! (2_0) x0 = (double)fx0; sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; .d7: lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; and %o5,-16,%o5 ! (2_0) signx0 &= -16; faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20; faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0; fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2; fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; st %f1,[%o1] ! (2_1) *pz = ftmp0; subcc counter,1,counter bneg,a,pn %icc,.begin or %g0,%o2,%o4 ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; and %o4,-8,%o4 ! (2_0) signy0 &= -8; fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0; and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4; and %l3,_0x7fffffff,%o0 ! 
(3_0) ay0 = uy0 & 0x7fffffff; cmp %l6,%o5 bl,pn %icc,.u32 fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20; .c32: cmp %o0,%o5 bl,pn %icc,.u33 fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; .c33: cmp %l6,_0x7f800000 bge,pn %icc,.u34 faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; .c34: ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); cmp %o0,_0x7f800000 bge,pn %icc,.u35 fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; .c35: sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0; sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; add %o2,stridez,%o1 ! pz += stridez faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3; lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3; fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 bge,pn %icc,.update8 ! (3_0) if ( b0 > 0x7f800000 ) faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0; .cont8: fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (3_0) y0 = (double)fy0; faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; fstod %f1,%f16 ! (3_0) x0 = (double)fx0; .d8: faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; and %o5,-16,%o5 ! (3_0) signx0 &= -16; lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0; lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2; fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0; and %o4,-8,%o4 ! (3_0) signy0 &= -8; st %f1,[%o2] ! (3_1) *pz = ftmp0; fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; subcc counter,1,counter bneg,a,pn %icc,.begin or %g0,%o1,%o4 ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0; and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; cmp %l6,%o5 bl,pn %icc,.u36 fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20; .c36: cmp %l5,%o5 bl,pn %icc,.u37 fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; .c37: cmp %l6,_0x7f800000 bge,pn %icc,.u38 faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; .c38: ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); cmp %l5,_0x7f800000 bge,pn %icc,.u39 fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; .c39: sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0; sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; add %o1,stridez,%o2 ! 
pz += stridez faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); sll %o7,3,%o7 ! (5_1) cmul0_ind = ldiff0 << 3; fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bge,pn %icc,.update9 ! (4_0) if ( b0 > 0x7f800000 ) faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0; .cont9: fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (4_0) y0 = (double)fy0; faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; fstod %f2,%f2 ! (4_0) x0 = (double)fx0; sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; .d9: lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; and %o5,-16,%o5 ! (4_0) signx0 &= -16; faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0; subcc counter,5,counter bneg,pn %icc,.tail nop ba .main_loop nop .align 16 .main_loop: lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px; nop fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0; faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0; and %o4,-8,%o4 ! (4_1) signy0 &= -8; st %f22,[%o1] ! (4_2) *pz = ftmp0; fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0; ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0; fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0; and %l4,_0x7fffffff,%l6 ! (5_1) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff; fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; cmp %l6,%o5 bl,pn %icc,.up0 fmuld K9,%f24,%f40 ! (3_1) dtmp0 = K9 * x20; .co0: nop cmp %o7,%o5 bl,pn %icc,.up1 faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; .co1: ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0); cmp %l6,_0x7f800000 bge,pn %icc,.up2 fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; .co2: sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0; cmp %o7,_0x7f800000 bge,pn %icc,.up3 fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0; .co3: sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8; and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0; fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0); sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5; sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0; faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; lda [%o4]0x82,%f2 ! (5_1) fx0 = *(float*)((char*)px - addrc0); fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000 bge,pn %icc,.update10 ! (5_1) if ( b0 > 0x7f800000 ) faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0; .cont10: fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; nop fstod %f0,%f40 ! (5_1) y0 = (double)fy0; faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; add %o2,stridez,%o1 ! pz += stridez fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; add %i3,stridex,%i3 ! px += stridex fstod %f2,%f2 ! (5_1) x0 = (double)fx0; .den0: sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28; add %i1,stridey,%i1 ! py += stridey faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7; sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27; lda [%i1]0x82,%l4 ! 
(0_0) uy0 = *(int*)py; add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr; fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; and %o5,-16,%o5 ! (5_1) signx0 &= -16; fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0; faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0; st %f2,[%o2] ! (5_2) *pz = ftmp0; fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0; ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0; and %o4,-8,%o4 ! (5_1) signy0 &= -8; fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20; fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; cmp %l7,%o5 bl,pn %icc,.up4 fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20; .co4: nop cmp %l6,%o5 bl,pn %icc,.up5 faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6; .co5: ldd [%o2+%o4],%f36 ! (5_1) cadd0 = *(double*)(ltmp0 + signy0); cmp %l7,_0x7f800000 bge,pn %icc,.up6 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; .co6: sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; cmp %l6,_0x7f800000 bge,pn %icc,.up7 fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; .co7: sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8; and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; add %o1,stridez,%o2 ! pz += stridez fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000 bge,pn %icc,.update11 ! (0_0) if ( b0 > 0x7f800000 ) faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; .cont11: fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex fstod %f0,%f40 ! (0_0) y0 = (double)fy0; faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; add %i1,stridey,%i1 ! py += stridey fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; fstod %f2,%f2 ! (0_0) x0 = (double)fx0; .den1: lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; and %o5,-16,%o5 ! (0_0) signx0 &= -16; faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7; fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20; and %o4,-8,%o4 ! (0_0) signy0 &= -8; faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; nop st %f2,[%o1] ! (0_1) *pz = ftmp0 fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0; ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20; fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4; and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; cmp %l6,%o5 bl,pn %icc,.up8 fmuld K9,%f22,%f40 ! 
(5_1) dtmp0 = K9 * x20; .co8: nop cmp %g1,%o5 bl,pn %icc,.up9 faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6; .co9: ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); cmp %l6,_0x7f800000 bge,pn %icc,.up10 fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; .co10: sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; cmp %g1,_0x7f800000 bge,pn %icc,.up11 fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; .co11: sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8; and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3; lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; add %o2,stridez,%o1 ! pz += stridez fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 bge,pn %icc,.update12 ! (1_0) if ( b0 > 0x7f800000 ) faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; .cont12: fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; add %i1,stridey,%i1 ! py += stridey nop fstod %f0,%f40 ! (1_0) y0 = (double)fy0; faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; fstod %f2,%f2 ! (1_0) x0 = (double)fx0; .den2: faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7; and %o5,-16,%o5 ! (1_0) signx0 &= -16; and %o4,-8,%o4 ! (1_0) signy0 &= -8; lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20; faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2; fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; nop st %f2,[%o2] ! (1_1) *pz = ftmp0; fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20; fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4; and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20; cmp %l6,%o5 bl,pn %icc,.up12 fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; .co12: nop cmp %g5,%o5 bl,pn %icc,.up13 faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6; .co13: ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); cmp %l6,_0x7f800000 bge,pn %icc,.up14 fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; .co14: sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; cmp %g5,_0x7f800000 bge,pn %icc,.up15 fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; .co15: sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (3_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20; lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; add %o1,stridez,%o2 ! pz += stridez faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3; lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); sll %o0,3,%o0 ! 
(3_1) cmul0_ind = ldiff0 << 3; add %i3,stridex,%i3 ! px += stridex fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 bge,pn %icc,.update13 ! (2_0) if ( b0 > 0x7f800000 ) faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; .cont13: fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (2_0) y0 = (double)fy0; faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; fstod %f2,%f2 ! (2_0) x0 = (double)fx0; sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; .den3: lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; and %o5,-16,%o5 ! (2_0) signx0 &= -16; faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20; faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0; fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2; fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; st %f1,[%o1] ! (2_1) *pz = ftmp0; fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; and %o4,-8,%o4 ! (2_0) signy0 &= -8; fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0; and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4; and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20; cmp %l6,%o5 bl,pn %icc,.up16 fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; .co16: nop cmp %o0,%o5 bl,pn %icc,.up17 faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; .co17: ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); cmp %l6,_0x7f800000 bge,pn %icc,.up18 fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; .co18: sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; cmp %o0,_0x7f800000 bge,pn %icc,.up19 fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0; .co19: sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; add %o2,stridez,%o1 ! pz += stridez faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3; lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3; add %i3,stridex,%i3 ! px += stridex fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 bge,pn %icc,.update14 ! (3_0) if ( b0 > 0x7f800000 ) faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0; .cont14: fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (3_0) y0 = (double)fy0; faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; fstod %f1,%f16 ! (3_0) x0 = (double)fx0; .den4: faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; and %o5,-16,%o5 ! (3_0) signx0 &= -16; lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0; lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; fdivd %f40,%f16,%f6 ! 
(3_0) x0 = y0 / x0; faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2; fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0; and %o4,-8,%o4 ! (3_0) signy0 &= -8; st %f1,[%o2] ! (3_1) *pz = ftmp0; fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0; and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; sethi %hi(0x00800000),%o5 faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20; cmp %l6,%o5 bl,pn %icc,.up20 fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; .co20: nop cmp %l5,%o5 bl,pn %icc,.up21 faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; .co21: ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); cmp %l6,_0x7f800000 bge,pn %icc,.up22 fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; .co22: sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; cmp %l5,_0x7f800000 bge,pn %icc,.up23 fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0; .co23: sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1; faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; add %o1,stridez,%o2 ! pz += stridez faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); sll %o7,3,%o7 ! (5_1) cmul0_ind = ldiff0 << 3; add %i3,stridex,%i3 ! px += stridex fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20; cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bge,pn %icc,.update15 ! (4_0) if ( b0 > 0x7f800000 ) faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0; .cont15: fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; add %i1,stridey,%i1 ! py += stridey fstod %f0,%f40 ! (4_0) y0 = (double)fy0; faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; fstod %f2,%f2 ! (4_0) x0 = (double)fx0; sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; .den5: lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; subcc counter,6,counter ! counter? add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; and %o5,-16,%o5 ! (4_0) signx0 &= -16; bpos,pt %icc,.main_loop faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0; .tail: addcc counter,5,counter bneg,a,pn %icc,.begin or %g0,%o1,%o4 faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0; st %f22,[%o1] ! (4_2) *pz = ftmp0; subcc counter,1,counter bneg,a,pn %icc,.begin or %g0,%o2,%o4 ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0; faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0; faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0; faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; add %o2,stridez,%o1 ! pz += stridez fmuld %f4,%f18,%f4 ! 
(1_1) dtmp0 *= x20; sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0; st %f2,[%o2] ! (5_2) *pz = ftmp0; subcc counter,1,counter bneg,a,pn %icc,.begin or %g0,%o1,%o4 ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; add %o1,stridez,%o2 ! pz += stridez fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; st %f2,[%o1] ! (0_1) *pz = ftmp0 subcc counter,1,counter bneg,a,pn %icc,.begin or %g0,%o2,%o4 ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; add %o2,stridez,%o1 ! pz += stridez fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; st %f2,[%o2] ! (1_1) *pz = ftmp0; subcc counter,1,counter bneg,a,pn %icc,.begin or %g0,%o1,%o4 ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; add %o1,stridez,%o2 ! pz += stridez faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; st %f1,[%o1] ! (2_1) *pz = ftmp0; ba .begin or %g0,%o2,%o4 .align 16 .spec0: cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000 bg 2f ! if ( ax0 >= 0x7f800000 ) srl %l3,30,%l3 ! signx0 = (unsigned)ux0 >> 30; cmp %l7,_0x7f800000 ! ay0 ? 0x7f800000 bg 2f ! if ( ay0 >= 0x7f800000 ) and %l3,2,%l3 ! signx0 &= 2; sra %l4,31,%l4 ! signy0 = uy0 >> 31; bne,a 1f ! if (ay0 != 0x7f800000) add %l3,%l3,%l3 ! signx0 += signx0; cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000 bne,a 1f ! if ( ax0 != 0x7f800000 ) add %g0,2,%l3 ! signx0 = 2 add %l3,1,%l3 ! signx0 ++; 1: sll %l4,3,%l4 ! signy0 <<= 3; st %l3,[%fp+tmp_pz] ! STORE signx0 ldd [cmul_arr+88],%f0 ! LOAD M_PI_4 ld [%fp+tmp_pz],%f2 ! LOAD signx0 ldd [cmul_arr+%l4],%f4 ! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0); add %i1,stridey,%i1 ! py += stridey; fitod %f2,%f2 ! dtmp1 = (double)signx0; add %i3,stridex,%i3 ! px += stridex; fmuld %f2,%f0,%f0 ! res = signx0 * M_PI_4; fmuld %f0,%f4,%f0 ! res *= dtmp0; fdtos %f0,%f0 ! ftmp0 = (float) res; st %f0,[%o4] ! *pz = ftmp0; ba .begin1 add %o4,stridez,%o4 ! pz += stridez; 2: std %l6,[%fp+tmp_pz] ! *(float*)&ax0, *(float*)&ay0 ldd [%fp+tmp_pz],%f0 ! *(float*)&ax0, *(float*)&ay0 add %i1,stridey,%i1 ! py += stridey; fmuls %f0,%f1,%f0 ! ftmp0 = *(float*)&ax0 * *(float*)&ay0; add %i3,stridex,%i3 ! pz += stridex; st %f0,[%o4] ! *pz = ftmp0; ba .begin1 add %o4,stridez,%o4 ! pz += stridez; .align 16 .spec1: cmp %l6,0 bne,pn %icc,1f nop cmp %l7,0 bne,pn %icc,1f nop sra %l4,28,%l4 ! 
signy0 = uy0 >> 28; sra %l3,27,%l3 ! signx0 = ux0 >> 27; and %l4,-8,%l4 ! signy0 &= -8; sra %o2,31,%o2 ! ldiff0 >>= 31; and %l3,-16,%l3 ! signx0 &= -16; sll %o2,5,%o2 ! ldiff0 <<= 5; add %l4,%l3,%l3 ! signx0 += signy0; add %o2,%l3,%l3 ! signx0 += ldiff0; add %i1,stridey,%i1 ! py += stridey; ldd [cadd_arr+%l3],%f0 ! res = *(double*)((char*)(cadd_arr + 7) + signx0); add %i3,stridex,%i3 ! px += stridex; fdtos %f0,%f0 ! ftmp0 = (float) res; st %f0,[%o4] ! *pz = ftmp0; ba .begin1 add %o4,stridez,%o4 ! pz += stridez; 1: stx %o4,[%fp+tmp_pz] sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 lda [%i1+%o2]0x82,%l5 ! (0_0) fy0 = *(float*)((char*)py + addrc0); lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; lda [%o4]0x82,%g5 ! (0_0) fx0 = *(float*)((char*)px - addrc0); sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; add %i1,stridey,%i1 ! py += stridey add %i3,stridex,%i3 ! px += stridex lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; and %l5,_0x7fffffff,%l4 sethi %hi(0x00800000),%g1 cmp %l4,%g1 bge,a %icc,1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; fabss %f0,%f0 ! fy0 = fabsf(fy0); ldd [cmul_arr+96],%f40 sra %l5,28,%l4 ! itmp0 >>= 28; and %l4,-8,%l4 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f40,%f0,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f40,%f0,%f40 ! dtmp0 *= dsign; 1: and %g5,_0x7fffffff,%l4 cmp %l4,%g1 bge,a %icc,.spec1_cont fstod %f2,%f2 ! (0_0) x0 = (double)fx0; fabss %f2,%f2 ! fx0 = fabsf(fx0); ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %g5,28,%l4 ! itmp0 >>= 28; and %l4,-8,%l4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); ba .spec1_cont fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; .align 16 .update0: cmp counter,0 bg,pn %icc,1f nop ld [cmul_arr],%f2 ba .cont0 fzero %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,0,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,0,counter ba .cont0 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_px] st %f2,[%fp+tmp_px+4] ld [%fp+tmp_px],%o4 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff cmp %l5,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: add %i3,stridex,%i3 ! px += stridex add %i1,stridey,%i1 ! py += stridey ld [%fp+tmp_px+4],%o4 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff cmp %l5,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f2,%f2 ! fx0 = fabsf(fx0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! 
dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1: sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; ba .d0 add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; .align 16 .update1: cmp counter,1 bg,pn %icc,1f nop fzero %f0 ba .cont1 ld [cmul_arr],%f2 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,1,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,1,counter ba .cont1 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_px] st %f2,[%fp+tmp_px+4] ld [%fp+tmp_px],%o4 fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: add %i1,stridey,%i1 ! py += stridey ld [%fp+tmp_px+4],%o4 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f2,%f2 ! fx0 = fabsf(fx0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1: sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; ba .d1 add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; .align 16 .update2: cmp counter,2 bg,pn %icc,1f nop ld [cmul_arr],%f1 ba .cont2 fzeros %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,2,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f1 or %g0,2,counter ba .cont2 fzeros %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: std %f0,[%fp+tmp_px] ld [%fp+tmp_px],%o4 fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; 1: add %i1,stridey,%i1 ! py += stridey ld [%fp+tmp_px+4],%o4 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f1,%f16 ! (5_1) x0 = (double)fx0; fabss %f1,%f16 ! fx0 = fabsf(fx0); ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; and %o4,-8,%o4 ! itmp0 = -8; fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; 1: sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; sra %l4,27,%o5 ! 
(3_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex ba .d2 sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; .align 16 .update3: cmp counter,3 bg,pn %icc,1f nop fzero %f0 ba .cont3 ld [cmul_arr],%f2 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,3,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,3,counter ba .cont3 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_px] st %f2,[%fp+tmp_px+4] ld [%fp+tmp_px],%o4 fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: add %i1,stridey,%i1 ! py += stridey faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; ld [%fp+tmp_px+4],%o4 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; fabss %f2,%f2 ! fx0 = fabsf(fx0); ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; and %o4,-8,%o4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1: sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex ba .d3 sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; .align 16 .update4: cmp counter,4 bg,pn %icc,1f nop ld [cmul_arr],%f1 ba .cont4 fzeros %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,4,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f1 or %g0,4,counter ba .cont4 fzeros %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: std %f0,[%fp+tmp_px] ld [%fp+tmp_px],%o4 fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff cmp %o1,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f14 ! dtmp0 = (double) *(int*)&fy0; fmuld %f14,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f14 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f14,%f40,%f40 ! dtmp0 *= dsign; 1: faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; ld [%fp+tmp_px+4],%o4 and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff cmp %o1,%o5 bge,a 1f fstod %f1,%f2 ! (5_1) x0 = (double)fx0; fabss %f1,%f22 ! fx0 = fabsf(fx0); ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; and %o4,-8,%o4 ! itmp0 = -8; fitod %f22,%f22 ! dtmp0 = (double) *(int*)&fx0; fmuld %f22,%f0,%f22 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f22,%f0,%f2 ! dtmp0 *= dsign; 1: sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; ba .d4 add %i3,stridex,%i3 ! 
px += stridex .align 16 .update5: cmp counter,5 bg,pn %icc,1f nop ld [cmul_arr],%f2 ba .cont5 fzero %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,5,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,5,counter ba .cont5 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_px] st %f2,[%fp+tmp_px+4] ld [%fp+tmp_px],%o4 fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; stx %l5,[%fp+tmp_py] and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff cmp %l5,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; add %i1,stridey,%i1 ! py += stridey fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; ld [%fp+tmp_px+4],%o4 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff cmp %l5,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f2,%f2 ! fx0 = fabsf(fx0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1: ldx [%fp+tmp_py],%l5 sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; ba .d5 add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; .align 16 .update6: cmp counter,5 bg,pn %icc,1f nop ld [cmul_arr],%f2 ba .cont6 fzero %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,5,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,5,counter ba .cont6 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_pz] st %f2,[%fp+tmp_pz+4] ld [%fp+tmp_pz],%o4 fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; stx %l5,[%fp+tmp_px] and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff cmp %l5,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; add %i3,stridex,%i3 ! px += stridex add %i1,stridey,%i1 ! py += stridey fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; ld [%fp+tmp_pz+4],%o4 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff cmp %l5,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f2,%f2 ! fx0 = fabsf(fx0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1: ldx [%fp+tmp_px],%l5 sra %l4,27,%o5 ! 
(1_0) signx0 = ux0 >> 27; sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; ba .d6 add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; .align 16 .update7: cmp counter,5 bg,pn %icc,1f nop ld [cmul_arr],%f2 ba .cont7 fzero %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,5,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,5,counter ba .cont7 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_pz] st %f2,[%fp+tmp_pz+4] ld [%fp+tmp_pz],%o4 fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; add %i1,stridey,%i1 ! py += stridey fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; ld [%fp+tmp_pz+4],%o4 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f2,%f2 ! fx0 = fabsf(fx0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1: sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; ba .d7 add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; .align 16 .update8: cmp counter,5 bg,pn %icc,1f nop ld [cmul_arr],%f1 ba .cont8 fzeros %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,5,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f1 or %g0,5,counter ba .cont8 fzeros %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: std %f0,[%fp+tmp_pz] ld [%fp+tmp_pz],%o4 fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; 1: add %i1,stridey,%i1 ! py += stridey fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; ld [%fp+tmp_pz+4],%o4 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f1,%f16 ! (5_1) x0 = (double)fx0; fabss %f1,%f16 ! fx0 = fabsf(fx0); ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; and %o4,-8,%o4 ! itmp0 = -8; fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; 1: sll %o0,5,%l6 ! 
(3_0) ltmp0 = ldiff0 << 5; sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex ba .d8 sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; .align 16 .update9: cmp counter,5 bg,pn %icc,1f nop ld [cmul_arr],%f2 ba .cont9 fzero %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,5,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,5,counter ba .cont9 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_pz] st %f2,[%fp+tmp_pz+4] ld [%fp+tmp_pz],%o4 fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: add %i1,stridey,%i1 ! py += stridey faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; ld [%fp+tmp_pz+4],%o4 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; fabss %f2,%f2 ! fx0 = fabsf(fx0); ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; and %o4,-8,%o4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1: sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex ba .d9 sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; .align 16 .update10: cmp counter,1 bg,pn %icc,1f nop ld [cmul_arr],%f2 ba .cont10 fzero %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,1,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,1,counter ba .cont10 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_pz] st %f2,[%fp+tmp_pz+4] ld [%fp+tmp_pz],%o1 fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff cmp %o4,%o5 bge,a 1f fstod %f0,%f40 ! (5_1) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o1,28,%o1 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o1,-8,%o1 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; add %i3,stridex,%i3 ! px += stridex ld [%fp+tmp_pz+4],%o1 and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff cmp %o4,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o1,28,%o1 ! itmp0 >>= 28; fabss %f2,%f2 ! fx0 = fabsf(fx0); and %o1,-8,%o1 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! 
dtmp0 *= dsign; 1: ba .den0 add %o2,stridez,%o1 ! pz += stridez .align 16 .update11: cmp counter,2 bg,pn %icc,1f nop ld [cmul_arr],%f2 ba .cont11 fzero %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,2,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,2,counter ba .cont11 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_pz] st %f2,[%fp+tmp_pz+4] ld [%fp+tmp_pz],%o4 fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; stx %l5,[%fp+tmp_px] and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff cmp %l5,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; add %i1,stridey,%i1 ! py += stridey fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; ld [%fp+tmp_pz+4],%o4 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff cmp %l5,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f2,%f2 ! fx0 = fabsf(fx0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1: ldx [%fp+tmp_px],%l5 sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; add %i3,stridex,%i3 ! px += stridex lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; ba .den1 add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; .align 16 .update12: cmp counter,3 bg,pn %icc,1f nop ld [cmul_arr],%f2 ba .cont12 fzero %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,3,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] stx %i3,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,3,counter ba .cont12 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_pz] st %f2,[%fp+tmp_pz+4] ld [%fp+tmp_pz],%o4 fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; stx %l5,[%fp+tmp_px] and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff cmp %l5,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; add %i3,stridex,%i3 ! px += stridex add %i1,stridey,%i1 ! py += stridey fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; ld [%fp+tmp_pz+4],%o4 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff cmp %l5,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f2,%f2 ! fx0 = fabsf(fx0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! 
dtmp0 *= dsign; 1: ldx [%fp+tmp_px],%l5 sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; ba .den2 add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; .align 16 .update13: cmp counter,4 bg,pn %icc,1f nop ld [cmul_arr],%f2 ba .cont13 fzero %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,4,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] sub %i3,stridex,%o5 stx %o5,[%fp+tmp_px] ld [cmul_arr],%f2 or %g0,4,counter ba .cont13 fzero %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: st %f0,[%fp+tmp_pz] st %f2,[%fp+tmp_pz+4] ld [%fp+tmp_pz],%o4 fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 1: faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; add %i1,stridey,%i1 ! py += stridey fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; ld [%fp+tmp_pz+4],%o4 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f2,%f2 ! (5_1) x0 = (double)fx0; ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f2,%f2 ! fx0 = fabsf(fx0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1: sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; ba .den3 add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; .align 16 .update14: cmp counter,5 bg,pn %icc,1f nop ld [cmul_arr],%f1 ba .cont14 fzeros %f0 1: cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 bg,pt %icc,1f nop 2: sub counter,5,counter st counter,[%fp+tmp_counter] stx %i1,[%fp+tmp_py] sub %i3,stridex,%o5 stx %o5,[%fp+tmp_px] ld [cmul_arr],%f1 or %g0,5,counter ba .cont14 fzeros %f0 1: andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff bne,pn %icc,1f sethi %hi(0x00800000),%o5 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff be,pn %icc,2b nop 1: std %f0,[%fp+tmp_pz] ld [%fp+tmp_pz],%o4 fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f0,%f40 ! (0_0) y0 = (double)fy0; ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; fabss %f0,%f0 ! fy0 = fabsf(fy0); and %o4,-8,%o4 ! itmp0 = -8; fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; 1: add %i1,stridey,%i1 ! py += stridey fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; ld [%fp+tmp_pz+4],%o4 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff cmp %l6,%o5 bge,a 1f fstod %f1,%f16 ! (5_1) x0 = (double)fx0; fabss %f1,%f16 ! fx0 = fabsf(fx0); ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 sra %o4,28,%o4 ! itmp0 >>= 28; and %o4,-8,%o4 ! itmp0 = -8; fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; ldd [cmul_arr+%o4],%f0 ! 
dsign = *(double*)((char*)cmul_arr + itmp0);
	fmuld	%f16,%f0,%f16		! dtmp0 *= dsign;
1:
	sll	%o0,5,%l6		! (3_0) ltmp0 = ldiff0 << 5;
	sra	%l4,27,%o5		! (3_0) signx0 = ux0 >> 27;
	ba	.den4
	sra	%l3,28,%o4		! (3_0) signy0 = uy0 >> 28;

	.align	16
.update15:
	cmp	counter,6
	bg,pn	%icc,1f
	nop
	ld	[cmul_arr],%f2
	ba	.cont15
	fzero	%f0
1:
	cmp	%o5,_0x7f800000		! (4_0) b0 ? 0x7f800000
	bg,pt	%icc,1f
	nop
2:
	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]
	stx	%i1,[%fp+tmp_py]
	sub	%i3,stridex,%o5
	stx	%o5,[%fp+tmp_px]
	ld	[cmul_arr],%f2
	or	%g0,6,counter
	ba	.cont15
	fzero	%f0
1:
	andcc	%l3,_0x7fffffff,%g0	! itmp0 & 0x7fffffff
	bne,pn	%icc,1f
	sethi	%hi(0x00800000),%o5
	andcc	%l4,_0x7fffffff,%g0	! itmp0 & 0x7fffffff
	be,pn	%icc,2b
	nop
1:
	st	%f0,[%fp+tmp_pz]
	st	%f2,[%fp+tmp_pz+4]
	ld	[%fp+tmp_pz],%o4
	fmuld	%f40,%f16,%f26		! (2_0) dtmp0 *= x20;
	and	%o4,_0x7fffffff,%l6	! itmp0 & 0x7fffffff
	cmp	%l6,%o5
	bge,a	1f
	fstod	%f0,%f40		! (0_0) y0 = (double)fy0;
	ldd	[cmul_arr+96],%f40	! LOAD C2ONM149
	sra	%o4,28,%o4		! itmp0 >>= 28;
	fabss	%f0,%f0			! fy0 = fabsf(fy0);
	and	%o4,-8,%o4		! itmp0 &= -8;
	fitod	%f0,%f0			! dtmp0 = (double) *(int*)&fy0;
	fmuld	%f0,%f40,%f40		! dtmp0 *= C2ONM149;
	ldd	[cmul_arr+%o4],%f0	! dsign = *(double*)((char*)cmul_arr + itmp0);
	fmuld	%f0,%f40,%f40		! dtmp0 *= dsign;
1:
	add	%i1,stridey,%i1		! py += stridey
	faddd	%f4,K5,%f62		! (1_0) dtmp0 += K5;
	fmuld	%f24,%f20,%f24		! (0_0) dtmp0 *= x20;
	ld	[%fp+tmp_pz+4],%o4
	and	%o4,_0x7fffffff,%l6	! itmp0 & 0x7fffffff
	cmp	%l6,%o5
	bge,a	1f
	fstod	%f2,%f2			! (5_1) x0 = (double)fx0;
	fabss	%f2,%f2			! fx0 = fabsf(fx0);
	ldd	[cmul_arr+96],%f0	! LOAD C2ONM149
	sra	%o4,28,%o4		! itmp0 >>= 28;
	and	%o4,-8,%o4		! itmp0 &= -8;
	fitod	%f2,%f2			! dtmp0 = (double) *(int*)&fx0;
	fmuld	%f2,%f0,%f2		! dtmp0 *= C2ONM149;
	ldd	[cmul_arr+%o4],%f0	! dsign = *(double*)((char*)cmul_arr + itmp0);
	fmuld	%f2,%f0,%f2		! dtmp0 *= dsign;
1:
	sll	%l5,5,%l6		! (4_0) ltmp0 = ldiff0 << 5;
	sra	%l3,27,%o5		! (4_0) signx0 = ux0 >> 27;
	ba	.den5
	sra	%l4,28,%o4		! (4_0) signy0 = uy0 >> 28;
	.align	16
.u0:	ba	.c0
	or	%g0,_0x7fffffff,%o5
.u1:	ba	.c1
	or	%g0,_0x7fffffff,%o5
.u2:	ba	.c2
	or	%g0,_0x7f800000,%o5
.u3:	ba	.c3
	or	%g0,_0x7f800000,%o5
.u4:	ba	.c4
	or	%g0,_0x7fffffff,%o5
.u5:	ba	.c5
	or	%g0,_0x7fffffff,%o5
.u6:	ba	.c6
	or	%g0,_0x7f800000,%o5
.u7:	ba	.c7
	or	%g0,_0x7f800000,%o5
.u8:	ba	.c8
	or	%g0,_0x7fffffff,%o5
.u9:	ba	.c9
	or	%g0,_0x7fffffff,%o5
.u10:	ba	.c10
	or	%g0,_0x7f800000,%o5
.u11:	ba	.c11
	or	%g0,_0x7f800000,%o5
.u12:	ba	.c12
	or	%g0,_0x7fffffff,%o5
.u13:	ba	.c13
	or	%g0,_0x7fffffff,%o5
.u14:	ba	.c14
	or	%g0,_0x7f800000,%o5
.u15:	ba	.c15
	or	%g0,_0x7f800000,%o5
.u16:	ba	.c16
	or	%g0,_0x7fffffff,%o5
.u17:	ba	.c17
	or	%g0,_0x7fffffff,%o5
.u18:	ba	.c18
	or	%g0,_0x7f800000,%o5
.u19:	ba	.c19
	or	%g0,_0x7f800000,%o5
.u20:	ba	.c20
	or	%g0,_0x7fffffff,%o5
.u21:	ba	.c21
	or	%g0,_0x7fffffff,%o5
.u22:	ba	.c22
	or	%g0,_0x7f800000,%o5
.u23:	ba	.c23
	or	%g0,_0x7f800000,%o5
.u24:	ba	.c24
	or	%g0,_0x7fffffff,%o5
.u25:	ba	.c25
	or	%g0,_0x7fffffff,%o5
.u26:	ba	.c26
	or	%g0,_0x7f800000,%o5
.u27:	ba	.c27
	or	%g0,_0x7f800000,%o5
.u28:	ba	.c28
	or	%g0,_0x7fffffff,%o5
.u29:	ba	.c29
	or	%g0,_0x7fffffff,%o5
.u30:	ba	.c30
	or	%g0,_0x7f800000,%o5
.u31:	ba	.c31
	or	%g0,_0x7f800000,%o5
.u32:	ba	.c32
	or	%g0,_0x7fffffff,%o5
.u33:	ba	.c33
	or	%g0,_0x7fffffff,%o5
.u34:	ba	.c34
	or	%g0,_0x7f800000,%o5
.u35:	ba	.c35
	or	%g0,_0x7f800000,%o5
.u36:	ba	.c36
	or	%g0,_0x7fffffff,%o5
.u37:	ba	.c37
	or	%g0,_0x7fffffff,%o5
.u38:	ba	.c38
	or	%g0,_0x7f800000,%o5
.u39:	ba	.c39
	or	%g0,_0x7f800000,%o5
.up0:	ba	.co0
	or	%g0,_0x7fffffff,%o5
.up1:	ba	.co1
	or	%g0,_0x7fffffff,%o5
.up2:	ba	.co2
	or	%g0,_0x7f800000,%o5
.up3:	ba	.co3
	or	%g0,_0x7f800000,%o5
.up4:	ba	.co4
	or	%g0,_0x7fffffff,%o5
.up5:	ba	.co5
	or	%g0,_0x7fffffff,%o5
.up6:	ba	.co6
	or	%g0,_0x7f800000,%o5
.up7:	ba	.co7
	or	%g0,_0x7f800000,%o5
.up8:	ba	.co8
	or	%g0,_0x7fffffff,%o5
.up9:	ba	.co9
	or	%g0,_0x7fffffff,%o5
.up10:	ba	.co10
	or	%g0,_0x7f800000,%o5
.up11:	ba	.co11
	or	%g0,_0x7f800000,%o5
.up12:	ba	.co12
	or	%g0,_0x7fffffff,%o5
.up13:	ba	.co13
	or	%g0,_0x7fffffff,%o5
.up14:	ba	.co14
	or	%g0,_0x7f800000,%o5
.up15:	ba	.co15
	or	%g0,_0x7f800000,%o5
.up16:	ba	.co16
	or	%g0,_0x7fffffff,%o5
.up17:	ba	.co17
	or	%g0,_0x7fffffff,%o5
.up18:	ba	.co18
	or	%g0,_0x7f800000,%o5
.up19:	ba	.co19
	or	%g0,_0x7f800000,%o5
.up20:	ba	.co20
	or	%g0,_0x7fffffff,%o5
.up21:	ba	.co21
	or	%g0,_0x7fffffff,%o5
.up22:	ba	.co22
	or	%g0,_0x7f800000,%o5
.up23:	ba	.co23
	or	%g0,_0x7f800000,%o5

.exit:
	ret
	restore
	SET_SIZE(__vatan2f)