1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 */ 24/* 25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 .file "__vatan2f.S" 30 31#include "libm.h" 32 33 RO_DATA 34 .align 64 35.CONST_TBL: 36 .word 0xbff921fb, 0x54442d18 ! -M_PI_2 37 .word 0x3ff921fb, 0x54442d18 ! M_PI_2 38 .word 0xbff921fb, 0x54442d18 ! -M_PI_2 39 .word 0x3ff921fb, 0x54442d18 ! M_PI_2 40 .word 0xc00921fb, 0x54442d18 ! -M_PI 41 .word 0x400921fb, 0x54442d18 ! M_PI 42 .word 0x80000000, 0x00000000 ! -0.0 43 .word 0x00000000, 0x00000000 ! 0.0 44 45 .word 0xbff00000, 0x00000000 ! -1.0 46 .word 0x3ff00000, 0x00000000 ! 1.0 47 48 .word 0x3fefffff, 0xfe79bf93 ! K0 = 9.99999997160545464888e-01 49 .word 0xbfd55552, 0xf0db4320 ! K1 = -3.33332762919825514315e-01 50 .word 0x3fc998f8, 0x2493d066 ! K2 = 1.99980752811487135558e-01 51 .word 0xbfc240b8, 0xd994abf9 ! K3 = -1.42600160828209047720e-01 52 .word 0x3fbbfc9e, 0x8c2b0243 ! K4 = 1.09323415013030928421e-01 53 .word 0xbfb56013, 0x64b1cac3 ! K5 = -8.34972496830160174704e-02 54 .word 0x3fad3ad7, 0x9f53e142 ! K6 = 5.70895559303061900411e-02 55 .word 0xbf9f148f, 0x2a829af1 ! K7 = -3.03518647857811706139e-02 56 .word 0x3f857a8c, 0x747ed314 ! K8 = 1.04876492549493055747e-02 57 .word 0xbf5bdf39, 0x729124b6 ! K9 = -1.70117006406859722727e-03 58 59 .word 0x3fe921fb, 0x54442d18 ! M_PI_4 60 .word 0x36a00000, 0x00000000 ! 2^(-149) 61 62#define counter %o3 63#define stridex %i4 64#define stridey %i5 65#define stridez %l1 66#define cmul_arr %i0 67#define cadd_arr %i2 68#define _0x7fffffff %l0 69#define _0x7f800000 %l2 70 71#define K0 %f42 72#define K1 %f44 73#define K2 %f46 74#define K3 %f48 75#define K4 %f50 76#define K5 %f52 77#define K6 %f54 78#define K7 %f56 79#define K8 %f58 80#define K9 %f60 81 82#define tmp_counter STACK_BIAS-32 83#define tmp_py STACK_BIAS-24 84#define tmp_px STACK_BIAS-16 85#define tmp_pz STACK_BIAS-8 86 87! sizeof temp storage - must be a multiple of 16 for V9 88#define tmps 0x20 89 90!-------------------------------------------------------------------- 91! !!!!! vatan2f algorithm !!!!! 92! uy0 = *(int*)py; 93! ux0 = *(int*)px; 94! ay0 = uy0 & 0x7fffffff; 95! ax0 = ux0 & 0x7fffffff; 96! if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 ) 97! { 98! /* |X| or |Y| = Nan */ 99! if ( ax0 > 0x7f800000 || ay0 > 0x7f800000 ) 100! { 101! ftmp0 = *(float*)&ax0 * *(float*)&ay0; 102! *pz = ftmp0; 103! } 104! signx0 = (unsigned)ux0 >> 30; 105! signx0 &= 2; 106! signy0 = uy0 >> 31; 107! if (ay0 == 0x7f800000) 108! signx0 = (ax0 == 0x7f800000) ? signx0 + 1 : 2; 109! else 110! signx0 += signx0; 111! res = signx0 * M_PI_4; 112! signy0 <<= 3; 113! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0); 114! res *= dtmp0; 115! ftmp0 = (float) res; 116! *pz = ftmp0; 117! goto next; 118! } 119! if ( ax0 == 0 && ay0 == 0 ) 120! { 121! signy0 = uy0 >> 28; 122! signx0 = ux0 >> 27; 123! ldiff0 = ax0 - ay0; 124! ldiff0 >>= 31; 125! signx0 &= -16; 126! signy0 &= -8; 127! ldiff0 <<= 5; 128! signx0 += signy0; 129! res = *(double*)((char*)(cadd_arr + 7) + ldiff0 + signx0 + signy0); 130! ftmp0 = (float) res; 131! *pz = ftmp0; 132! goto next; 133! } 134! ldiff0 = ax0 - ay0; 135! ldiff0 >>= 31; 136! addrc0 = (char*)px - (char*)py; 137! addrc0 &= ldiff0; 138! fy0 = *(float*)((char*)py + addrc0); 139! fx0 = *(float*)((char*)px - addrc0); 140! itmp0 = *(int*)&fy0; 141! if((itmp0 & 0x7fffffff) < 0x00800000) 142! { 143! itmp0 >>= 28; 144! itmp0 &= -8; 145! fy0 = fabsf(fy0); 146! dtmp0 = (double) *(int*)&fy0; 147! dtmp0 *= C2ONM149; 148! dsign = *(double*)((char*)cmul_arr + itmp0); 149! dtmp0 *= dsign; 150! y0 = dtm0; 151! } 152! else 153! y0 = (double)fy0; 154! itmp0 = *(int*)&fx0; 155! if((itmp0 & 0x7fffffff) < 0x00800000) 156! { 157! itmp0 >>= 28; 158! itmp0 &= -8; 159! fx0 = fabsf(fx0); 160! dtmp0 = (double) *(int*)&fx0; 161! dtmp0 *= C2ONM149; 162! dsign = *(double*)((char*)cmul_arr + itmp0); 163! dtmp0 *= dsign; 164! x0 = dtmp0; 165! } 166! else 167! x0 = (double)fx0; 168! px += stridex; 169! py += stridey; 170! x0 = y0 / x0; 171! x20 = x0 * x0; 172! dtmp0 = K9 * x20; 173! dtmp0 += K8; 174! dtmp0 *= x20; 175! dtmp0 += K7; 176! dtmp0 *= x20; 177! dtmp0 += K6; 178! dtmp0 *= x20; 179! dtmp0 += K5; 180! dtmp0 *= x20; 181! dtmp0 += K4; 182! dtmp0 *= x20; 183! dtmp0 += K3; 184! dtmp0 *= x20; 185! dtmp0 += K2; 186! dtmp0 *= x20; 187! dtmp0 += K1; 188! dtmp0 *= x20; 189! dtmp0 += K0; 190! x0 = dtmp0 * x0; 191! signy0 = uy0 >> 28; 192! signy0 &= -8; 193! signx0 = ux0 >> 27; 194! signx0 &= -16; 195! ltmp0 = ldiff0 << 5; 196! ltmp0 += (char*)cadd_arr; 197! ltmp0 += signx0; 198! cadd0 = *(double*)(ltmp0 + signy0); 199! cmul0_ind = ldiff0 << 3; 200! cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 201! dtmp0 = cmul0 * x0; 202! dtmp0 = cadd0 + dtmp0; 203! ftmp0 = (float)dtmp0; 204! *pz = ftmp0; 205! pz += stridez; 206! 207!-------------------------------------------------------------------- 208 209 ENTRY(__vatan2f) 210 save %sp,-SA(MINFRAME)-tmps,%sp 211 PIC_SETUP(l7) 212 PIC_SET(l7,.CONST_TBL,g5) 213 214#ifdef __sparcv9 215 ldx [%fp+STACK_BIAS+176],%l7 216#else 217 ld [%fp+STACK_BIAS+92],%l7 218#endif 219 220 st %i0,[%fp+tmp_counter] 221 sethi %hi(0x7ffffc00),_0x7fffffff 222 add _0x7fffffff,1023,_0x7fffffff 223 or %g0,%i2,%o2 224 sll %l7,2,stridez 225 226 sethi %hi(0x7f800000),_0x7f800000 227 mov %g5,%g1 228 229 or %g0,stridey,%o4 230 add %g1,56,cadd_arr 231 232 sll %o2,2,stridey 233 add %g1,72,cmul_arr 234 235 ldd [%g1+80],K0 236 ldd [%g1+80+8],K1 237 ldd [%g1+80+16],K2 238 ldd [%g1+80+24],K3 239 ldd [%g1+80+32],K4 240 ldd [%g1+80+40],K5 241 ldd [%g1+80+48],K6 242 ldd [%g1+80+56],K7 243 ldd [%g1+80+64],K8 244 ldd [%g1+80+72],K9 245 246 sll stridex,2,stridex 247 248 stx %i1,[%fp+tmp_py] 249 stx %i3,[%fp+tmp_px] 250.begin: 251 ld [%fp+tmp_counter],counter 252 ldx [%fp+tmp_py],%i1 253 ldx [%fp+tmp_px],%i3 254 st %g0,[%fp+tmp_counter] 255.begin1: 256 subcc counter,1,counter 257 bneg,pn %icc,.exit 258 nop 259 260 lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; 261 262 lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; 263 264 and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; 265 266 cmp %l7,_0x7f800000 267 bge,pn %icc,.spec0 268 and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; 269 270 cmp %l6,_0x7f800000 271 bge,pn %icc,.spec0 272 sethi %hi(0x00800000),%o5 273 274 cmp %l6,%o5 275 bl,pn %icc,.spec1 276 sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; 277 278 cmp %l7,%o5 279 bl,pn %icc,.spec1 280 nop 281 282 stx %o4,[%fp+tmp_pz] 283 sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; 284 sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; 285 286 and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; 287 288 lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); 289 sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 290 291 lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); 292 sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; 293 294 sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 295 add %i1,stridey,%i1 ! py += stridey 296 297 add %i3,stridex,%i3 ! px += stridex 298 299 lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 300 sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 301 302 add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 303 304 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 305 306 fstod %f2,%f2 ! (0_0) x0 = (double)fx0; 307 308.spec1_cont: 309 lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; 310 and %o5,-16,%o5 ! (0_0) signx0 &= -16; 311 312 and %o4,-8,%o4 ! (0_0) signy0 &= -8; 313 314 fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; 315 316 add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; 317 318 and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; 319 sethi %hi(0x00800000),%o5 320 321 cmp %l6,%o5 322 bl,pn %icc,.u0 323 and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; 324.c0: 325 cmp %g1,%o5 326 bl,pn %icc,.u1 327 ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); 328.c1: 329 cmp %l6,_0x7f800000 330 bge,pn %icc,.u2 331 sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; 332.c2: 333 cmp %g1,_0x7f800000 334 bge,pn %icc,.u3 335 nop 336.c3: 337 sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; 338 sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; 339 340 and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; 341 342 lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); 343 sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; 344 345 lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); 346 sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; 347 348 cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 349 bge,pn %icc,.update0 ! (1_0) if ( b0 > 0x7f800000 ) 350 nop 351.cont0: 352 add %i1,stridey,%i1 ! py += stridey 353 fstod %f0,%f40 ! (1_0) y0 = (double)fy0; 354 355 sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 356 add %i3,stridex,%i3 ! px += stridex 357 358 sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; 359 add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; 360 fstod %f2,%f2 ! (1_0) x0 = (double)fx0; 361.d0: 362 and %o5,-16,%o5 ! (1_0) signx0 &= -16; 363 and %o4,-8,%o4 ! (1_0) signy0 &= -8; 364 365 lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; 366 367 lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; 368 fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; 369 370 fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; 371 372 add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; 373 374 and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; 375 sethi %hi(0x00800000),%o5 376 377 cmp %l6,%o5 378 bl,pn %icc,.u4 379 and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; 380.c4: 381 cmp %g5,%o5 382 bl,pn %icc,.u5 383 fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; 384.c5: 385 cmp %l6,_0x7f800000 386 bge,pn %icc,.u6 387 ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); 388.c6: 389 cmp %g5,_0x7f800000 390 bge,pn %icc,.u7 391 sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; 392.c7: 393 sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; 394 sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; 395 396 faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; 397 and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; 398 399 lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); 400 sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; 401 402 lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); 403 404 cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 405 bge,pn %icc,.update1 ! (2_0) if ( b0 > 0x7f800000 ) 406 nop 407.cont1: 408 fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 409 sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 410 add %i1,stridey,%i1 ! py += stridey 411 fstod %f0,%f40 ! (2_0) y0 = (double)fy0; 412 413 sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 414 add %i3,stridex,%i3 ! px += stridex 415 416 fstod %f2,%f2 ! (2_0) x0 = (double)fx0; 417 sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 418 add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 419.d1: 420 lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; 421 and %o5,-16,%o5 ! (2_0) signx0 &= -16; 422 faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; 423 424 lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; 425 426 fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; 427 428 fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; 429 430 add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; 431 and %o4,-8,%o4 ! (2_0) signy0 &= -8; 432 fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; 433 434 and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; 435 sethi %hi(0x00800000),%o5 436 437 cmp %l6,%o5 438 bl,pn %icc,.u8 439 and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; 440.c8: 441 cmp %o0,%o5 442 bl,pn %icc,.u9 443 fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; 444.c9: 445 cmp %l6,_0x7f800000 446 bge,pn %icc,.u10 447 faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; 448.c10: 449 cmp %o0,_0x7f800000 450 bge,pn %icc,.u11 451 ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); 452.c11: 453 sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; 454 455 sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; 456 sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; 457 458 faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; 459 and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; 460 fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; 461 462 lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); 463 sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; 464 465 lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); 466 467 cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 468 bge,pn %icc,.update2 ! (3_0) if ( b0 > 0x7f800000 ) 469 nop 470.cont2: 471 fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 472 sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 473 add %i1,stridey,%i1 ! py += stridey 474 fstod %f0,%f40 ! (3_0) y0 = (double)fy0; 475 476 faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 477 sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 478 add %i3,stridex,%i3 ! px += stridex 479 480 sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 481 fstod %f1,%f16 ! (3_0) x0 = (double)fx0; 482.d2: 483 faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; 484 add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; 485 and %o5,-16,%o5 ! (3_0) signx0 &= -16; 486 487 lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; 488 fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; 489 490 lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; 491 fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; 492 493 and %o4,-8,%o4 ! (3_0) signy0 &= -8; 494 fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; 495 496 add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; 497 fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; 498 499 and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; 500 sethi %hi(0x00800000),%o5 501 faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; 502 503 cmp %l6,%o5 504 bl,pn %icc,.u12 505 and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; 506.c12: 507 cmp %l5,%o5 508 bl,pn %icc,.u13 509 fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; 510.c13: 511 cmp %l6,_0x7f800000 512 bge,pn %icc,.u14 513 faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; 514.c14: 515 ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); 516 cmp %l5,_0x7f800000 517 bge,pn %icc,.u15 518 fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; 519.c15: 520 sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; 521 522 sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; 523 sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; 524 525 faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; 526 and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; 527 fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; 528 529 lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); 530 sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; 531 faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; 532 533 lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); 534 535 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 536 bge,pn %icc,.update3 ! (4_0) if ( b0 > 0x7f800000 ) 537 nop 538.cont3: 539 fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 540 sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 541 add %i1,stridey,%i1 ! py += stridey 542 fstod %f0,%f40 ! (4_0) y0 = (double)fy0; 543 544 faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 545 add %i3,stridex,%i3 ! px += stridex 546 fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 547 548 fstod %f2,%f2 ! (4_0) x0 = (double)fx0; 549 sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 550 sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; 551.d3: 552 lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; 553 add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; 554 faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; 555 556 fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; 557 and %o5,-16,%o5 ! (4_0) signx0 &= -16; 558 559 lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px; 560 fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0; 561 faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; 562 563 and %o4,-8,%o4 ! (4_1) signy0 &= -8; 564 fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0; 565 566 add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0; 567 fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; 568 569 and %l4,_0x7fffffff,%l6 ! (5_1) ax0 = ux0 & 0x7fffffff; 570 sethi %hi(0x00800000),%o5 571 faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; 572 573 cmp %l6,%o5 574 bl,pn %icc,.u16 575 and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff; 576.c16: 577 cmp %o7,%o5 578 bl,pn %icc,.u17 579 fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; 580.c17: 581 cmp %l6,_0x7f800000 582 bge,pn %icc,.u18 583 fmuld K9,%f24,%f40 ! (3_1) dtmp0 = K9 * x20; 584.c18: 585 cmp %o7,_0x7f800000 586 bge,pn %icc,.u19 587 faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; 588.c19: 589 ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0); 590 fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 591 592 sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0; 593 594 sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31; 595 sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py; 596 faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; 597 598 faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8; 599 and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0; 600 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 601 602 lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0); 603 sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5; 604 sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0; 605 faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; 606 607 lda [%o4]0x82,%f1 ! (5_1) fx0 = *(float*)((char*)px - addrc0); 608 609 fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; 610 cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000 611 bge,pn %icc,.update4 ! (5_1) if ( b0 > 0x7f800000 ) 612 nop 613.cont4: 614 fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; 615 fstod %f0,%f40 ! (5_1) y0 = (double)fy0; 616 617 faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; 618 fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 619 620 add %i3,stridex,%i3 ! px += stridex 621 sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; 622 fstod %f1,%f2 ! (5_1) x0 = (double)fx0; 623.d4: 624 sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28; 625 add %i1,stridey,%i1 ! py += stridey 626 627 faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7; 628 sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27; 629 630 lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; 631 add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr; 632 fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; 633 faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; 634 635 lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; 636 and %o5,-16,%o5 ! (5_1) signx0 &= -16; 637 fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0; 638 faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; 639 640 fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0; 641 642 ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 643 add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0; 644 and %o4,-8,%o4 ! (5_1) signy0 &= -8; 645 fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20; 646 647 fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; 648 and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; 649 sethi %hi(0x00800000),%o5 650 faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; 651 652 and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; 653 cmp %l7,%o5 654 bl,pn %icc,.u20 655 fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; 656.c20: 657 cmp %l6,%o5 658 bl,pn %icc,.u21 659 fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20; 660.c21: 661 cmp %l7,_0x7f800000 662 bge,pn %icc,.u22 663 faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6; 664.c22: 665 ldd [%o2+%o4],%f36 ! (5_1) cadd0 = *(double*)(ltmp0 + signy0); 666 cmp %l6,_0x7f800000 667 bge,pn %icc,.u23 668 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 669.c23: 670 sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; 671 672 fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; 673 sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; 674 sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; 675 faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; 676 677 faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8; 678 and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; 679 fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 680 681 lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); 682 sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; 683 sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 684 faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; 685 686 lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); 687 sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; 688 689 fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; 690 cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000 691 bge,pn %icc,.update5 ! (0_0) if ( b0 > 0x7f800000 ) 692 faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; 693.cont5: 694 fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; 695 sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 696 add %i3,stridex,%i3 ! px += stridex 697 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 698 699 faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; 700 add %i1,stridey,%i1 ! py += stridey 701 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 702 703 lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 704 sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 705 add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 706 fstod %f2,%f2 ! (0_0) x0 = (double)fx0; 707.d5: 708 lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; 709 and %o5,-16,%o5 ! (0_0) signx0 &= -16; 710 faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7; 711 712 ldx [%fp+tmp_pz],%o1 713 fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20; 714 and %o4,-8,%o4 ! (0_0) signy0 &= -8; 715 faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; 716 717 fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; 718 faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; 719 720 fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; 721 st %f2,[%o1] ! (0_1) *pz = ftmp0 722 add %o1,stridez,%o2 723 fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0; 724 725 subcc counter,1,counter 726 bneg,a,pn %icc,.begin 727 or %g0,%o2,%o4 728 729 ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 730 add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; 731 fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20; 732 733 fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; 734 and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; 735 sethi %hi(0x00800000),%o5 736 faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4; 737 738 and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; 739 cmp %l6,%o5 740 bl,pn %icc,.u24 741 fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; 742.c24: 743 cmp %g1,%o5 744 bl,pn %icc,.u25 745 fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20; 746.c25: 747 cmp %l6,_0x7f800000 748 bge,pn %icc,.u26 749 faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6; 750.c26: 751 ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); 752 cmp %g1,_0x7f800000 753 bge,pn %icc,.u27 754 fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 755.c27: 756 sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; 757 758 fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; 759 sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; 760 sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; 761 faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; 762 763 faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8; 764 and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; 765 fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 766 767 lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); 768 sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; 769 sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; 770 faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3; 771 772 lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); 773 sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; 774 add %o2,stridez,%o1 ! pz += stridez 775 776 fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; 777 cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 778 bge,pn %icc,.update6 ! (1_0) if ( b0 > 0x7f800000 ) 779 faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; 780.cont6: 781 fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; 782 add %i1,stridey,%i1 ! py += stridey 783 fstod %f0,%f40 ! (1_0) y0 = (double)fy0; 784 785 faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; 786 sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 787 add %i3,stridex,%i3 ! px += stridex 788 fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 789 790 sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; 791 add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; 792 fstod %f2,%f2 ! (1_0) x0 = (double)fx0; 793.d6: 794 faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7; 795 and %o5,-16,%o5 ! (1_0) signx0 &= -16; 796 and %o4,-8,%o4 ! (1_0) signy0 &= -8; 797 798 lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; 799 fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20; 800 faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; 801 802 lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; 803 fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; 804 faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2; 805 806 fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; 807 fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; 808 st %f2,[%o2] ! (1_1) *pz = ftmp0; 809 810 subcc counter,1,counter 811 bneg,a,pn %icc,.begin 812 or %g0,%o1,%o4 813 814 ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 815 add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; 816 fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20; 817 818 fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; 819 and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; 820 sethi %hi(0x00800000),%o5 821 faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4; 822 823 and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; 824 cmp %l6,%o5 825 bl,pn %icc,.u28 826 fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20; 827.c28: 828 cmp %g5,%o5 829 bl,pn %icc,.u29 830 fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; 831.c29: 832 cmp %l6,_0x7f800000 833 bge,pn %icc,.u30 834 faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6; 835.c30: 836 ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); 837 cmp %g5,_0x7f800000 838 bge,pn %icc,.u31 839 fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 840.c31: 841 sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; 842 843 fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; 844 sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; 845 sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; 846 faddd %f38,K1,%f38 ! (3_1) dtmp0 += K1; 847 848 faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; 849 and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; 850 fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20; 851 852 lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); 853 sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; 854 add %o1,stridez,%o2 ! pz += stridez 855 faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3; 856 857 lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); 858 sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3; 859 860 fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20; 861 cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 862 bge,pn %icc,.update7 ! (2_0) if ( b0 > 0x7f800000 ) 863 faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; 864.cont7: 865 fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 866 sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 867 add %i1,stridey,%i1 ! py += stridey 868 fstod %f0,%f40 ! (2_0) y0 = (double)fy0; 869 870 faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; 871 sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 872 add %i3,stridex,%i3 ! px += stridex 873 fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 874 875 fstod %f2,%f2 ! (2_0) x0 = (double)fx0; 876 sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 877 add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 878.d7: 879 lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; 880 and %o5,-16,%o5 ! (2_0) signx0 &= -16; 881 faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; 882 883 lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; 884 fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20; 885 faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0; 886 887 fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; 888 faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2; 889 890 fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; 891 fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; 892 st %f1,[%o1] ! (2_1) *pz = ftmp0; 893 894 subcc counter,1,counter 895 bneg,a,pn %icc,.begin 896 or %g0,%o2,%o4 897 898 ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 899 add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; 900 and %o4,-8,%o4 ! (2_0) signy0 &= -8; 901 fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; 902 903 fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0; 904 and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; 905 sethi %hi(0x00800000),%o5 906 faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4; 907 908 and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; 909 cmp %l6,%o5 910 bl,pn %icc,.u32 911 fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20; 912.c32: 913 cmp %o0,%o5 914 bl,pn %icc,.u33 915 fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; 916.c33: 917 cmp %l6,_0x7f800000 918 bge,pn %icc,.u34 919 faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; 920.c34: 921 ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); 922 cmp %o0,_0x7f800000 923 bge,pn %icc,.u35 924 fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 925.c35: 926 sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; 927 928 fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0; 929 sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; 930 sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; 931 faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1; 932 933 faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; 934 and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; 935 fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; 936 937 lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); 938 sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; 939 add %o2,stridez,%o1 ! pz += stridez 940 faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3; 941 942 lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); 943 sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3; 944 945 fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20; 946 cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 947 bge,pn %icc,.update8 ! (3_0) if ( b0 > 0x7f800000 ) 948 faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0; 949.cont8: 950 fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 951 sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 952 add %i1,stridey,%i1 ! py += stridey 953 fstod %f0,%f40 ! (3_0) y0 = (double)fy0; 954 955 faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 956 sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 957 add %i3,stridex,%i3 ! px += stridex 958 fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 959 960 sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 961 fstod %f1,%f16 ! (3_0) x0 = (double)fx0; 962.d8: 963 faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; 964 add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; 965 and %o5,-16,%o5 ! (3_0) signx0 &= -16; 966 967 lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; 968 fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; 969 faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0; 970 971 lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; 972 fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; 973 faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2; 974 975 fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0; 976 and %o4,-8,%o4 ! (3_0) signy0 &= -8; 977 st %f1,[%o2] ! (3_1) *pz = ftmp0; 978 fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; 979 980 subcc counter,1,counter 981 bneg,a,pn %icc,.begin 982 or %g0,%o1,%o4 983 984 ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 985 add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; 986 fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; 987 988 fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0; 989 and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; 990 sethi %hi(0x00800000),%o5 991 faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; 992 993 and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; 994 cmp %l6,%o5 995 bl,pn %icc,.u36 996 fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20; 997.c36: 998 cmp %l5,%o5 999 bl,pn %icc,.u37 1000 fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; 1001.c37: 1002 cmp %l6,_0x7f800000 1003 bge,pn %icc,.u38 1004 faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; 1005.c38: 1006 ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); 1007 cmp %l5,_0x7f800000 1008 bge,pn %icc,.u39 1009 fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; 1010.c39: 1011 sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; 1012 1013 fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0; 1014 sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; 1015 sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; 1016 faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1; 1017 1018 faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; 1019 and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; 1020 fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; 1021 1022 lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); 1023 sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; 1024 add %o1,stridez,%o2 ! pz += stridez 1025 faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; 1026 1027 lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); 1028 sll %o7,3,%o7 ! (5_1) cmul0_ind = ldiff0 << 3; 1029 1030 fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20; 1031 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 1032 bge,pn %icc,.update9 ! (4_0) if ( b0 > 0x7f800000 ) 1033 faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0; 1034.cont9: 1035 fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 1036 sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 1037 add %i1,stridey,%i1 ! py += stridey 1038 fstod %f0,%f40 ! (4_0) y0 = (double)fy0; 1039 1040 faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 1041 sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 1042 add %i3,stridex,%i3 ! px += stridex 1043 fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 1044 1045 fstod %f2,%f2 ! (4_0) x0 = (double)fx0; 1046 sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; 1047.d9: 1048 lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; 1049 add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; 1050 faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; 1051 1052 fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; 1053 and %o5,-16,%o5 ! (4_0) signx0 &= -16; 1054 faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0; 1055 1056 subcc counter,5,counter 1057 bneg,pn %icc,.tail 1058 nop 1059 1060 ba .main_loop 1061 nop 1062 1063 .align 16 1064.main_loop: 1065 lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px; 1066 nop 1067 fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0; 1068 faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; 1069 1070 fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0; 1071 and %o4,-8,%o4 ! (4_1) signy0 &= -8; 1072 st %f22,[%o1] ! (4_2) *pz = ftmp0; 1073 fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0; 1074 1075 ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1076 add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0; 1077 fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; 1078 1079 fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0; 1080 and %l4,_0x7fffffff,%l6 ! (5_1) ax0 = ux0 & 0x7fffffff; 1081 sethi %hi(0x00800000),%o5 1082 faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; 1083 1084 and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff; 1085 fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; 1086 1087 cmp %l6,%o5 1088 bl,pn %icc,.up0 1089 fmuld K9,%f24,%f40 ! (3_1) dtmp0 = K9 * x20; 1090.co0: 1091 nop 1092 cmp %o7,%o5 1093 bl,pn %icc,.up1 1094 faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; 1095.co1: 1096 ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0); 1097 cmp %l6,_0x7f800000 1098 bge,pn %icc,.up2 1099 fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 1100.co2: 1101 sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0; 1102 cmp %o7,_0x7f800000 1103 bge,pn %icc,.up3 1104 1105 fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0; 1106.co3: 1107 sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31; 1108 sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py; 1109 faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; 1110 1111 faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8; 1112 and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0; 1113 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1114 1115 lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0); 1116 sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5; 1117 sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0; 1118 faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; 1119 1120 lda [%o4]0x82,%f2 ! (5_1) fx0 = *(float*)((char*)px - addrc0); 1121 1122 fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; 1123 cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000 1124 bge,pn %icc,.update10 ! (5_1) if ( b0 > 0x7f800000 ) 1125 faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0; 1126.cont10: 1127 fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; 1128 nop 1129 fstod %f0,%f40 ! (5_1) y0 = (double)fy0; 1130 1131 faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; 1132 add %o2,stridez,%o1 ! pz += stridez 1133 fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 1134 1135 sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; 1136 add %i3,stridex,%i3 ! px += stridex 1137 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 1138.den0: 1139 sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28; 1140 add %i1,stridey,%i1 ! py += stridey 1141 1142 faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7; 1143 sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27; 1144 1145 lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py; 1146 add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr; 1147 fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; 1148 faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; 1149 1150 lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px; 1151 and %o5,-16,%o5 ! (5_1) signx0 &= -16; 1152 fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0; 1153 faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; 1154 1155 fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0; 1156 st %f2,[%o2] ! (5_2) *pz = ftmp0; 1157 fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0; 1158 1159 ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1160 add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0; 1161 and %o4,-8,%o4 ! (5_1) signy0 &= -8; 1162 fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20; 1163 1164 fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; 1165 and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff; 1166 sethi %hi(0x00800000),%o5 1167 faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; 1168 1169 and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff; 1170 fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; 1171 1172 cmp %l7,%o5 1173 bl,pn %icc,.up4 1174 fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20; 1175.co4: 1176 nop 1177 cmp %l6,%o5 1178 bl,pn %icc,.up5 1179 faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6; 1180.co5: 1181 ldd [%o2+%o4],%f36 ! (5_1) cadd0 = *(double*)(ltmp0 + signy0); 1182 cmp %l7,_0x7f800000 1183 bge,pn %icc,.up6 1184 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1185.co6: 1186 sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0; 1187 cmp %l6,_0x7f800000 1188 bge,pn %icc,.up7 1189 1190 fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; 1191.co7: 1192 sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; 1193 sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; 1194 faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; 1195 1196 faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8; 1197 and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; 1198 fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 1199 1200 lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); 1201 sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; 1202 sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 1203 faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; 1204 1205 lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); 1206 sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; 1207 add %o1,stridez,%o2 ! pz += stridez 1208 1209 fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; 1210 cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000 1211 bge,pn %icc,.update11 ! (0_0) if ( b0 > 0x7f800000 ) 1212 faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; 1213.cont11: 1214 fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; 1215 sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 1216 add %i3,stridex,%i3 ! px += stridex 1217 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 1218 1219 faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; 1220 add %i1,stridey,%i1 ! py += stridey 1221 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1222 1223 lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 1224 sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 1225 add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 1226 fstod %f2,%f2 ! (0_0) x0 = (double)fx0; 1227.den1: 1228 lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px; 1229 and %o5,-16,%o5 ! (0_0) signx0 &= -16; 1230 faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7; 1231 1232 fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20; 1233 and %o4,-8,%o4 ! (0_0) signy0 &= -8; 1234 faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; 1235 1236 fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0; 1237 faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; 1238 1239 fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; 1240 nop 1241 st %f2,[%o1] ! (0_1) *pz = ftmp0 1242 fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0; 1243 1244 ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1245 add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0; 1246 fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20; 1247 1248 fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; 1249 and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff; 1250 sethi %hi(0x00800000),%o5 1251 faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4; 1252 1253 and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff; 1254 fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; 1255 1256 cmp %l6,%o5 1257 bl,pn %icc,.up8 1258 fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20; 1259.co8: 1260 nop 1261 cmp %g1,%o5 1262 bl,pn %icc,.up9 1263 faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6; 1264.co9: 1265 ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0); 1266 cmp %l6,_0x7f800000 1267 bge,pn %icc,.up10 1268 fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 1269.co10: 1270 sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0; 1271 cmp %g1,_0x7f800000 1272 bge,pn %icc,.up11 1273 1274 fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; 1275.co11: 1276 sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31; 1277 sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py; 1278 faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; 1279 1280 faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8; 1281 and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0; 1282 fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 1283 1284 lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0); 1285 sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; 1286 sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0; 1287 faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3; 1288 1289 lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0); 1290 sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5; 1291 add %o2,stridez,%o1 ! pz += stridez 1292 1293 fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; 1294 cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000 1295 bge,pn %icc,.update12 ! (1_0) if ( b0 > 0x7f800000 ) 1296 faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; 1297.cont12: 1298 fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; 1299 add %i1,stridey,%i1 ! py += stridey 1300 nop 1301 fstod %f0,%f40 ! (1_0) y0 = (double)fy0; 1302 1303 faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; 1304 sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 1305 add %i3,stridex,%i3 ! px += stridex 1306 fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 1307 1308 sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; 1309 add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; 1310 fstod %f2,%f2 ! (1_0) x0 = (double)fx0; 1311.den2: 1312 faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7; 1313 and %o5,-16,%o5 ! (1_0) signx0 &= -16; 1314 and %o4,-8,%o4 ! (1_0) signy0 &= -8; 1315 1316 lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py; 1317 fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20; 1318 faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; 1319 1320 lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px; 1321 fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0; 1322 faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2; 1323 1324 fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; 1325 nop 1326 st %f2,[%o2] ! (1_1) *pz = ftmp0; 1327 fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0; 1328 1329 ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1330 add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0; 1331 fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20; 1332 1333 fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; 1334 and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff; 1335 sethi %hi(0x00800000),%o5 1336 faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4; 1337 1338 and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff; 1339 fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20; 1340 1341 cmp %l6,%o5 1342 bl,pn %icc,.up12 1343 fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20; 1344.co12: 1345 nop 1346 cmp %g5,%o5 1347 bl,pn %icc,.up13 1348 faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6; 1349.co13: 1350 ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0); 1351 cmp %l6,_0x7f800000 1352 bge,pn %icc,.up14 1353 fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 1354.co14: 1355 sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0; 1356 cmp %g5,_0x7f800000 1357 bge,pn %icc,.up15 1358 1359 fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; 1360.co15: 1361 sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31; 1362 sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py; 1363 faddd %f38,K1,%f38 ! (3_1) dtmp0 += K1; 1364 1365 faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8; 1366 and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0; 1367 fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20; 1368 1369 lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0); 1370 sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0; 1371 add %o1,stridez,%o2 ! pz += stridez 1372 faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3; 1373 1374 lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0); 1375 sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3; 1376 add %i3,stridex,%i3 ! px += stridex 1377 1378 fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20; 1379 cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000 1380 bge,pn %icc,.update13 ! (2_0) if ( b0 > 0x7f800000 ) 1381 faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; 1382.cont13: 1383 fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 1384 sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 1385 add %i1,stridey,%i1 ! py += stridey 1386 fstod %f0,%f40 ! (2_0) y0 = (double)fy0; 1387 1388 faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; 1389 sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 1390 fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 1391 1392 fstod %f2,%f2 ! (2_0) x0 = (double)fx0; 1393 sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 1394 add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 1395.den3: 1396 lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py; 1397 and %o5,-16,%o5 ! (2_0) signx0 &= -16; 1398 faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7; 1399 1400 lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px; 1401 fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20; 1402 faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0; 1403 1404 fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0; 1405 faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2; 1406 1407 fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; 1408 st %f1,[%o1] ! (2_1) *pz = ftmp0; 1409 fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0; 1410 1411 ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1412 add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0; 1413 and %o4,-8,%o4 ! (2_0) signy0 &= -8; 1414 fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20; 1415 1416 fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0; 1417 and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff; 1418 sethi %hi(0x00800000),%o5 1419 faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4; 1420 1421 and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff; 1422 fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20; 1423 1424 cmp %l6,%o5 1425 bl,pn %icc,.up16 1426 fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20; 1427.co16: 1428 nop 1429 cmp %o0,%o5 1430 bl,pn %icc,.up17 1431 faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6; 1432.co17: 1433 ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0); 1434 cmp %l6,_0x7f800000 1435 bge,pn %icc,.up18 1436 fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 1437.co18: 1438 sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0; 1439 cmp %o0,_0x7f800000 1440 bge,pn %icc,.up19 1441 1442 fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0; 1443.co19: 1444 sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31; 1445 sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py; 1446 faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1; 1447 1448 faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8; 1449 and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0; 1450 fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20; 1451 1452 lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0); 1453 sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0; 1454 add %o2,stridez,%o1 ! pz += stridez 1455 faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3; 1456 1457 lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0); 1458 sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3; 1459 add %i3,stridex,%i3 ! px += stridex 1460 1461 fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20; 1462 cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000 1463 bge,pn %icc,.update14 ! (3_0) if ( b0 > 0x7f800000 ) 1464 faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0; 1465.cont14: 1466 fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 1467 sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 1468 add %i1,stridey,%i1 ! py += stridey 1469 fstod %f0,%f40 ! (3_0) y0 = (double)fy0; 1470 1471 faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 1472 sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 1473 fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 1474 1475 sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 1476 fstod %f1,%f16 ! (3_0) x0 = (double)fx0; 1477.den4: 1478 faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7; 1479 add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr; 1480 and %o5,-16,%o5 ! (3_0) signx0 &= -16; 1481 1482 lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py; 1483 fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20; 1484 faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0; 1485 1486 lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px; 1487 fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0; 1488 faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2; 1489 1490 fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0; 1491 and %o4,-8,%o4 ! (3_0) signy0 &= -8; 1492 st %f1,[%o2] ! (3_1) *pz = ftmp0; 1493 fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0; 1494 1495 ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1496 add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0; 1497 fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20; 1498 1499 fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0; 1500 and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff; 1501 sethi %hi(0x00800000),%o5 1502 faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4; 1503 1504 and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff; 1505 fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20; 1506 1507 cmp %l6,%o5 1508 bl,pn %icc,.up20 1509 fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20; 1510.co20: 1511 nop 1512 cmp %l5,%o5 1513 bl,pn %icc,.up21 1514 faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6; 1515.co21: 1516 ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0); 1517 cmp %l6,_0x7f800000 1518 bge,pn %icc,.up22 1519 fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20; 1520.co22: 1521 sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0; 1522 cmp %l5,_0x7f800000 1523 bge,pn %icc,.up23 1524 1525 fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0; 1526.co23: 1527 sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31; 1528 sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py; 1529 faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1; 1530 1531 faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8; 1532 and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0; 1533 fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20; 1534 1535 lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0); 1536 sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0; 1537 add %o1,stridez,%o2 ! pz += stridez 1538 faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3; 1539 1540 lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0); 1541 sll %o7,3,%o7 ! (5_1) cmul0_ind = ldiff0 << 3; 1542 add %i3,stridex,%i3 ! px += stridex 1543 1544 fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20; 1545 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 1546 bge,pn %icc,.update15 ! (4_0) if ( b0 > 0x7f800000 ) 1547 faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0; 1548.cont15: 1549 fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 1550 sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 1551 add %i1,stridey,%i1 ! py += stridey 1552 fstod %f0,%f40 ! (4_0) y0 = (double)fy0; 1553 1554 faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 1555 sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 1556 fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 1557 1558 fstod %f2,%f2 ! (4_0) x0 = (double)fx0; 1559 sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; 1560.den5: 1561 lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py; 1562 subcc counter,6,counter ! counter? 1563 add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr; 1564 faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7; 1565 1566 fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20; 1567 and %o5,-16,%o5 ! (4_0) signx0 &= -16; 1568 bpos,pt %icc,.main_loop 1569 faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0; 1570 1571.tail: 1572 addcc counter,5,counter 1573 bneg,a,pn %icc,.begin 1574 or %g0,%o1,%o4 1575 1576 faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2; 1577 1578 fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0; 1579 st %f22,[%o1] ! (4_2) *pz = ftmp0; 1580 1581 subcc counter,1,counter 1582 bneg,a,pn %icc,.begin 1583 or %g0,%o2,%o4 1584 1585 ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1586 fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20; 1587 1588 fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0; 1589 faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4; 1590 1591 fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20; 1592 1593 1594 faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6; 1595 1596 fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 1597 1598 fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0; 1599 faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1; 1600 1601 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1602 1603 faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3; 1604 1605 fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20; 1606 faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0; 1607 1608 faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; 1609 add %o2,stridez,%o1 ! pz += stridez 1610 fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 1611 1612 sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; 1613 1614 fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20; 1615 faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0; 1616 1617 faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2; 1618 1619 fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0; 1620 st %f2,[%o2] ! (5_2) *pz = ftmp0; 1621 1622 subcc counter,1,counter 1623 bneg,a,pn %icc,.begin 1624 or %g0,%o1,%o4 1625 1626 ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1627 1628 fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0; 1629 faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4; 1630 1631 fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20; 1632 1633 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1634 1635 fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0; 1636 faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1; 1637 1638 sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3; 1639 faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3; 1640 1641 add %o1,stridez,%o2 ! pz += stridez 1642 1643 fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20; 1644 faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0; 1645 1646 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 1647 1648 faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0; 1649 1650 faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2; 1651 1652 fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0; 1653 st %f2,[%o1] ! (0_1) *pz = ftmp0 1654 1655 subcc counter,1,counter 1656 bneg,a,pn %icc,.begin 1657 or %g0,%o2,%o4 1658 1659 ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1660 1661 fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0; 1662 1663 fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20; 1664 1665 fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0; 1666 faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1; 1667 1668 sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3; 1669 1670 add %o2,stridez,%o1 ! pz += stridez 1671 1672 fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20; 1673 faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0; 1674 1675 faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0; 1676 1677 fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0; 1678 st %f2,[%o2] ! (1_1) *pz = ftmp0; 1679 1680 subcc counter,1,counter 1681 bneg,a,pn %icc,.begin 1682 or %g0,%o1,%o4 1683 1684 ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind); 1685 1686 fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0; 1687 1688 fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0; 1689 1690 add %o1,stridez,%o2 ! pz += stridez 1691 1692 faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0; 1693 1694 fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0; 1695 st %f1,[%o1] ! (2_1) *pz = ftmp0; 1696 1697 ba .begin 1698 or %g0,%o2,%o4 1699 1700 .align 16 1701.spec0: 1702 cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000 1703 bg 2f ! if ( ax0 >= 0x7f800000 ) 1704 srl %l3,30,%l3 ! signx0 = (unsigned)ux0 >> 30; 1705 1706 cmp %l7,_0x7f800000 ! ay0 ? 0x7f800000 1707 bg 2f ! if ( ay0 >= 0x7f800000 ) 1708 and %l3,2,%l3 ! signx0 &= 2; 1709 1710 sra %l4,31,%l4 ! signy0 = uy0 >> 31; 1711 bne,a 1f ! if (ay0 != 0x7f800000) 1712 add %l3,%l3,%l3 ! signx0 += signx0; 1713 1714 cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000 1715 bne,a 1f ! if ( ax0 != 0x7f800000 ) 1716 add %g0,2,%l3 ! signx0 = 2 1717 1718 add %l3,1,%l3 ! signx0 ++; 17191: 1720 sll %l4,3,%l4 ! signy0 <<= 3; 1721 st %l3,[%fp+tmp_pz] ! STORE signx0 1722 1723 ldd [cmul_arr+88],%f0 ! LOAD M_PI_4 1724 1725 ld [%fp+tmp_pz],%f2 ! LOAD signx0 1726 1727 ldd [cmul_arr+%l4],%f4 ! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0); 1728 1729 add %i1,stridey,%i1 ! py += stridey; 1730 fitod %f2,%f2 ! dtmp1 = (double)signx0; 1731 1732 add %i3,stridex,%i3 ! px += stridex; 1733 1734 fmuld %f2,%f0,%f0 ! res = signx0 * M_PI_4; 1735 1736 fmuld %f0,%f4,%f0 ! res *= dtmp0; 1737 fdtos %f0,%f0 ! ftmp0 = (float) res; 1738 st %f0,[%o4] ! *pz = ftmp0; 1739 1740 ba .begin1 1741 add %o4,stridez,%o4 ! pz += stridez; 17422: 1743 std %l6,[%fp+tmp_pz] ! *(float*)&ax0, *(float*)&ay0 1744 ldd [%fp+tmp_pz],%f0 ! *(float*)&ax0, *(float*)&ay0 1745 1746 add %i1,stridey,%i1 ! py += stridey; 1747 1748 fmuls %f0,%f1,%f0 ! ftmp0 = *(float*)&ax0 * *(float*)&ay0; 1749 add %i3,stridex,%i3 ! pz += stridex; 1750 st %f0,[%o4] ! *pz = ftmp0; 1751 1752 ba .begin1 1753 add %o4,stridez,%o4 ! pz += stridez; 1754 1755 .align 16 1756.spec1: 1757 cmp %l6,0 1758 bne,pn %icc,1f 1759 nop 1760 1761 cmp %l7,0 1762 bne,pn %icc,1f 1763 nop 1764 1765 sra %l4,28,%l4 ! signy0 = uy0 >> 28; 1766 1767 sra %l3,27,%l3 ! signx0 = ux0 >> 27; 1768 and %l4,-8,%l4 ! signy0 &= -8; 1769 1770 sra %o2,31,%o2 ! ldiff0 >>= 31; 1771 and %l3,-16,%l3 ! signx0 &= -16; 1772 1773 sll %o2,5,%o2 ! ldiff0 <<= 5; 1774 add %l4,%l3,%l3 ! signx0 += signy0; 1775 1776 add %o2,%l3,%l3 ! signx0 += ldiff0; 1777 add %i1,stridey,%i1 ! py += stridey; 1778 1779 ldd [cadd_arr+%l3],%f0 ! res = *(double*)((char*)(cadd_arr + 7) + signx0); 1780 add %i3,stridex,%i3 ! px += stridex; 1781 1782 fdtos %f0,%f0 ! ftmp0 = (float) res; 1783 st %f0,[%o4] ! *pz = ftmp0; 1784 1785 ba .begin1 1786 add %o4,stridez,%o4 ! pz += stridez; 17871: 1788 stx %o4,[%fp+tmp_pz] 1789 sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31; 1790 sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py; 1791 1792 and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0; 1793 1794 lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0); 1795 sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0 1796 1797 lda [%i1+%o2]0x82,%l5 ! (0_0) fy0 = *(float*)((char*)py + addrc0); 1798 1799 lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0); 1800 sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5; 1801 1802 lda [%o4]0x82,%g5 ! (0_0) fx0 = *(float*)((char*)px - addrc0); 1803 1804 sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 1805 add %i1,stridey,%i1 ! py += stridey 1806 1807 add %i3,stridex,%i3 ! px += stridex 1808 1809 lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 1810 sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 1811 1812 add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 1813 1814 and %l5,_0x7fffffff,%l4 1815 sethi %hi(0x00800000),%g1 1816 1817 cmp %l4,%g1 1818 bge,a %icc,1f 1819 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 1820 1821 fabss %f0,%f0 ! fy0 = fabsf(fy0); 1822 ldd [cmul_arr+96],%f40 1823 sra %l5,28,%l4 ! itmp0 >>= 28; 1824 1825 and %l4,-8,%l4 1826 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 1827 1828 fmuld %f40,%f0,%f40 ! dtmp0 *= C2ONM149; 1829 ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 1830 1831 fmuld %f40,%f0,%f40 ! dtmp0 *= dsign; 18321: 1833 and %g5,_0x7fffffff,%l4 1834 cmp %l4,%g1 1835 bge,a %icc,.spec1_cont 1836 fstod %f2,%f2 ! (0_0) x0 = (double)fx0; 1837 1838 fabss %f2,%f2 ! fx0 = fabsf(fx0); 1839 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 1840 sra %g5,28,%l4 ! itmp0 >>= 28; 1841 1842 and %l4,-8,%l4 ! itmp0 = -8; 1843 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 1844 1845 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 1846 ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 1847 1848 ba .spec1_cont 1849 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 1850 1851 .align 16 1852.update0: 1853 cmp counter,0 1854 bg,pn %icc,1f 1855 nop 1856 1857 ld [cmul_arr],%f2 1858 ba .cont0 1859 fzero %f0 18601: 1861 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 1862 bg,pt %icc,1f 1863 nop 18642: 1865 sub counter,0,counter 1866 st counter,[%fp+tmp_counter] 1867 stx %i1,[%fp+tmp_py] 1868 stx %i3,[%fp+tmp_px] 1869 1870 ld [cmul_arr],%f2 1871 or %g0,0,counter 1872 ba .cont0 1873 fzero %f0 18741: 1875 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 1876 bne,pn %icc,1f 1877 sethi %hi(0x00800000),%o5 1878 1879 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 1880 be,pn %icc,2b 1881 nop 18821: 1883 st %f0,[%fp+tmp_px] 1884 st %f2,[%fp+tmp_px+4] 1885 ld [%fp+tmp_px],%o4 1886 1887 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 1888 cmp %l5,%o5 1889 bge,a 1f 1890 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 1891 1892 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 1893 sra %o4,28,%o4 ! itmp0 >>= 28; 1894 fabss %f0,%f0 ! fy0 = fabsf(fy0); 1895 1896 and %o4,-8,%o4 ! itmp0 = -8; 1897 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 1898 1899 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 1900 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 1901 1902 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 19031: 1904 add %i3,stridex,%i3 ! px += stridex 1905 add %i1,stridey,%i1 ! py += stridey 1906 1907 ld [%fp+tmp_px+4],%o4 1908 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 1909 cmp %l5,%o5 1910 bge,a 1f 1911 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 1912 1913 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 1914 sra %o4,28,%o4 ! itmp0 >>= 28; 1915 fabss %f2,%f2 ! fx0 = fabsf(fx0); 1916 1917 and %o4,-8,%o4 ! itmp0 = -8; 1918 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 1919 1920 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 1921 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 1922 1923 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 19241: 1925 sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 1926 1927 sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; 1928 ba .d0 1929 add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; 1930 1931 .align 16 1932.update1: 1933 cmp counter,1 1934 bg,pn %icc,1f 1935 nop 1936 1937 fzero %f0 1938 ba .cont1 1939 ld [cmul_arr],%f2 19401: 1941 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 1942 bg,pt %icc,1f 1943 nop 19442: 1945 sub counter,1,counter 1946 st counter,[%fp+tmp_counter] 1947 stx %i1,[%fp+tmp_py] 1948 stx %i3,[%fp+tmp_px] 1949 1950 ld [cmul_arr],%f2 1951 or %g0,1,counter 1952 ba .cont1 1953 fzero %f0 19541: 1955 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 1956 bne,pn %icc,1f 1957 sethi %hi(0x00800000),%o5 1958 1959 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 1960 be,pn %icc,2b 1961 nop 19621: 1963 st %f0,[%fp+tmp_px] 1964 st %f2,[%fp+tmp_px+4] 1965 ld [%fp+tmp_px],%o4 1966 fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 1967 1968 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 1969 cmp %l6,%o5 1970 bge,a 1f 1971 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 1972 1973 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 1974 sra %o4,28,%o4 ! itmp0 >>= 28; 1975 fabss %f0,%f0 ! fy0 = fabsf(fy0); 1976 1977 and %o4,-8,%o4 ! itmp0 = -8; 1978 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 1979 1980 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 1981 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 1982 1983 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 19841: 1985 1986 add %i1,stridey,%i1 ! py += stridey 1987 1988 ld [%fp+tmp_px+4],%o4 1989 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 1990 cmp %l6,%o5 1991 bge,a 1f 1992 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 1993 1994 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 1995 sra %o4,28,%o4 ! itmp0 >>= 28; 1996 fabss %f2,%f2 ! fx0 = fabsf(fx0); 1997 1998 and %o4,-8,%o4 ! itmp0 = -8; 1999 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2000 2001 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2002 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2003 2004 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 20051: 2006 sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 2007 sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 2008 add %i3,stridex,%i3 ! px += stridex 2009 2010 sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 2011 ba .d1 2012 add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 2013 2014 .align 16 2015.update2: 2016 cmp counter,2 2017 bg,pn %icc,1f 2018 nop 2019 2020 ld [cmul_arr],%f1 2021 ba .cont2 2022 fzeros %f0 20231: 2024 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2025 bg,pt %icc,1f 2026 nop 20272: 2028 sub counter,2,counter 2029 st counter,[%fp+tmp_counter] 2030 stx %i1,[%fp+tmp_py] 2031 stx %i3,[%fp+tmp_px] 2032 2033 ld [cmul_arr],%f1 2034 or %g0,2,counter 2035 ba .cont2 2036 fzeros %f0 20371: 2038 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2039 bne,pn %icc,1f 2040 sethi %hi(0x00800000),%o5 2041 2042 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2043 be,pn %icc,2b 2044 nop 20451: 2046 std %f0,[%fp+tmp_px] 2047 ld [%fp+tmp_px],%o4 2048 fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 2049 2050 faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 2051 2052 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2053 cmp %l6,%o5 2054 bge,a 1f 2055 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2056 2057 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2058 sra %o4,28,%o4 ! itmp0 >>= 28; 2059 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2060 2061 and %o4,-8,%o4 ! itmp0 = -8; 2062 fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; 2063 2064 fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; 2065 ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2066 2067 fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; 20681: 2069 add %i1,stridey,%i1 ! py += stridey 2070 2071 ld [%fp+tmp_px+4],%o4 2072 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2073 cmp %l6,%o5 2074 bge,a 1f 2075 fstod %f1,%f16 ! (5_1) x0 = (double)fx0; 2076 2077 fabss %f1,%f16 ! fx0 = fabsf(fx0); 2078 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2079 sra %o4,28,%o4 ! itmp0 >>= 28; 2080 2081 and %o4,-8,%o4 ! itmp0 = -8; 2082 fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; 2083 2084 fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; 2085 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2086 2087 fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; 20881: 2089 sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 2090 sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 2091 2092 add %i3,stridex,%i3 ! px += stridex 2093 ba .d2 2094 sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 2095 2096 .align 16 2097.update3: 2098 cmp counter,3 2099 bg,pn %icc,1f 2100 nop 2101 2102 fzero %f0 2103 ba .cont3 2104 ld [cmul_arr],%f2 21051: 2106 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2107 bg,pt %icc,1f 2108 nop 21092: 2110 sub counter,3,counter 2111 st counter,[%fp+tmp_counter] 2112 stx %i1,[%fp+tmp_py] 2113 stx %i3,[%fp+tmp_px] 2114 2115 ld [cmul_arr],%f2 2116 or %g0,3,counter 2117 ba .cont3 2118 fzero %f0 21191: 2120 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2121 bne,pn %icc,1f 2122 sethi %hi(0x00800000),%o5 2123 2124 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2125 be,pn %icc,2b 2126 nop 21271: 2128 st %f0,[%fp+tmp_px] 2129 st %f2,[%fp+tmp_px+4] 2130 ld [%fp+tmp_px],%o4 2131 fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 2132 2133 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2134 cmp %l6,%o5 2135 bge,a 1f 2136 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2137 2138 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2139 sra %o4,28,%o4 ! itmp0 >>= 28; 2140 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2141 2142 and %o4,-8,%o4 ! itmp0 = -8; 2143 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2144 2145 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2146 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2147 2148 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 21491: 2150 add %i1,stridey,%i1 ! py += stridey 2151 faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 2152 fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 2153 2154 ld [%fp+tmp_px+4],%o4 2155 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2156 cmp %l6,%o5 2157 bge,a 1f 2158 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2159 2160 fabss %f2,%f2 ! fx0 = fabsf(fx0); 2161 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2162 sra %o4,28,%o4 ! itmp0 >>= 28; 2163 2164 and %o4,-8,%o4 ! itmp0 = -8; 2165 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2166 2167 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2168 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2169 2170 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 21711: 2172 sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 2173 sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 2174 2175 add %i3,stridex,%i3 ! px += stridex 2176 ba .d3 2177 sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; 2178 2179 .align 16 2180.update4: 2181 cmp counter,4 2182 bg,pn %icc,1f 2183 nop 2184 2185 ld [cmul_arr],%f1 2186 ba .cont4 2187 fzeros %f0 21881: 2189 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2190 bg,pt %icc,1f 2191 nop 21922: 2193 sub counter,4,counter 2194 st counter,[%fp+tmp_counter] 2195 stx %i1,[%fp+tmp_py] 2196 stx %i3,[%fp+tmp_px] 2197 2198 ld [cmul_arr],%f1 2199 or %g0,4,counter 2200 ba .cont4 2201 fzeros %f0 22021: 2203 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2204 bne,pn %icc,1f 2205 sethi %hi(0x00800000),%o5 2206 2207 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2208 be,pn %icc,2b 2209 nop 22101: 2211 std %f0,[%fp+tmp_px] 2212 ld [%fp+tmp_px],%o4 2213 fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; 2214 2215 and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff 2216 cmp %o1,%o5 2217 bge,a 1f 2218 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2219 2220 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2221 sra %o4,28,%o4 ! itmp0 >>= 28; 2222 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2223 2224 and %o4,-8,%o4 ! itmp0 = -8; 2225 fitod %f0,%f14 ! dtmp0 = (double) *(int*)&fy0; 2226 2227 fmuld %f14,%f40,%f40 ! dtmp0 *= C2ONM149; 2228 ldd [cmul_arr+%o4],%f14 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2229 2230 fmuld %f14,%f40,%f40 ! dtmp0 *= dsign; 22311: 2232 faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; 2233 fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 2234 2235 ld [%fp+tmp_px+4],%o4 2236 and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff 2237 cmp %o1,%o5 2238 bge,a 1f 2239 fstod %f1,%f2 ! (5_1) x0 = (double)fx0; 2240 2241 fabss %f1,%f22 ! fx0 = fabsf(fx0); 2242 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2243 sra %o4,28,%o4 ! itmp0 >>= 28; 2244 2245 and %o4,-8,%o4 ! itmp0 = -8; 2246 fitod %f22,%f22 ! dtmp0 = (double) *(int*)&fx0; 2247 2248 fmuld %f22,%f0,%f22 ! dtmp0 *= C2ONM149; 2249 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2250 2251 fmuld %f22,%f0,%f2 ! dtmp0 *= dsign; 22521: 2253 sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; 2254 ba .d4 2255 add %i3,stridex,%i3 ! px += stridex 2256 2257 .align 16 2258.update5: 2259 cmp counter,5 2260 bg,pn %icc,1f 2261 nop 2262 2263 ld [cmul_arr],%f2 2264 ba .cont5 2265 fzero %f0 22661: 2267 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2268 bg,pt %icc,1f 2269 nop 22702: 2271 sub counter,5,counter 2272 st counter,[%fp+tmp_counter] 2273 stx %i1,[%fp+tmp_py] 2274 stx %i3,[%fp+tmp_px] 2275 2276 ld [cmul_arr],%f2 2277 or %g0,5,counter 2278 ba .cont5 2279 fzero %f0 22801: 2281 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2282 bne,pn %icc,1f 2283 sethi %hi(0x00800000),%o5 2284 2285 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2286 be,pn %icc,2b 2287 nop 22881: 2289 st %f0,[%fp+tmp_px] 2290 st %f2,[%fp+tmp_px+4] 2291 ld [%fp+tmp_px],%o4 2292 fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; 2293 2294 stx %l5,[%fp+tmp_py] 2295 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2296 cmp %l5,%o5 2297 bge,a 1f 2298 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2299 2300 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2301 sra %o4,28,%o4 ! itmp0 >>= 28; 2302 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2303 2304 and %o4,-8,%o4 ! itmp0 = -8; 2305 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2306 2307 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2308 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2309 2310 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 23111: 2312 faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; 2313 add %i1,stridey,%i1 ! py += stridey 2314 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 2315 2316 ld [%fp+tmp_px+4],%o4 2317 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2318 cmp %l5,%o5 2319 bge,a 1f 2320 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2321 2322 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2323 sra %o4,28,%o4 ! itmp0 >>= 28; 2324 fabss %f2,%f2 ! fx0 = fabsf(fx0); 2325 2326 and %o4,-8,%o4 ! itmp0 = -8; 2327 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2328 2329 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2330 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2331 2332 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 23331: 2334 ldx [%fp+tmp_py],%l5 2335 sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 2336 add %i3,stridex,%i3 ! px += stridex 2337 2338 lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 2339 sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 2340 ba .d5 2341 add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 2342 2343 .align 16 2344.update6: 2345 cmp counter,5 2346 bg,pn %icc,1f 2347 nop 2348 2349 ld [cmul_arr],%f2 2350 ba .cont6 2351 fzero %f0 23521: 2353 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2354 bg,pt %icc,1f 2355 nop 23562: 2357 sub counter,5,counter 2358 st counter,[%fp+tmp_counter] 2359 stx %i1,[%fp+tmp_py] 2360 stx %i3,[%fp+tmp_px] 2361 2362 ld [cmul_arr],%f2 2363 or %g0,5,counter 2364 ba .cont6 2365 fzero %f0 23661: 2367 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2368 bne,pn %icc,1f 2369 sethi %hi(0x00800000),%o5 2370 2371 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2372 be,pn %icc,2b 2373 nop 23741: 2375 st %f0,[%fp+tmp_pz] 2376 st %f2,[%fp+tmp_pz+4] 2377 ld [%fp+tmp_pz],%o4 2378 fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; 2379 2380 stx %l5,[%fp+tmp_px] 2381 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2382 cmp %l5,%o5 2383 bge,a 1f 2384 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2385 2386 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2387 sra %o4,28,%o4 ! itmp0 >>= 28; 2388 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2389 2390 and %o4,-8,%o4 ! itmp0 = -8; 2391 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2392 2393 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2394 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2395 2396 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 23971: 2398 faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; 2399 add %i3,stridex,%i3 ! px += stridex 2400 add %i1,stridey,%i1 ! py += stridey 2401 fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 2402 2403 ld [%fp+tmp_pz+4],%o4 2404 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2405 cmp %l5,%o5 2406 bge,a 1f 2407 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2408 2409 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2410 sra %o4,28,%o4 ! itmp0 >>= 28; 2411 fabss %f2,%f2 ! fx0 = fabsf(fx0); 2412 2413 and %o4,-8,%o4 ! itmp0 = -8; 2414 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2415 2416 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2417 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2418 2419 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 24201: 2421 ldx [%fp+tmp_px],%l5 2422 2423 sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 2424 2425 sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; 2426 ba .d6 2427 add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; 2428 2429 .align 16 2430.update7: 2431 cmp counter,5 2432 bg,pn %icc,1f 2433 nop 2434 2435 ld [cmul_arr],%f2 2436 ba .cont7 2437 fzero %f0 24381: 2439 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2440 bg,pt %icc,1f 2441 nop 24422: 2443 sub counter,5,counter 2444 st counter,[%fp+tmp_counter] 2445 stx %i1,[%fp+tmp_py] 2446 stx %i3,[%fp+tmp_px] 2447 2448 ld [cmul_arr],%f2 2449 or %g0,5,counter 2450 ba .cont7 2451 fzero %f0 24521: 2453 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2454 bne,pn %icc,1f 2455 sethi %hi(0x00800000),%o5 2456 2457 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2458 be,pn %icc,2b 2459 nop 24601: 2461 st %f0,[%fp+tmp_pz] 2462 st %f2,[%fp+tmp_pz+4] 2463 ld [%fp+tmp_pz],%o4 2464 fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 2465 2466 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2467 cmp %l6,%o5 2468 bge,a 1f 2469 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2470 2471 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2472 sra %o4,28,%o4 ! itmp0 >>= 28; 2473 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2474 2475 and %o4,-8,%o4 ! itmp0 = -8; 2476 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2477 2478 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2479 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2480 2481 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 24821: 2483 faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; 2484 add %i1,stridey,%i1 ! py += stridey 2485 fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 2486 2487 ld [%fp+tmp_pz+4],%o4 2488 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2489 cmp %l6,%o5 2490 bge,a 1f 2491 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2492 2493 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2494 sra %o4,28,%o4 ! itmp0 >>= 28; 2495 fabss %f2,%f2 ! fx0 = fabsf(fx0); 2496 2497 and %o4,-8,%o4 ! itmp0 = -8; 2498 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2499 2500 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2501 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2502 2503 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 25041: 2505 sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 2506 sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 2507 add %i3,stridex,%i3 ! px += stridex 2508 2509 sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 2510 ba .d7 2511 add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 2512 2513 .align 16 2514.update8: 2515 cmp counter,5 2516 bg,pn %icc,1f 2517 nop 2518 2519 ld [cmul_arr],%f1 2520 ba .cont8 2521 fzeros %f0 25221: 2523 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2524 bg,pt %icc,1f 2525 nop 25262: 2527 sub counter,5,counter 2528 st counter,[%fp+tmp_counter] 2529 stx %i1,[%fp+tmp_py] 2530 stx %i3,[%fp+tmp_px] 2531 2532 ld [cmul_arr],%f1 2533 or %g0,5,counter 2534 ba .cont8 2535 fzeros %f0 25361: 2537 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2538 bne,pn %icc,1f 2539 sethi %hi(0x00800000),%o5 2540 2541 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2542 be,pn %icc,2b 2543 nop 25441: 2545 std %f0,[%fp+tmp_pz] 2546 ld [%fp+tmp_pz],%o4 2547 fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 2548 2549 faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 2550 2551 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2552 cmp %l6,%o5 2553 bge,a 1f 2554 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2555 2556 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2557 sra %o4,28,%o4 ! itmp0 >>= 28; 2558 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2559 2560 and %o4,-8,%o4 ! itmp0 = -8; 2561 fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; 2562 2563 fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; 2564 ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2565 2566 fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; 25671: 2568 add %i1,stridey,%i1 ! py += stridey 2569 fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 2570 2571 ld [%fp+tmp_pz+4],%o4 2572 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2573 cmp %l6,%o5 2574 bge,a 1f 2575 fstod %f1,%f16 ! (5_1) x0 = (double)fx0; 2576 2577 fabss %f1,%f16 ! fx0 = fabsf(fx0); 2578 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2579 sra %o4,28,%o4 ! itmp0 >>= 28; 2580 2581 and %o4,-8,%o4 ! itmp0 = -8; 2582 fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; 2583 2584 fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; 2585 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2586 2587 fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; 25881: 2589 sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 2590 sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 2591 2592 add %i3,stridex,%i3 ! px += stridex 2593 ba .d8 2594 sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 2595 2596 .align 16 2597.update9: 2598 cmp counter,5 2599 bg,pn %icc,1f 2600 nop 2601 2602 ld [cmul_arr],%f2 2603 ba .cont9 2604 fzero %f0 26051: 2606 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2607 bg,pt %icc,1f 2608 nop 26092: 2610 sub counter,5,counter 2611 st counter,[%fp+tmp_counter] 2612 stx %i1,[%fp+tmp_py] 2613 stx %i3,[%fp+tmp_px] 2614 2615 ld [cmul_arr],%f2 2616 or %g0,5,counter 2617 ba .cont9 2618 fzero %f0 26191: 2620 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2621 bne,pn %icc,1f 2622 sethi %hi(0x00800000),%o5 2623 2624 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2625 be,pn %icc,2b 2626 nop 26271: 2628 st %f0,[%fp+tmp_pz] 2629 st %f2,[%fp+tmp_pz+4] 2630 ld [%fp+tmp_pz],%o4 2631 fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 2632 2633 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2634 cmp %l6,%o5 2635 bge,a 1f 2636 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2637 2638 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2639 sra %o4,28,%o4 ! itmp0 >>= 28; 2640 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2641 2642 and %o4,-8,%o4 ! itmp0 = -8; 2643 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2644 2645 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2646 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2647 2648 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 26491: 2650 add %i1,stridey,%i1 ! py += stridey 2651 faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 2652 fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 2653 2654 ld [%fp+tmp_pz+4],%o4 2655 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2656 cmp %l6,%o5 2657 bge,a 1f 2658 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2659 2660 fabss %f2,%f2 ! fx0 = fabsf(fx0); 2661 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2662 sra %o4,28,%o4 ! itmp0 >>= 28; 2663 2664 and %o4,-8,%o4 ! itmp0 = -8; 2665 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2666 2667 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2668 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2669 2670 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 26711: 2672 sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 2673 sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 2674 2675 add %i3,stridex,%i3 ! px += stridex 2676 ba .d9 2677 sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; 2678 2679 .align 16 2680.update10: 2681 cmp counter,1 2682 bg,pn %icc,1f 2683 nop 2684 2685 ld [cmul_arr],%f2 2686 ba .cont10 2687 fzero %f0 26881: 2689 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2690 bg,pt %icc,1f 2691 nop 26922: 2693 sub counter,1,counter 2694 st counter,[%fp+tmp_counter] 2695 stx %i1,[%fp+tmp_py] 2696 stx %i3,[%fp+tmp_px] 2697 2698 ld [cmul_arr],%f2 2699 or %g0,1,counter 2700 ba .cont10 2701 fzero %f0 27021: 2703 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2704 bne,pn %icc,1f 2705 sethi %hi(0x00800000),%o5 2706 2707 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2708 be,pn %icc,2b 2709 nop 27101: 2711 st %f0,[%fp+tmp_pz] 2712 st %f2,[%fp+tmp_pz+4] 2713 ld [%fp+tmp_pz],%o1 2714 fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20; 2715 2716 and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff 2717 cmp %o4,%o5 2718 bge,a 1f 2719 fstod %f0,%f40 ! (5_1) y0 = (double)fy0; 2720 2721 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2722 sra %o1,28,%o1 ! itmp0 >>= 28; 2723 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2724 2725 and %o1,-8,%o1 ! itmp0 = -8; 2726 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2727 2728 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2729 ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2730 2731 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 27321: 2733 faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5; 2734 fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20; 2735 2736 sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3; 2737 add %i3,stridex,%i3 ! px += stridex 2738 2739 ld [%fp+tmp_pz+4],%o1 2740 and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff 2741 cmp %o4,%o5 2742 bge,a 1f 2743 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2744 2745 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2746 sra %o1,28,%o1 ! itmp0 >>= 28; 2747 fabss %f2,%f2 ! fx0 = fabsf(fx0); 2748 2749 and %o1,-8,%o1 ! itmp0 = -8; 2750 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2751 2752 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2753 ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2754 2755 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 27561: 2757 ba .den0 2758 add %o2,stridez,%o1 ! pz += stridez 2759 2760 .align 16 2761.update11: 2762 cmp counter,2 2763 bg,pn %icc,1f 2764 nop 2765 2766 ld [cmul_arr],%f2 2767 ba .cont11 2768 fzero %f0 27691: 2770 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2771 bg,pt %icc,1f 2772 nop 27732: 2774 sub counter,2,counter 2775 st counter,[%fp+tmp_counter] 2776 stx %i1,[%fp+tmp_py] 2777 stx %i3,[%fp+tmp_px] 2778 2779 ld [cmul_arr],%f2 2780 or %g0,2,counter 2781 ba .cont11 2782 fzero %f0 27831: 2784 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2785 bne,pn %icc,1f 2786 sethi %hi(0x00800000),%o5 2787 2788 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2789 be,pn %icc,2b 2790 nop 27911: 2792 st %f0,[%fp+tmp_pz] 2793 st %f2,[%fp+tmp_pz+4] 2794 ld [%fp+tmp_pz],%o4 2795 fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20; 2796 2797 stx %l5,[%fp+tmp_px] 2798 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2799 cmp %l5,%o5 2800 bge,a 1f 2801 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2802 2803 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2804 sra %o4,28,%o4 ! itmp0 >>= 28; 2805 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2806 2807 and %o4,-8,%o4 ! itmp0 = -8; 2808 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2809 2810 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2811 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2812 2813 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 28141: 2815 faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5; 2816 add %i1,stridey,%i1 ! py += stridey 2817 fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20; 2818 2819 ld [%fp+tmp_pz+4],%o4 2820 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2821 cmp %l5,%o5 2822 bge,a 1f 2823 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2824 2825 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2826 sra %o4,28,%o4 ! itmp0 >>= 28; 2827 fabss %f2,%f2 ! fx0 = fabsf(fx0); 2828 2829 and %o4,-8,%o4 ! itmp0 = -8; 2830 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2831 2832 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2833 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2834 2835 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 28361: 2837 ldx [%fp+tmp_px],%l5 2838 sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27; 2839 add %i3,stridex,%i3 ! px += stridex 2840 2841 lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py; 2842 sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28; 2843 ba .den1 2844 add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr; 2845 2846 .align 16 2847.update12: 2848 cmp counter,3 2849 bg,pn %icc,1f 2850 nop 2851 2852 ld [cmul_arr],%f2 2853 ba .cont12 2854 fzero %f0 28551: 2856 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2857 bg,pt %icc,1f 2858 nop 28592: 2860 sub counter,3,counter 2861 st counter,[%fp+tmp_counter] 2862 stx %i1,[%fp+tmp_py] 2863 stx %i3,[%fp+tmp_px] 2864 2865 ld [cmul_arr],%f2 2866 or %g0,3,counter 2867 ba .cont12 2868 fzero %f0 28691: 2870 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2871 bne,pn %icc,1f 2872 sethi %hi(0x00800000),%o5 2873 2874 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2875 be,pn %icc,2b 2876 nop 28771: 2878 st %f0,[%fp+tmp_pz] 2879 st %f2,[%fp+tmp_pz+4] 2880 ld [%fp+tmp_pz],%o4 2881 fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20; 2882 2883 stx %l5,[%fp+tmp_px] 2884 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2885 cmp %l5,%o5 2886 bge,a 1f 2887 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2888 2889 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2890 sra %o4,28,%o4 ! itmp0 >>= 28; 2891 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2892 2893 and %o4,-8,%o4 ! itmp0 = -8; 2894 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2895 2896 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2897 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2898 2899 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 29001: 2901 faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5; 2902 add %i3,stridex,%i3 ! px += stridex 2903 add %i1,stridey,%i1 ! py += stridey 2904 fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20; 2905 2906 ld [%fp+tmp_pz+4],%o4 2907 and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff 2908 cmp %l5,%o5 2909 bge,a 1f 2910 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2911 2912 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2913 sra %o4,28,%o4 ! itmp0 >>= 28; 2914 fabss %f2,%f2 ! fx0 = fabsf(fx0); 2915 2916 and %o4,-8,%o4 ! itmp0 = -8; 2917 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 2918 2919 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 2920 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2921 2922 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 29231: 2924 ldx [%fp+tmp_px],%l5 2925 2926 sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27; 2927 2928 sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28; 2929 ba .den2 2930 add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr; 2931 2932 .align 16 2933.update13: 2934 cmp counter,4 2935 bg,pn %icc,1f 2936 nop 2937 2938 ld [cmul_arr],%f2 2939 ba .cont13 2940 fzero %f0 29411: 2942 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 2943 bg,pt %icc,1f 2944 nop 29452: 2946 sub counter,4,counter 2947 st counter,[%fp+tmp_counter] 2948 stx %i1,[%fp+tmp_py] 2949 sub %i3,stridex,%o5 2950 stx %o5,[%fp+tmp_px] 2951 2952 ld [cmul_arr],%f2 2953 or %g0,4,counter 2954 ba .cont13 2955 fzero %f0 29561: 2957 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2958 bne,pn %icc,1f 2959 sethi %hi(0x00800000),%o5 2960 2961 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 2962 be,pn %icc,2b 2963 nop 29641: 2965 st %f0,[%fp+tmp_pz] 2966 st %f2,[%fp+tmp_pz+4] 2967 ld [%fp+tmp_pz],%o4 2968 fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20; 2969 2970 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2971 cmp %l6,%o5 2972 bge,a 1f 2973 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 2974 2975 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 2976 sra %o4,28,%o4 ! itmp0 >>= 28; 2977 fabss %f0,%f0 ! fy0 = fabsf(fy0); 2978 2979 and %o4,-8,%o4 ! itmp0 = -8; 2980 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 2981 2982 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 2983 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 2984 2985 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 29861: 2987 faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5; 2988 add %i1,stridey,%i1 ! py += stridey 2989 fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20; 2990 2991 ld [%fp+tmp_pz+4],%o4 2992 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 2993 cmp %l6,%o5 2994 bge,a 1f 2995 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 2996 2997 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 2998 sra %o4,28,%o4 ! itmp0 >>= 28; 2999 fabss %f2,%f2 ! fx0 = fabsf(fx0); 3000 3001 and %o4,-8,%o4 ! itmp0 = -8; 3002 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 3003 3004 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 3005 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 3006 3007 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 30081: 3009 sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5; 3010 sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27; 3011 3012 sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28; 3013 ba .den3 3014 add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr; 3015 3016 .align 16 3017.update14: 3018 cmp counter,5 3019 bg,pn %icc,1f 3020 nop 3021 3022 ld [cmul_arr],%f1 3023 ba .cont14 3024 fzeros %f0 30251: 3026 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 3027 bg,pt %icc,1f 3028 nop 30292: 3030 sub counter,5,counter 3031 st counter,[%fp+tmp_counter] 3032 stx %i1,[%fp+tmp_py] 3033 sub %i3,stridex,%o5 3034 stx %o5,[%fp+tmp_px] 3035 3036 ld [cmul_arr],%f1 3037 or %g0,5,counter 3038 ba .cont14 3039 fzeros %f0 30401: 3041 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 3042 bne,pn %icc,1f 3043 sethi %hi(0x00800000),%o5 3044 3045 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 3046 be,pn %icc,2b 3047 nop 30481: 3049 std %f0,[%fp+tmp_pz] 3050 ld [%fp+tmp_pz],%o4 3051 fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20; 3052 3053 faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5; 3054 3055 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 3056 cmp %l6,%o5 3057 bge,a 1f 3058 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 3059 3060 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 3061 sra %o4,28,%o4 ! itmp0 >>= 28; 3062 fabss %f0,%f0 ! fy0 = fabsf(fy0); 3063 3064 and %o4,-8,%o4 ! itmp0 = -8; 3065 fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0; 3066 3067 fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149; 3068 ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0); 3069 3070 fmuld %f16,%f40,%f40 ! dtmp0 *= dsign; 30711: 3072 add %i1,stridey,%i1 ! py += stridey 3073 fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20; 3074 3075 ld [%fp+tmp_pz+4],%o4 3076 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 3077 cmp %l6,%o5 3078 bge,a 1f 3079 fstod %f1,%f16 ! (5_1) x0 = (double)fx0; 3080 3081 fabss %f1,%f16 ! fx0 = fabsf(fx0); 3082 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 3083 sra %o4,28,%o4 ! itmp0 >>= 28; 3084 3085 and %o4,-8,%o4 ! itmp0 = -8; 3086 fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0; 3087 3088 fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149; 3089 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 3090 3091 fmuld %f16,%f0,%f16 ! dtmp0 *= dsign; 30921: 3093 sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5; 3094 sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27; 3095 3096 ba .den4 3097 sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28; 3098 3099 .align 16 3100.update15: 3101 cmp counter,6 3102 bg,pn %icc,1f 3103 nop 3104 3105 ld [cmul_arr],%f2 3106 ba .cont15 3107 fzero %f0 31081: 3109 cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000 3110 bg,pt %icc,1f 3111 nop 31122: 3113 sub counter,6,counter 3114 st counter,[%fp+tmp_counter] 3115 stx %i1,[%fp+tmp_py] 3116 sub %i3,stridex,%o5 3117 stx %o5,[%fp+tmp_px] 3118 3119 ld [cmul_arr],%f2 3120 or %g0,6,counter 3121 ba .cont15 3122 fzero %f0 31231: 3124 andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 3125 bne,pn %icc,1f 3126 sethi %hi(0x00800000),%o5 3127 3128 andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff 3129 be,pn %icc,2b 3130 nop 31311: 3132 st %f0,[%fp+tmp_pz] 3133 st %f2,[%fp+tmp_pz+4] 3134 ld [%fp+tmp_pz],%o4 3135 fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20; 3136 3137 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 3138 cmp %l6,%o5 3139 bge,a 1f 3140 fstod %f0,%f40 ! (0_0) y0 = (double)fy0; 3141 3142 ldd [cmul_arr+96],%f40 ! LOAD C2ONM149 3143 sra %o4,28,%o4 ! itmp0 >>= 28; 3144 fabss %f0,%f0 ! fy0 = fabsf(fy0); 3145 3146 and %o4,-8,%o4 ! itmp0 = -8; 3147 fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0; 3148 3149 fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149; 3150 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 3151 3152 fmuld %f0,%f40,%f40 ! dtmp0 *= dsign; 31531: 3154 add %i1,stridey,%i1 ! py += stridey 3155 faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5; 3156 fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20; 3157 3158 ld [%fp+tmp_pz+4],%o4 3159 and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff 3160 cmp %l6,%o5 3161 bge,a 1f 3162 fstod %f2,%f2 ! (5_1) x0 = (double)fx0; 3163 3164 fabss %f2,%f2 ! fx0 = fabsf(fx0); 3165 ldd [cmul_arr+96],%f0 ! LOAD C2ONM149 3166 sra %o4,28,%o4 ! itmp0 >>= 28; 3167 3168 and %o4,-8,%o4 ! itmp0 = -8; 3169 fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0; 3170 3171 fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149; 3172 ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0); 3173 3174 fmuld %f2,%f0,%f2 ! dtmp0 *= dsign; 31751: 3176 sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5; 3177 sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27; 3178 3179 ba .den5 3180 sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28; 3181 3182 .align 16 3183.u0: 3184 ba .c0 3185 or %g0,_0x7fffffff,%o5 3186.u1: 3187 ba .c1 3188 or %g0,_0x7fffffff,%o5 3189.u2: 3190 ba .c2 3191 or %g0,_0x7f800000,%o5 3192.u3: 3193 ba .c3 3194 or %g0,_0x7f800000,%o5 3195.u4: 3196 ba .c4 3197 or %g0,_0x7fffffff,%o5 3198.u5: 3199 ba .c5 3200 or %g0,_0x7fffffff,%o5 3201.u6: 3202 ba .c6 3203 or %g0,_0x7f800000,%o5 3204.u7: 3205 ba .c7 3206 or %g0,_0x7f800000,%o5 3207.u8: 3208 ba .c8 3209 or %g0,_0x7fffffff,%o5 3210.u9: 3211 ba .c9 3212 or %g0,_0x7fffffff,%o5 3213.u10: 3214 ba .c10 3215 or %g0,_0x7f800000,%o5 3216.u11: 3217 ba .c11 3218 or %g0,_0x7f800000,%o5 3219.u12: 3220 ba .c12 3221 or %g0,_0x7fffffff,%o5 3222.u13: 3223 ba .c13 3224 or %g0,_0x7fffffff,%o5 3225.u14: 3226 ba .c14 3227 or %g0,_0x7f800000,%o5 3228.u15: 3229 ba .c15 3230 or %g0,_0x7f800000,%o5 3231.u16: 3232 ba .c16 3233 or %g0,_0x7fffffff,%o5 3234.u17: 3235 ba .c17 3236 or %g0,_0x7fffffff,%o5 3237.u18: 3238 ba .c18 3239 or %g0,_0x7f800000,%o5 3240.u19: 3241 ba .c19 3242 or %g0,_0x7f800000,%o5 3243.u20: 3244 ba .c20 3245 or %g0,_0x7fffffff,%o5 3246.u21: 3247 ba .c21 3248 or %g0,_0x7fffffff,%o5 3249.u22: 3250 ba .c22 3251 or %g0,_0x7f800000,%o5 3252.u23: 3253 ba .c23 3254 or %g0,_0x7f800000,%o5 3255.u24: 3256 ba .c24 3257 or %g0,_0x7fffffff,%o5 3258.u25: 3259 ba .c25 3260 or %g0,_0x7fffffff,%o5 3261.u26: 3262 ba .c26 3263 or %g0,_0x7f800000,%o5 3264.u27: 3265 ba .c27 3266 or %g0,_0x7f800000,%o5 3267.u28: 3268 ba .c28 3269 or %g0,_0x7fffffff,%o5 3270.u29: 3271 ba .c29 3272 or %g0,_0x7fffffff,%o5 3273.u30: 3274 ba .c30 3275 or %g0,_0x7f800000,%o5 3276.u31: 3277 ba .c31 3278 or %g0,_0x7f800000,%o5 3279.u32: 3280 ba .c32 3281 or %g0,_0x7fffffff,%o5 3282.u33: 3283 ba .c33 3284 or %g0,_0x7fffffff,%o5 3285.u34: 3286 ba .c34 3287 or %g0,_0x7f800000,%o5 3288.u35: 3289 ba .c35 3290 or %g0,_0x7f800000,%o5 3291.u36: 3292 ba .c36 3293 or %g0,_0x7fffffff,%o5 3294.u37: 3295 ba .c37 3296 or %g0,_0x7fffffff,%o5 3297.u38: 3298 ba .c38 3299 or %g0,_0x7f800000,%o5 3300.u39: 3301 ba .c39 3302 or %g0,_0x7f800000,%o5 3303.up0: 3304 ba .co0 3305 or %g0,_0x7fffffff,%o5 3306.up1: 3307 ba .co1 3308 or %g0,_0x7fffffff,%o5 3309.up2: 3310 ba .co2 3311 or %g0,_0x7f800000,%o5 3312.up3: 3313 ba .co3 3314 or %g0,_0x7f800000,%o5 3315.up4: 3316 ba .co4 3317 or %g0,_0x7fffffff,%o5 3318.up5: 3319 ba .co5 3320 or %g0,_0x7fffffff,%o5 3321.up6: 3322 ba .co6 3323 or %g0,_0x7f800000,%o5 3324.up7: 3325 ba .co7 3326 or %g0,_0x7f800000,%o5 3327.up8: 3328 ba .co8 3329 or %g0,_0x7fffffff,%o5 3330.up9: 3331 ba .co9 3332 or %g0,_0x7fffffff,%o5 3333.up10: 3334 ba .co10 3335 or %g0,_0x7f800000,%o5 3336.up11: 3337 ba .co11 3338 or %g0,_0x7f800000,%o5 3339.up12: 3340 ba .co12 3341 or %g0,_0x7fffffff,%o5 3342.up13: 3343 ba .co13 3344 or %g0,_0x7fffffff,%o5 3345.up14: 3346 ba .co14 3347 or %g0,_0x7f800000,%o5 3348.up15: 3349 ba .co15 3350 or %g0,_0x7f800000,%o5 3351.up16: 3352 ba .co16 3353 or %g0,_0x7fffffff,%o5 3354.up17: 3355 ba .co17 3356 or %g0,_0x7fffffff,%o5 3357.up18: 3358 ba .co18 3359 or %g0,_0x7f800000,%o5 3360.up19: 3361 ba .co19 3362 or %g0,_0x7f800000,%o5 3363.up20: 3364 ba .co20 3365 or %g0,_0x7fffffff,%o5 3366.up21: 3367 ba .co21 3368 or %g0,_0x7fffffff,%o5 3369.up22: 3370 ba .co22 3371 or %g0,_0x7f800000,%o5 3372.up23: 3373 ba .co23 3374 or %g0,_0x7f800000,%o5 3375.exit: 3376 ret 3377 restore 3378 SET_SIZE(__vatan2f) 3379 3380