/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vatanf.S"

#include "libm.h"

	RO_DATA
	.align	64

! Read-only table used by __vatanf.  Byte layout relative to .CONST_TBL,
! as consumed by the prologue of the routine:
!
!	   0 ..  63	eight double constants (K0, K1, K2, DC1, DC2, DC3,
!			DONE, DTWO), loaded with ldd at offsets 0, 8, ..., 56
!	  64 .. 575	parr0: 128 32-bit entries (base = .CONST_TBL + 64)
!	 576 .. 591	sign_arr: the doubles 1.0 and -1.0
!			(base = .CONST_TBL + 64 + 512)
!	 592 ..		parr1: 156 doubles (.CONST_TBL + 64 + 512 + 16)
!
! The order and size of every section matter: the code addresses them
! only through these fixed offsets.

.CONST_TBL:
	! Polynomial coefficients: K0 + K1 * x2 + K2 * x2 * x2.
	.word	0x3fefffff, 0xfffccbbc	! K0 = 9.99999999976686608841e-01
	.word	0xbfd55554, 0x51c6b90f	! K1 = -3.33333091601972730504e-01
	.word	0x3fc98d6d, 0x926596cc	! K2 = 1.99628540499523379702e-01
	! Bit-pattern constants for the VIS partitioned/logical operations.
	.word	0x00020000, 0x00000000	! DC1 (fpadd32 increment applied to x)
	.word	0xfffc0000, 0x00000000	! DC2 (fand mask applied after DC1)
	.word	0x7ff00000, 0x00000000	! DC3 (fand mask applied to div)
	.word	0x3ff00000, 0x00000000	! DONE = 1.0
	.word	0x40000000, 0x00000000	! DTWO = 2.0

! parr0 = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127]
!
! Indexed with the byte offset (bits(div) >> 43) & 508; the selected word
! is the minuend of the fpsub32 that forms the first estimate y0.

	.word	0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6
	.word	0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91
	.word	0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac
	.word	0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26
	.word	0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd
	.word	0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b
	.word	0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741
	.word	0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24
	.word	0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f
	.word	0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427
	.word	0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225
	.word	0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca
	.word	0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6
	.word	0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f
	.word	0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867
	.word	0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397
	.word	0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f
	.word	0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805
	.word	0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5
	.word	0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60
	.word	0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce
	.word	0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8
	.word	0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c
	.word	0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d
	.word	0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 0x7fd20120
	.word	0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c
	.word	0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d
	.word	0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30
	.word	0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244
	.word	0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab
	.word	0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949
	.word	0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804

! sign_arr: indexed with the byte offset (ux >> 28) & -8, which is 0 when
! the sign bit of the argument is clear and 8 when it is set.

	.word	0x3ff00000, 0x00000000	! 1.0
	.word	0xbff00000, 0x00000000	! -1.0

! parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155]
!
! The code keeps this base pre-biased by -0x1cc*8 (0x1cc = 460 entries),
! so the byte offset ((ax + 0x00100000) >> 18) & -8 is added to it directly.

	.word	0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f
	.word	0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf
	.word	0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2
	.word	0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3
	.word	0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19
	.word	0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30
	.word	0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195
	.word	0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302
	.word	0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a
	.word	0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1
	.word	0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c
	.word	0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c
	.word	0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700
	.word	0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712
	.word	0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9
	.word	0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444
	.word	0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d
	.word	0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4
	.word	0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c
	.word	0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2
	.word	0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc
	.word	0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd
	.word	0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4
	.word	0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634
	.word	0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e
	.word	0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f
	.word	0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8
	.word	0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5
	.word	0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857
	.word	0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd
	.word	0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054
	.word	0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0
	.word	0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f
	.word	0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc
	.word	0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45
	.word	0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f
	.word	0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665
	.word	0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0
	.word	0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5
	.word	0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27
	.word	0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38
	.word	0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2
	.word	0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849
	.word	0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff
	.word	0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619
	.word	0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa
	.word	0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105
	.word	0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7
	.word	0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc
	.word	0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb
	.word	0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28
	.word	0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1
	.word	0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94
	.word	0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6
	.word	0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395
	.word	0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7
	.word	0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e
	.word	0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5
	.word	0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2
	.word	0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886
	.word	0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5
	.word	0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf
	.word	0x3ff921f9, 0x54442d18, 0x3ff921f9, 0xbaaa937f
	.word	0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4
	.word	0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b
	.word	0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886
	.word	0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2
	.word	0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf
	.word	0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5
	.word	0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4
	.word	0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f
	.word	0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886
	.word	0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b
	.word	0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf
	.word	0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2
	.word	0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4
	.word	0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5
	.word	0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886

! Floating-point registers that hold the eight leading table constants
! for the whole call; listed in the same order as the table.
#define K0		%f54
#define K1		%f56
#define K2		%f58
#define DC1		%f60
#define DC2		%f2
#define DC3		%f62
#define DONE		%f52
#define DTWO		%f6

! Integer registers: byte strides (element strides shifted left by 2)
! and the two masks/increments applied to the raw float bits.
#define stridex		%o2
#define stridey		%o3
#define MASK_0x7fffffff	%i1
#define MASK_0x100000	%i5

! Scratch slots addressed off the frame pointer, 8 bytes each:
! saved input pointer, saved element count, and two slots used to move
! a double from the floating-point to the integer register file.
#define tmp_px		STACK_BIAS-32
#define tmp_counter	STACK_BIAS-24
#define tmp0		STACK_BIAS-16
#define tmp1		STACK_BIAS-8

! Number of elements still to be processed.
#define counter		%l1

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x20

!--------------------------------------------------------------------
!		!!!!!	vatanf algorithm	!!!!!
!  ux = ((int*)px)[0];
!  ax = ux & 0x7fffffff;
!
!  if ( ax < 0x39b89c55 )
!  {
!    *(int*)py = ux;
!    goto next;
!  }
!
!  if ( ax > 0x4c700518 )
!  {
!    if ( ax > 0x7f800000 )
!    {
!      float fpx = fabsf(*px);
!      fpx *= fpx;
!      *py = fpx;
!      goto next;
!    }
!
!    sign = ux & 0x80000000;
!    sign |= pi_2;
!    *(int*)py = sign;
!    goto next;
!  }
!
!  ftmp0 = *px;
!  x = (double)ftmp0;
!  px += stridex;
!  y = vis_fpadd32(x,DC1);
!  y = vis_fand(y,DC2);
!  div = x * y;
!  xx = x - y;
!  div += DONE;
!  i = ((unsigned long long*)&div)[0];
!  y0 = vis_fand(div,DC3);
!  i >>= 43;
!  i &= 508;
!  *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
!  y0 = vis_fpsub32(dtmp0, y0);
!  dtmp0 = div0 * y0;
!  dtmp0 = DTWO - dtmp0;
!  y0 *= dtmp0;
!  dtmp1 = div0 * y0;
!
dtmp1 = DTWO - dtmp1; 235! y0 *= dtmp1; 236! ax = ux & 0x7fffffff; 237! ax += 0x00100000; 238! ax >>= 18; 239! ax &= -8; 240! res = *(double*)((char*)parr1 + ax); 241! ux >>= 28; 242! ux &= -8; 243! dtmp0 = *(double*)((char*)sign_arr + ux); 244! res *= dtmp0; 245! xx *= y0; 246! x2 = xx * xx; 247! dtmp0 = K2 * x2; 248! dtmp0 += K1; 249! dtmp0 *= x2; 250! dtmp0 += K0; 251! dtmp0 *= xx; 252! res += dtmp0; 253! ftmp0 = (float)res; 254! py[0] = ftmp0; 255! py += stridey; 256!-------------------------------------------------------------------- 257 258 ENTRY(__vatanf) 259 save %sp,-SA(MINFRAME)-tmps,%sp 260 PIC_SETUP(l7) 261 PIC_SET(l7,.CONST_TBL,l2) 262 263 st %i0,[%fp+tmp_counter] 264 265 sllx %i2,2,stridex 266 sllx %i4,2,stridey 267 268 or %g0,%i3,%o1 269 stx %i1,[%fp+tmp_px] 270 271 ldd [%l2],K0 272 ldd [%l2+8],K1 273 ldd [%l2+16],K2 274 ldd [%l2+24],DC1 275 ldd [%l2+32],DC2 276 ldd [%l2+40],DC3 277 ldd [%l2+48],DONE 278 ldd [%l2+56],DTWO 279 280 add %l2,64,%i4 281 add %l2,64+512,%l0 282 add %l2,64+512+16-0x1cc*8,%l7 283 284 sethi %hi(0x100000),MASK_0x100000 285 sethi %hi(0x7ffffc00),MASK_0x7fffffff 286 add MASK_0x7fffffff,1023,MASK_0x7fffffff 287 288 sethi %hi(0x39b89c00),%o4 289 add %o4,0x55,%o4 290 sethi %hi(0x4c700400),%o5 291 add %o5,0x118,%o5 292 293.begin: 294 ld [%fp+tmp_counter],counter 295 ldx [%fp+tmp_px],%i3 296 st %g0,[%fp+tmp_counter] 297.begin1: 298 cmp counter,0 299 ble,pn %icc,.exit 300 nop 301 302 lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; 303 304 and %l6,MASK_0x7fffffff,%l5 ! (0_0) ax = ux & 0x7fffffff; 305 lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; 306 307 cmp %l5,%o4 ! (0_0) ax ? 0x39b89c55 308 bl,pn %icc,.spec0 ! (0_0) if ( ax < 0x39b89c55 ) 309 nop 310 311 cmp %l5,%o5 ! (0_0) ax ? 0x4c700518 312 bg,pn %icc,.spec1 ! (0_0) if ( ax > 0x4c700518 ) 313 nop 314 315 add %i3,stridex,%l5 ! px += stridex; 316 fstod %f0,%f22 ! (0_0) ftmp0 = *px; 317 mov %l6,%i3 318 319 lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0]; 320 321 and %l6,MASK_0x7fffffff,%o7 ! 
(1_0) ax = ux & 0x7fffffff; 322 lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; 323 add %l5,stridex,%l4 ! px += stridex; 324 fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1); 325 326 cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 327 bl,pn %icc,.update0 ! (1_0) if ( ax < 0x39b89c55 ) 328 nop 329.cont0: 330 cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 331 bg,pn %icc,.update1 ! (1_0) if ( ax > 0x4c700518 ) 332 nop 333.cont1: 334 fstod %f0,%f20 ! (1_0) x = (double)ftmp0; 335 mov %l6,%l5 336 337 fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); 338 339 fmuld %f22,%f26,%f32 ! (0_0) div = x * y; 340 341 lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; 342 fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; 343 344 and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; 345 lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; 346 add %l4,stridex,%l3 ! px += stridex; 347 fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); 348 349 cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 350 bl,pn %icc,.update2 ! (2_0) if ( ax < 0x39b89c55 ) 351 faddd DONE,%f32,%f32 ! (0_0) div += done; 352.cont2: 353 cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 354 bg,pn %icc,.update3 ! (2_0) if ( ax > 0x4c700518 ) 355 nop 356.cont3: 357 std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; 358 mov %l6,%l4 359 fstod %f0,%f18 ! (2_0) x = (double)ftmp0; 360 361 fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); 362 363 fmuld %f20,%f26,%f30 ! (1_0) div = x * y; 364 365 lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; 366 fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; 367 368 and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; 369 lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; 370 add %l3,stridex,%i0 ! px += stridex; 371 fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); 372 373 cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 374 bl,pn %icc,.update4 ! (3_0) if ( ax < 0x39b89c55 ) 375 faddd DONE,%f30,%f30 ! (1_0) div += done; 376.cont4: 377 cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 378 bg,pn %icc,.update5 ! 
(3_0) if ( ax > 0x4c700518 ) 379 nop 380.cont5: 381 std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0]; 382 mov %l6,%l3 383 fstod %f0,%f16 ! (3_0) x = (double)ftmp0; 384 385 ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; 386 fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); 387 388 fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); 389 390 srlx %o0,43,%o0 ! (0_0) i >>= 43; 391 392 and %o0,508,%l6 ! (0_0) i &= 508; 393 394 ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 395 396 fmuld %f18,%f26,%f28 ! (2_0) div = x * y; 397 398 lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; 399 fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; 400 401 fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); 402 403 and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; 404 lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; 405 add %i0,stridex,%i2 ! px += stridex; 406 fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); 407 408 cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 409 bl,pn %icc,.update6 ! (4_0) if ( ax < 0x39b89c55 ) 410 faddd DONE,%f28,%f28 ! (2_0) div += done; 411.cont6: 412 fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; 413 cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 414 bg,pn %icc,.update7 ! (4_0) if ( ax > 0x4c700518 ) 415 nop 416.cont7: 417 std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; 418 mov %l6,%i0 419 fstod %f0,%f14 ! (4_0) x = (double)ftmp0; 420 421 ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; 422 fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2); 423 424 fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); 425 426 fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; 427 srlx %g1,43,%g1 ! (1_0) i >>= 43; 428 429 and %g1,508,%l6 ! (1_0) i &= 508; 430 431 ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 432 433 fmuld %f16,%f26,%f34 ! (3_0) div = x * y; 434 435 lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; 436 fsubd %f16,%f26,%f16 ! 
(3_0) xx = x - y; 437 438 fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); 439 add %i2,stridex,%l2 ! px += stridex; 440 441 fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; 442 and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff; 443 lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; 444 fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); 445 446 cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 447 bl,pn %icc,.update8 ! (5_0) if ( ax < 0x39b89c55 ) 448 faddd DONE,%f34,%f34 ! (3_0) div += done; 449.cont8: 450 fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; 451 cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 452 bg,pn %icc,.update9 ! (5_0) if ( ax > 0x4c700518 ) 453 nop 454.cont9: 455 std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; 456 mov %l6,%i2 457 fstod %f0,%f36 ! (5_0) x = (double)ftmp0; 458 459 fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; 460 ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; 461 fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); 462 463 fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); 464 465 fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; 466 srlx %o0,43,%o0 ! (2_0) i >>= 43; 467 468 and %o0,508,%l6 ! (2_0) i &= 508; 469 fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; 470 471 ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 472 473 fmuld %f14,%f26,%f32 ! (4_0) div = x * y; 474 475 lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; 476 fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; 477 478 fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; 479 add %l2,stridex,%g5 ! px += stridex; 480 fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); 481 482 fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; 483 and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; 484 lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; 485 fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1); 486 487 cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 488 bl,pn %icc,.update10 ! (6_0) if ( ax < 0x39b89c55 ) 489 faddd DONE,%f32,%f32 ! 
(4_0) div += done; 490.cont10: 491 fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; 492 cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 493 bg,pn %icc,.update11 ! (6_0) if ( ax > 0x4c700518 ) 494 nop 495.cont11: 496 fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; 497 mov %l6,%l2 498 std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; 499 fstod %f0,%f10 ! (6_0) x = (double)ftmp0; 500 501 fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; 502 ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; 503 fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); 504 505 fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); 506 507 fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; 508 srlx %g1,43,%g1 ! (3_0) i >>= 43; 509 fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; 510 511 and %g1,508,%l6 ! (3_0) i &= 508; 512 mov %i3,%o7 513 fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; 514 515 ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 516 517 fmuld %f36,%f26,%f30 ! (5_0) div = x * y; 518 srl %o7,28,%g1 ! (0_0) ux >>= 28; 519 add %g5,stridex,%i3 ! px += stridex; 520 521 fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; 522 and %o7,MASK_0x7fffffff,%o0 ! (0_0) ax = ux & 0x7fffffff; 523 lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; 524 fsubd %f36,%f26,%f36 ! (5_0) xx = x - y; 525 526 fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; 527 add %o0,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; 528 and %g1,-8,%g1 ! (0_0) ux &= -8; 529 fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); 530 531 fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; 532 and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; 533 lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; 534 fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); 535 536 cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 537 bl,pn %icc,.update12 ! (7_0) if ( ax < 0x39b89c55 ) 538 faddd DONE,%f30,%f30 ! (5_0) div += done; 539.cont12: 540 fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; 541 cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 542 bg,pn %icc,.update13 ! 
(7_0) if ( ax > 0x4c700518 ) 543 faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; 544.cont13: 545 fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; 546 srl %o0,18,%o7 ! (0_0) ax >>= 18; 547 std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; 548 fstod %f0,%f8 ! (7_0) x = (double)ftmp0; 549 550 fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; 551 and %o7,-8,%o7 ! (0_0) ux &= -8; 552 ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; 553 fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2); 554 555 add %o7,%l7,%o7 ! (0_0) (char*)parr1 + ax; 556 mov %l6,%g5 557 ldd [%l0+%g1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); 558 559 fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; 560 srlx %o0,43,%o0 ! (4_0) i >>= 43; 561 ldd [%o7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); 562 fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); 563 564 fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; 565 and %o0,508,%l6 ! (4_0) i &= 508; 566 mov %l5,%o7 567 fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; 568 569 fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; 570 571 fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; 572 srl %o7,28,%l5 ! (1_0) ux >>= 28; 573 ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 574 575 fmuld %f10,%f26,%f28 ! (6_0) div = x * y; 576 faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; 577 578 subcc counter,8,counter 579 bneg,pn %icc,.tail 580 or %g0,%o1,%o0 581 582 add %fp,tmp0,%g1 583 lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; 584 585 ba .main_loop 586 add %i3,stridex,%l5 ! px += stridex; 587 588 .align 16 589.main_loop: 590 fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; 591 and %o7,MASK_0x7fffffff,%o1 ! (1_1) ax = ux & 0x7fffffff; 592 st %f12,[%g1] ! (7_1) py[0] = ftmp0; 593 fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; 594 595 fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; 596 srl %o7,28,%o7 ! (1_0) ux >>= 28; 597 add %o1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; 598 fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); 599 600 fmuld %f38,%f44,%f38 ! 
(3_1) y0 *= dtmp0; 601 and %l6,MASK_0x7fffffff,%o1 ! (0_0) ax = ux & 0x7fffffff; 602 lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; 603 fpadd32 %f8,DC1,%f24 ! (7_1) y = vis_fpadd32(x,dconst1); 604 605 fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; 606 cmp %o1,%o4 ! (0_0) ax ? 0x39b89c55 607 bl,pn %icc,.update14 ! (0_0) if ( ax < 0x39b89c55 ) 608 faddd DONE,%f28,%f28 ! (6_1) div += done; 609.cont14: 610 fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; 611 cmp %o1,%o5 ! (0_0) ax ? 0x4c700518 612 bg,pn %icc,.update15 ! (0_0) if ( ax > 0x4c700518 ) 613 faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1; 614.cont15: 615 fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; 616 srl %g1,18,%o1 ! (1_1) ax >>= 18; 617 std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; 618 fstod %f0,%f22 ! (0_0) ftmp0 = *px; 619 620 fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; 621 and %o1,-8,%o1 ! (1_1) ax &= -8; 622 ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; 623 fand %f24,DC2,%f26 ! (7_1) y = vis_fand(y,dconst2); 624 625 ldd [%o1+%l7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); 626 and %o7,-8,%o7 ! (1_1) ux &= -8; 627 mov %l6,%i3 628 faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; 629 630 fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; 631 nop 632 ldd [%l0+%o7],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); 633 fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); 634 635 fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; 636 srlx %g1,43,%g1 ! (5_1) i >>= 43; 637 mov %l4,%o7 638 fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; 639 640 and %g1,508,%l6 ! (5_1) i &= 508; 641 nop 642 bn,pn %icc,.exit 643 fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; 644 645 fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; 646 add %o0,stridey,%g1 ! py += stridey; 647 ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 648 fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res; 649 650 fmuld %f8,%f26,%f34 ! (7_1) div = x * y; 651 srl %o7,28,%o1 ! (2_1) ux >>= 28; 652 lda [%l5]0x82,%l6 ! 
(1_0) ux = ((int*)px)[0]; 653 faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; 654 655 fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; 656 and %o7,MASK_0x7fffffff,%o7 ! (2_1) ax = ux & 0x7fffffff; 657 st %f12,[%o0] ! (0_1) py[0] = ftmp0; 658 fsubd %f8,%f26,%f8 ! (7_1) xx = x - y; 659 660 fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; 661 add %l5,stridex,%l4 ! px += stridex; 662 add %o7,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; 663 fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); 664 665 fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; 666 and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; 667 lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; 668 fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1); 669 670 fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; 671 cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 672 bl,pn %icc,.update16 ! (1_0) if ( ax < 0x39b89c55 ) 673 faddd DONE,%f34,%f34 ! (7_1) div += done; 674.cont16: 675 fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; 676 cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 677 bg,pn %icc,.update17 ! (1_0) if ( ax > 0x4c700518 ) 678 faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; 679.cont17: 680 fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; 681 srl %o0,18,%o7 ! (2_1) ax >>= 18; 682 std %f34,[%fp+tmp1] ! (7_1) i = ((unsigned long long*)&div)[0]; 683 fstod %f0,%f20 ! (1_0) x = (double)ftmp0; 684 685 fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; 686 ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; 687 and %o1,-8,%o1 ! (2_1) ux &= -8; 688 fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); 689 690 faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; 691 and %o7,-8,%o7 ! (2_1) ax &= -8; 692 ldd [%l0+%o1],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); 693 bn,pn %icc,.exit 694 695 ldd [%o7+%l7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); 696 mov %l6,%l5 697 fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; 698 fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); 699 700 fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; 701 srlx %o0,43,%o0 ! 
(6_1) i >>= 43; 702 mov %l3,%o7 703 fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; 704 705 and %o0,508,%l6 ! (6_1) i &= 508; 706 add %l4,stridex,%l3 ! px += stridex; 707 bn,pn %icc,.exit 708 fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; 709 710 fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; 711 add %g1,stridey,%o0 ! py += stridey; 712 ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 713 fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; 714 715 fmuld %f22,%f26,%f32 ! (0_0) div = x * y; 716 srl %o7,28,%o1 ! (3_1) ux >>= 28; 717 lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; 718 faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; 719 720 fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; 721 and %o7,MASK_0x7fffffff,%o7 ! (3_1) ax = ux & 0x7fffffff; 722 st %f12,[%g1] ! (1_1) py[0] = ftmp0; 723 fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; 724 725 fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; 726 add %o7,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; 727 and %o1,-8,%o1 ! (3_1) ux &= -8; 728 fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); 729 730 fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; 731 and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; 732 lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; 733 fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); 734 735 fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; 736 cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 737 bl,pn %icc,.update18 ! (2_0) if ( ax < 0x39b89c55 ) 738 faddd DONE,%f32,%f32 ! (0_0) div += done; 739.cont18: 740 fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; 741 cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 742 bg,pn %icc,.update19 ! (2_0) if ( ax > 0x4c700518 ) 743 faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; 744.cont19: 745 fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; 746 srl %g1,18,%o7 ! (3_1) ax >>= 18; 747 std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; 748 fstod %f0,%f18 ! (2_0) x = (double)ftmp0; 749 750 fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; 751 and %o7,-8,%o7 ! (3_1) ax &= -8; 752 ldx [%fp+tmp1],%g1 ! 
(7_1) i = ((unsigned long long*)&div)[0]; 753 fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); 754 755 faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; 756 mov %l6,%l4 757 ldd [%l0+%o1],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); 758 bn,pn %icc,.exit 759 760 fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; 761 ldd [%o7+%l7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) 762 nop 763 fand %f34,DC3,%f24 ! (7_1) y0 = vis_fand(div,dconst3); 764 765 fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; 766 srlx %g1,43,%g1 ! (7_1) i >>= 43; 767 mov %i0,%o7 768 fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; 769 770 and %g1,508,%l6 ! (7_1) i &= 508; 771 add %l3,stridex,%i0 ! px += stridex; 772 bn,pn %icc,.exit 773 fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; 774 775 fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; 776 add %o0,stridey,%g1 ! py += stridey; 777 ld [%i4+%l6],%f0 ! (7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 778 fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; 779 780 fmuld %f20,%f26,%f30 ! (1_0) div = x * y; 781 srl %o7,28,%o1 ! (4_1) ux >>= 28; 782 lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; 783 faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; 784 785 fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; 786 and %o7,MASK_0x7fffffff,%o7 ! (4_1) ax = ux & 0x7fffffff; 787 st %f12,[%o0] ! (2_1) py[0] = ftmp0; 788 fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; 789 790 fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; 791 add %o7,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; 792 and %o1,-8,%o1 ! (4_1) ux &= -8; 793 fpsub32 %f0,%f24,%f38 ! (7_1) y0 = vis_fpsub32(dtmp0, y0); 794 795 fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; 796 and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; 797 lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; 798 fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); 799 800 fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; 801 cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 802 bl,pn %icc,.update20 ! (3_0) if ( ax < 0x39b89c55 ) 803 faddd DONE,%f30,%f30 ! 
(1_0) div += done; 804.cont20: 805 fmuld %f34,%f38,%f42 ! (7_1) dtmp0 = div0 * y0; 806 cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 807 bg,pn %icc,.update21 ! (3_0) if ( ax > 0x4c700518 ) 808 faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; 809.cont21: 810 fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; 811 srl %o0,18,%o7 ! (4_1) ax >>= 18; 812 std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0]; 813 fstod %f0,%f16 ! (3_0) x = (double)ftmp0; 814 815 fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; 816 and %o7,-8,%o7 ! (4_1) ax &= -8; 817 ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; 818 fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); 819 820 faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; 821 nop 822 ldd [%l0+%o1],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux); 823 bn,pn %icc,.exit 824 825 ldd [%o7+%l7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); 826 mov %l6,%l3 827 fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; 828 fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); 829 830 fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx; 831 srlx %o0,43,%o0 ! (0_0) i >>= 43; 832 mov %i2,%o7 833 fsubd DTWO,%f42,%f44 ! (7_1) dtmp0 = dtwo - dtmp0; 834 835 and %o0,508,%l6 ! (0_0) i &= 508; 836 add %i0,stridex,%i2 ! px += stridex; 837 bn,pn %icc,.exit 838 fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; 839 840 fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; 841 add %g1,stridey,%o0 ! py += stridey; 842 ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 843 fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; 844 845 fmuld %f18,%f26,%f28 ! (2_0) div = x * y; 846 srl %o7,28,%o1 ! (5_1) ux >>= 28; 847 lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; 848 faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; 849 850 fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; 851 and %o7,MASK_0x7fffffff,%o7 ! (5_1) ax = ux & 0x7fffffff; 852 st %f12,[%g1] ! (3_1) py[0] = ftmp0; 853 fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; 854 855 fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; 856 add %o7,MASK_0x100000,%g1 ! 
(5_1) ax += 0x00100000; 857 and %o1,-8,%o1 ! (5_1) ux &= -8; 858 fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); 859 860 fmuld %f38,%f44,%f38 ! (7_1) y0 *= dtmp0; 861 and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; 862 lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; 863 fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); 864 865 fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; 866 cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 867 bl,pn %icc,.update22 ! (4_0) if ( ax < 0x39b89c55 ) 868 faddd DONE,%f28,%f28 ! (2_0) div += done; 869.cont22: 870 fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; 871 cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 872 bg,pn %icc,.update23 ! (4_0) if ( ax > 0x4c700518 ) 873 faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; 874.cont23: 875 fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; 876 srl %g1,18,%o7 ! (5_1) ax >>= 18; 877 std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; 878 fstod %f0,%f14 ! (4_0) x = (double)ftmp0; 879 880 fmuld %f34,%f38,%f34 ! (7_1) dtmp1 = div0 * y0; 881 and %o7,-8,%o7 ! (5_1) ax &= -8; 882 ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; 883 fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2); 884 885 faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; 886 mov %l6,%i0 887 ldd [%l0+%o1],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); 888 bn,pn %icc,.exit 889 890 ldd [%o7+%l7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); 891 nop 892 fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; 893 fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); 894 895 fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; 896 srlx %g1,43,%g1 ! (1_0) i >>= 43; 897 mov %l2,%o7 898 fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; 899 900 and %g1,508,%l6 ! (1_0) i &= 508; 901 add %i2,stridex,%l2 ! px += stridex; 902 bn,pn %icc,.exit 903 fsubd DTWO,%f34,%f46 ! (7_1) dtmp1 = dtwo - dtmp1; 904 905 fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; 906 add %o0,stridey,%g1 ! py += stridey; 907 ld [%i4+%l6],%f0 ! 
(1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 908 fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res; 909 910 fmuld %f16,%f26,%f34 ! (3_0) div = x * y; 911 srl %o7,28,%o1 ! (6_1) ux >>= 28; 912 lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; 913 faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; 914 915 fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; 916 and %o7,MASK_0x7fffffff,%o7 ! (6_1) ax = ux & 0x7fffffff; 917 st %f12,[%o0] ! (4_1) py[0] = ftmp0; 918 fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; 919 920 fmuld %f38,%f46,%f26 ! (7_1) y0 *= dtmp1; 921 add %o7,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; 922 and %o1,-8,%o1 ! (6_1) ux &= -8; 923 fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); 924 925 fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; 926 and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff; 927 lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; 928 fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); 929 930 fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; 931 cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 932 bl,pn %icc,.update24 ! (5_0) if ( ax < 0x39b89c55 ) 933 faddd DONE,%f34,%f34 ! (3_0) div += done; 934.cont24: 935 fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; 936 cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 937 bg,pn %icc,.update25 ! (5_0) if ( ax > 0x4c700518 ) 938 faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1; 939.cont25: 940 fmuld %f8,%f26,%f8 ! (7_1) xx *= y0; 941 srl %o0,18,%o7 ! (6_1) ax >>= 18; 942 std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; 943 fstod %f0,%f36 ! (5_0) x = (double)ftmp0; 944 945 fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; 946 and %o7,-8,%o7 ! (6_1) ax &= -8; 947 ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; 948 fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); 949 950 faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; 951 mov %l6,%i2 952 ldd [%l0+%o1],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); 953 bn,pn %icc,.exit 954 955 ldd [%o7+%l7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); 956 nop 957 fmuld %f4,%f50,%f4 ! 
(6_1) dtmp0 *= x2; 958 fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); 959 960 fmuld %f8,%f8,%f50 ! (7_1) x2 = xx * xx; 961 srlx %o0,43,%o0 ! (2_0) i >>= 43; 962 mov %g5,%o7 963 fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; 964 965 and %o0,508,%l6 ! (2_0) i &= 508; 966 add %l2,stridex,%g5 ! px += stridex; 967 bn,pn %icc,.exit 968 fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; 969 970 fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; 971 add %g1,stridey,%o0 ! py += stridey; 972 ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 973 fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; 974 975 fmuld %f14,%f26,%f32 ! (4_0) div = x * y; 976 srl %o7,28,%o1 ! (7_1) ux >>= 28; 977 lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; 978 faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; 979 980 fmuld K2,%f50,%f4 ! (7_1) dtmp0 = K2 * x2; 981 and %o7,MASK_0x7fffffff,%o7 ! (7_1) ax = ux & 0x7fffffff; 982 st %f12,[%g1] ! (5_1) py[0] = ftmp0; 983 fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; 984 985 fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; 986 add %o7,MASK_0x100000,%g1 ! (7_1) ax += 0x00100000; 987 and %o1,-8,%o1 ! (7_1) ux &= -8; 988 fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); 989 990 fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; 991 and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; 992 lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; 993 fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1); 994 995 fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; 996 cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 997 bl,pn %icc,.update26 ! (6_0) if ( ax < 0x39b89c55 ) 998 faddd DONE,%f32,%f32 ! (4_0) div += done; 999.cont26: 1000 fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; 1001 cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 1002 bg,pn %icc,.update27 ! (6_0) if ( ax > 0x4c700518 ) 1003 faddd %f4,K1,%f4 ! (7_1) dtmp0 += K1; 1004.cont27: 1005 fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; 1006 srl %g1,18,%o7 ! (7_1) ax >>= 18; 1007 std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; 1008 fstod %f0,%f10 ! 
(6_0) x = (double)ftmp0; 1009 1010 fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; 1011 and %o7,-8,%o7 ! (7_1) ax &= -8; 1012 ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; 1013 fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); 1014 1015 faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; 1016 mov %l6,%l2 1017 ldd [%l0+%o1],%f48 ! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1018 bn,pn %icc,.exit 1019 1020 ldd [%o7+%l7],%f0 ! (7_1) res = *(double*)((char*)parr1 + ax); 1021 nop 1022 fmuld %f4,%f50,%f4 ! (7_1) dtmp0 *= x2; 1023 fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); 1024 1025 fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; 1026 srlx %g1,43,%g1 ! (3_0) i >>= 43; 1027 mov %i3,%o7 1028 fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; 1029 1030 and %g1,508,%l6 ! (3_0) i &= 508; 1031 add %g5,stridex,%i3 ! px += stridex; 1032 bn,pn %icc,.exit 1033 fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; 1034 1035 fmuld %f0,%f48,%f48 ! (7_1) res *= dtmp0; 1036 add %o0,stridey,%g1 ! py += stridey; 1037 ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 1038 fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; 1039 1040 fmuld %f36,%f26,%f30 ! (5_0) div = x * y; 1041 srl %o7,28,%o1 ! (0_0) ux >>= 28; 1042 lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; 1043 faddd %f4,K0,%f42 ! (7_1) dtmp0 += K0; 1044 1045 fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; 1046 and %o7,MASK_0x7fffffff,%o7 ! (0_0) ax = ux & 0x7fffffff; 1047 st %f12,[%o0] ! (6_1) py[0] = ftmp0; 1048 fsubd %f36,%f26,%f36 ! (5_0) xx = x - y; 1049 1050 fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; 1051 add %o7,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; 1052 and %o1,-8,%o1 ! (0_0) ux &= -8; 1053 fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); 1054 1055 fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; 1056 and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; 1057 lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; 1058 fpadd32 %f10,DC1,%f24 ! 
(6_0) y = vis_fpadd32(x,dconst1); 1059 1060 fmuld %f42,%f8,%f44 ! (7_1) dtmp0 *= xx; 1061 cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 1062 bl,pn %icc,.update28 ! (7_0) if ( ax < 0x39b89c55 ) 1063 faddd DONE,%f30,%f30 ! (5_0) div += done; 1064.cont28: 1065 fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; 1066 cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 1067 bg,pn %icc,.update29 ! (7_0) if ( ax > 0x4c700518 ) 1068 faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; 1069.cont29: 1070 fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; 1071 srl %o0,18,%o7 ! (0_0) ax >>= 18; 1072 std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; 1073 fstod %f0,%f8 ! (7_0) x = (double)ftmp0; 1074 1075 fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; 1076 and %o7,-8,%o7 ! (0_0) ux &= -8; 1077 ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; 1078 fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2); 1079 1080 faddd %f48,%f44,%f12 ! (7_1) res += dtmp0; 1081 subcc counter,8,counter 1082 ldd [%l0+%o1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); 1083 bn,pn %icc,.exit 1084 1085 fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; 1086 mov %l6,%g5 1087 ldd [%o7+%l7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); 1088 fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); 1089 1090 fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; 1091 srlx %o0,43,%l6 ! (4_0) i >>= 43; 1092 mov %l5,%o7 1093 fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; 1094 1095 add %g1,stridey,%o0 ! py += stridey; 1096 and %l6,508,%l6 ! (4_0) i &= 508; 1097 bn,pn %icc,.exit 1098 fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; 1099 1100 fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; 1101 ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 1102 add %i3,stridex,%l5 ! px += stridex; 1103 fdtos %f12,%f12 ! (7_1) ftmp0 = (float)res; 1104 1105 lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; 1106 fmuld %f10,%f26,%f28 ! (6_0) div = x * y; 1107 bpos,pt %icc,.main_loop 1108 faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; 1109 1110 srl %o7,28,%l5 ! 
(1_0) ux >>= 28; 1111 st %f12,[%g1] ! (7_1) py[0] = ftmp0; 1112 1113.tail: 1114 addcc counter,7,counter 1115 bneg,pn %icc,.begin 1116 or %g0,%o0,%o1 1117 1118 fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; 1119 and %o7,MASK_0x7fffffff,%g1 ! (1_1) ax = ux & 0x7fffffff; 1120 fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; 1121 1122 fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; 1123 add %g1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; 1124 and %l5,-8,%l5 ! (1_1) ux &= -8; 1125 fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); 1126 1127 fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; 1128 1129 fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; 1130 faddd DONE,%f28,%f28 ! (6_1) div += done; 1131 1132 fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; 1133 faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1; 1134 1135 fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; 1136 srl %g1,18,%o7 ! (1_1) ax >>= 18; 1137 std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; 1138 1139 fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; 1140 and %o7,-8,%o7 ! (1_1) ax &= -8; 1141 ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; 1142 1143 faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; 1144 add %o7,%l7,%o7 ! (1_1) (char*)parr1 + ax; 1145 ldd [%l0+%l5],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1146 1147 fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; 1148 fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); 1149 ldd [%o7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); 1150 1151 fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; 1152 fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; 1153 srlx %g1,43,%g1 ! (5_1) i >>= 43; 1154 1155 and %g1,508,%l6 ! (5_1) i &= 508; 1156 mov %l4,%o7 1157 fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; 1158 1159 fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; 1160 add %o0,stridey,%g1 ! py += stridey; 1161 ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 1162 fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res; 1163 1164 srl %o7,28,%l4 ! (2_1) ux >>= 28; 1165 st %f12,[%o0] ! 
(0_1) py[0] = ftmp0; 1166 faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; 1167 1168 subcc counter,1,counter 1169 bneg,pn %icc,.begin 1170 or %g0,%g1,%o1 1171 1172 fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; 1173 and %o7,MASK_0x7fffffff,%o0 ! (2_1) ax = ux & 0x7fffffff; 1174 1175 fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; 1176 add %o0,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; 1177 and %l4,-8,%l4 ! (2_1) ux &= -8; 1178 fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); 1179 1180 fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; 1181 1182 fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; 1183 1184 fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; 1185 faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; 1186 1187 fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; 1188 srl %o0,18,%o7 ! (2_1) ax >>= 18; 1189 1190 fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; 1191 and %o7,-8,%o7 ! (2_1) ax &= -8; 1192 ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; 1193 1194 faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; 1195 add %o7,%l7,%o7 ! (2_1) (char*)parr1 + ax; 1196 ldd [%l0+%l4],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1197 1198 fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; 1199 fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); 1200 ldd [%o7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); 1201 1202 fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; 1203 fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; 1204 srlx %o0,43,%o0 ! (6_1) i >>= 43; 1205 1206 and %o0,508,%l6 ! (6_1) i &= 508; 1207 mov %l3,%o7 1208 fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; 1209 1210 fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; 1211 add %g1,stridey,%o0 ! py += stridey; 1212 ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 1213 fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; 1214 1215 srl %o7,28,%l3 ! (3_1) ux >>= 28; 1216 st %f12,[%g1] ! (1_1) py[0] = ftmp0; 1217 faddd %f4,K0,%f42 ! 
(2_1) dtmp0 += K0; 1218 1219 subcc counter,1,counter 1220 bneg,pn %icc,.begin 1221 or %g0,%o0,%o1 1222 1223 fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; 1224 and %o7,MASK_0x7fffffff,%g1 ! (3_1) ax = ux & 0x7fffffff; 1225 1226 fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; 1227 add %g1,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; 1228 and %l3,-8,%l3 ! (3_1) ux &= -8; 1229 fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); 1230 1231 fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; 1232 1233 fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; 1234 1235 fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; 1236 faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; 1237 1238 fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; 1239 srl %g1,18,%o7 ! (3_1) ax >>= 18; 1240 1241 fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; 1242 and %o7,-8,%o7 ! (3_1) ax &= -8; 1243 1244 faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; 1245 add %o7,%l7,%o7 ! (3_1) (char*)parr1 + ax; 1246 ldd [%l0+%l3],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1247 1248 fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; 1249 ldd [%o7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) 1250 1251 fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; 1252 fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; 1253 1254 mov %i0,%o7 1255 fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; 1256 1257 fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; 1258 add %o0,stridey,%g1 ! py += stridey; 1259 fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; 1260 1261 srl %o7,28,%i0 ! (4_1) ux >>= 28; 1262 st %f12,[%o0] ! (2_1) py[0] = ftmp0; 1263 faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; 1264 1265 subcc counter,1,counter 1266 bneg,pn %icc,.begin 1267 or %g0,%g1,%o1 1268 1269 fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; 1270 and %o7,MASK_0x7fffffff,%o0 ! (4_1) ax = ux & 0x7fffffff; 1271 1272 fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; 1273 add %o0,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; 1274 and %i0,-8,%i0 ! (4_1) ux &= -8; 1275 1276 fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; 1277 1278 fmuld %f42,%f16,%f44 ! 
(3_1) dtmp0 *= xx; 1279 1280 faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; 1281 1282 fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; 1283 srl %o0,18,%o7 ! (4_1) ax >>= 18; 1284 1285 fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; 1286 and %o7,-8,%o7 ! (4_1) ax &= -8; 1287 1288 faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; 1289 add %o7,%l7,%o7 ! (4_1) (char*)parr1 + ax; 1290 ldd [%l0+%i0],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1291 1292 fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; 1293 ldd [%o7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); 1294 1295 fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx; 1296 1297 mov %i2,%o7 1298 fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; 1299 1300 fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; 1301 add %g1,stridey,%o0 ! py += stridey; 1302 fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; 1303 1304 srl %o7,28,%i2 ! (5_1) ux >>= 28; 1305 st %f12,[%g1] ! (3_1) py[0] = ftmp0; 1306 faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; 1307 1308 subcc counter,1,counter 1309 bneg,pn %icc,.begin 1310 or %g0,%o0,%o1 1311 1312 fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; 1313 and %o7,MASK_0x7fffffff,%g1 ! (5_1) ax = ux & 0x7fffffff; 1314 1315 fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; 1316 add %g1,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000; 1317 and %i2,-8,%i2 ! (5_1) ux &= -8; 1318 1319 fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; 1320 1321 faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; 1322 1323 fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; 1324 srl %g1,18,%o7 ! (5_1) ax >>= 18; 1325 1326 and %o7,-8,%o7 ! (5_1) ax &= -8; 1327 1328 faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; 1329 add %o7,%l7,%o7 ! (5_1) (char*)parr1 + ax; 1330 ldd [%l0+%i2],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1331 1332 fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; 1333 ldd [%o7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); 1334 1335 fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; 1336 1337 mov %l2,%o7 1338 1339 fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; 1340 add %o0,stridey,%g1 ! py += stridey; 1341 fdtos %f12,%f12 ! 
(4_1) ftmp0 = (float)res; 1342 1343 srl %o7,28,%l2 ! (6_1) ux >>= 28; 1344 st %f12,[%o0] ! (4_1) py[0] = ftmp0; 1345 faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; 1346 1347 subcc counter,1,counter 1348 bneg,pn %icc,.begin 1349 or %g0,%g1,%o1 1350 1351 fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; 1352 and %o7,MASK_0x7fffffff,%o0 ! (6_1) ax = ux & 0x7fffffff; 1353 1354 add %o0,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; 1355 and %l2,-8,%l2 ! (6_1) ux &= -8; 1356 1357 fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; 1358 1359 faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1; 1360 1361 srl %o0,18,%o7 ! (6_1) ax >>= 18; 1362 1363 and %o7,-8,%o7 ! (6_1) ax &= -8; 1364 1365 faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; 1366 add %o7,%l7,%o7 ! (6_1) (char*)parr1 + ax; 1367 ldd [%l0+%l2],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1368 1369 fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2; 1370 ldd [%o7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); 1371 1372 fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; 1373 add %g1,stridey,%o0 ! py += stridey; 1374 fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; 1375 1376 st %f12,[%g1] ! (5_1) py[0] = ftmp0; 1377 faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; 1378 1379 subcc counter,1,counter 1380 bneg,pn %icc,.begin 1381 or %g0,%o0,%o1 1382 1383 fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; 1384 1385 faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; 1386 1387 add %o0,stridey,%g1 ! py += stridey; 1388 fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; 1389 1390 st %f12,[%o0] ! (6_1) py[0] = ftmp0; 1391 1392 ba .begin 1393 or %g0,%g1,%o1 ! py += stridey; 1394 1395.exit: 1396 ret 1397 restore %g0,%g0,%g0 1398 1399 .align 16 1400.spec0: 1401 add %i3,stridex,%i3 ! px += stridex; 1402 sub counter,1,counter 1403 st %l6,[%o1] ! *(int*)py = ux; 1404 1405 ba .begin1 1406 add %o1,stridey,%o1 ! py += stridey; 1407 1408 .align 16 1409.spec1: 1410 sethi %hi(0x7f800000),%l3 1411 sethi %hi(0x3fc90c00),%l4 ! pi_2 1412 1413 sethi %hi(0x80000000),%o0 1414 add %l4,0x3db,%l4 ! pi_2 1415 1416 cmp %l5,%l3 ! 
if ( ax > 0x7f800000 ) 1417 bg,a,pn %icc,1f 1418 fabss %f0,%f0 ! fpx = fabsf(*px); 1419 1420 and %l6,%o0,%l6 ! sign = ux & 0x80000000; 1421 1422 or %l6,%l4,%l6 ! sign |= pi_2; 1423 1424 add %i3,stridex,%i3 ! px += stridex; 1425 sub counter,1,counter 1426 st %l6,[%o1] ! *(int*)py = sign; 1427 1428 ba .begin1 1429 add %o1,stridey,%o1 ! py += stridey; 1430 14311: 1432 fmuls %f0,%f0,%f0 ! fpx *= fpx; 1433 1434 add %i3,stridex,%i3 ! px += stridex 1435 sub counter,1,counter 1436 st %f0,[%o1] ! *py = fpx; 1437 1438 ba .begin1 1439 add %o1,stridey,%o1 ! py += stridey; 1440 1441 .align 16 1442.update0: 1443 cmp counter,1 1444 fzeros %f0 1445 ble,a .cont0 1446 sethi %hi(0x3fffffff),%l6 1447 1448 sub counter,1,counter 1449 st counter,[%fp+tmp_counter] 1450 1451 stx %l5,[%fp+tmp_px] 1452 sethi %hi(0x3fffffff),%l6 1453 ba .cont0 1454 or %g0,1,counter 1455 1456 .align 16 1457.update1: 1458 cmp counter,1 1459 fzeros %f0 1460 ble,a .cont1 1461 sethi %hi(0x3fffffff),%l6 1462 1463 sub counter,1,counter 1464 st counter,[%fp+tmp_counter] 1465 1466 stx %l5,[%fp+tmp_px] 1467 sethi %hi(0x3fffffff),%l6 1468 ba .cont1 1469 or %g0,1,counter 1470 1471 .align 16 1472.update2: 1473 cmp counter,2 1474 fzeros %f0 1475 ble,a .cont2 1476 sethi %hi(0x3fffffff),%l6 1477 1478 sub counter,2,counter 1479 st counter,[%fp+tmp_counter] 1480 1481 stx %l4,[%fp+tmp_px] 1482 sethi %hi(0x3fffffff),%l6 1483 ba .cont2 1484 or %g0,2,counter 1485 1486 .align 16 1487.update3: 1488 cmp counter,2 1489 fzeros %f0 1490 ble,a .cont3 1491 sethi %hi(0x3fffffff),%l6 1492 1493 sub counter,2,counter 1494 st counter,[%fp+tmp_counter] 1495 1496 stx %l4,[%fp+tmp_px] 1497 sethi %hi(0x3fffffff),%l6 1498 ba .cont3 1499 or %g0,2,counter 1500 1501 .align 16 1502.update4: 1503 cmp counter,3 1504 fzeros %f0 1505 ble,a .cont4 1506 sethi %hi(0x3fffffff),%l6 1507 1508 sub counter,3,counter 1509 st counter,[%fp+tmp_counter] 1510 1511 stx %l3,[%fp+tmp_px] 1512 sethi %hi(0x3fffffff),%l6 1513 ba .cont4 1514 or %g0,3,counter 1515 1516 .align 
16 1517.update5: 1518 cmp counter,3 1519 fzeros %f0 1520 ble,a .cont5 1521 sethi %hi(0x3fffffff),%l6 1522 1523 sub counter,3,counter 1524 st counter,[%fp+tmp_counter] 1525 1526 stx %l3,[%fp+tmp_px] 1527 sethi %hi(0x3fffffff),%l6 1528 ba .cont5 1529 or %g0,3,counter 1530 1531 .align 16 1532.update6: 1533 cmp counter,4 1534 fzeros %f0 1535 ble,a .cont6 1536 sethi %hi(0x3fffffff),%l6 1537 1538 sub counter,4,counter 1539 st counter,[%fp+tmp_counter] 1540 1541 stx %i0,[%fp+tmp_px] 1542 sethi %hi(0x3fffffff),%l6 1543 ba .cont6 1544 or %g0,4,counter 1545 1546 .align 16 1547.update7: 1548 cmp counter,4 1549 fzeros %f0 1550 ble,a .cont7 1551 sethi %hi(0x3fffffff),%l6 1552 1553 sub counter,4,counter 1554 st counter,[%fp+tmp_counter] 1555 1556 stx %i0,[%fp+tmp_px] 1557 sethi %hi(0x3fffffff),%l6 1558 ba .cont7 1559 or %g0,4,counter 1560 1561 .align 16 1562.update8: 1563 cmp counter,5 1564 fzeros %f0 1565 ble,a .cont8 1566 sethi %hi(0x3fffffff),%l6 1567 1568 sub counter,5,counter 1569 st counter,[%fp+tmp_counter] 1570 1571 stx %i2,[%fp+tmp_px] 1572 sethi %hi(0x3fffffff),%l6 1573 ba .cont8 1574 or %g0,5,counter 1575 1576 .align 16 1577.update9: 1578 cmp counter,5 1579 fzeros %f0 1580 ble,a .cont9 1581 sethi %hi(0x3fffffff),%l6 1582 1583 sub counter,5,counter 1584 st counter,[%fp+tmp_counter] 1585 1586 stx %i2,[%fp+tmp_px] 1587 sethi %hi(0x3fffffff),%l6 1588 ba .cont9 1589 or %g0,5,counter 1590 1591 .align 16 1592.update10: 1593 cmp counter,6 1594 fzeros %f0 1595 ble,a .cont10 1596 sethi %hi(0x3fffffff),%l6 1597 1598 sub counter,6,counter 1599 st counter,[%fp+tmp_counter] 1600 1601 stx %l2,[%fp+tmp_px] 1602 sethi %hi(0x3fffffff),%l6 1603 ba .cont10 1604 or %g0,6,counter 1605 1606 .align 16 1607.update11: 1608 cmp counter,6 1609 fzeros %f0 1610 ble,a .cont11 1611 sethi %hi(0x3fffffff),%l6 1612 1613 sub counter,6,counter 1614 st counter,[%fp+tmp_counter] 1615 1616 stx %l2,[%fp+tmp_px] 1617 sethi %hi(0x3fffffff),%l6 1618 ba .cont11 1619 or %g0,6,counter 1620 1621 .align 16 
!
! .update12 .. .update25 -- out-of-line handlers, all of one shape.
! With N the element count and R the register named in each handler:
!
!	%f0 = 0.0f
!	if (counter <= N)		(signed compare)
!		%l6 = %hi(0x3fffffff), goto .contK
!	tmp_counter = counter - N	(32-bit store)
!	tmp_px = R			(64-bit store)
!	%l6 = %hi(0x3fffffff)
!	counter = N
!	goto .contK
!
! NOTE(review): presumably R addresses the element that failed a range
! check and the saved tmp_px / tmp_counter pair is picked up again at
! .begin -- confirm against the code at .begin.
!
.update12:				! N = 7, R = %g5
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,7
	ble,a	.cont12			! counter <= 7: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]
	stx	%g5,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont12
	mov	7,counter		! delay slot, always runs

	.align	16
.update13:				! N = 7, R = %g5
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,7
	ble,a	.cont13			! counter <= 7: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]
	stx	%g5,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont13
	mov	7,counter		! delay slot, always runs

	.align	16
.update14:				! N = 0, R = %i3
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,0
	ble,a	.cont14			! counter <= 0: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,0,counter	! N = 0, so counter is unchanged
	st	counter,[%fp+tmp_counter]
	stx	%i3,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont14
	mov	0,counter		! delay slot, always runs

	.align	16
.update15:				! N = 0, R = %i3
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,0
	ble,a	.cont15			! counter <= 0: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,0,counter	! N = 0, so counter is unchanged
	st	counter,[%fp+tmp_counter]
	stx	%i3,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont15
	mov	0,counter		! delay slot, always runs

	.align	16
.update16:				! N = 1, R = %l5
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,1
	ble,a	.cont16			! counter <= 1: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]
	stx	%l5,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont16
	mov	1,counter		! delay slot, always runs

	.align	16
.update17:				! N = 1, R = %l5
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,1
	ble,a	.cont17			! counter <= 1: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]
	stx	%l5,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont17
	mov	1,counter		! delay slot, always runs

	.align	16
.update18:				! N = 2, R = %l4
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,2
	ble,a	.cont18			! counter <= 2: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]
	stx	%l4,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont18
	mov	2,counter		! delay slot, always runs

	.align	16
.update19:				! N = 2, R = %l4
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,2
	ble,a	.cont19			! counter <= 2: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]
	stx	%l4,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont19
	mov	2,counter		! delay slot, always runs

	.align	16
.update20:				! N = 3, R = %l3
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,3
	ble,a	.cont20			! counter <= 3: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]
	stx	%l3,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont20
	mov	3,counter		! delay slot, always runs

	.align	16
.update21:				! N = 3, R = %l3
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,3
	ble,a	.cont21			! counter <= 3: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]
	stx	%l3,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont21
	mov	3,counter		! delay slot, always runs

	.align	16
.update22:				! N = 4, R = %i0
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,4
	ble,a	.cont22			! counter <= 4: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]
	stx	%i0,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont22
	mov	4,counter		! delay slot, always runs

	.align	16
.update23:				! N = 4, R = %i0
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,4
	ble,a	.cont23			! counter <= 4: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]
	stx	%i0,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont23
	mov	4,counter		! delay slot, always runs

	.align	16
.update24:				! N = 5, R = %i2
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,5
	ble,a	.cont24			! counter <= 5: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]
	stx	%i2,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont24
	mov	5,counter		! delay slot, always runs

	.align	16
.update25:				! N = 5, R = %i2
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,5
	ble,a	.cont25			! counter <= 5: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]
	stx	%i2,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont25
	mov	5,counter		! delay slot, always runs

!
! .update26 .. .update29 -- out-of-line handlers for the main-loop range
! checks on ax (bl against %o4 for the even labels, bg against %o5 for
! the odd ones).  All four share one shape; N is the element count and
! R the register named in each handler:
!
!	%f0 = 0.0f			(replaces the ftmp0 just loaded)
!	if (counter <= N)		(signed compare)
!		%l6 = %hi(0x3fffffff), goto .contK
!	tmp_counter = counter - N	(32-bit store)
!	tmp_px = R			(64-bit store)
!	%l6 = %hi(0x3fffffff)		(replaces the ux just loaded)
!	counter = N
!	goto .contK
!
! NOTE(review): presumably the saved tmp_px / tmp_counter pair is picked
! up again at .begin once the shortened run finishes -- confirm against
! the code at .begin.
!
	.align	16
.update26:				! N = 6, R = %l2
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,6
	ble,a	.cont26			! counter <= 6: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]
	stx	%l2,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont26
	mov	6,counter		! delay slot, always runs

	.align	16
.update27:				! N = 6, R = %l2
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,6
	ble,a	.cont27			! counter <= 6: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]
	stx	%l2,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont27
	mov	6,counter		! delay slot, always runs

	.align	16
.update28:				! N = 7, R = %g5
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,7
	ble,a	.cont28			! counter <= 7: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]
	stx	%g5,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont28
	mov	7,counter		! delay slot, always runs

	.align	16
.update29:				! N = 7, R = %g5
	fzeros	%f0			! %f0 = 0.0f
	cmp	counter,7
	ble,a	.cont29			! counter <= 7: nothing to save
	sethi	%hi(0x3fffffff),%l6	! delay slot, runs only if taken

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]
	stx	%g5,[%fp+tmp_px]

	sethi	%hi(0x3fffffff),%l6
	ba	.cont29
	mov	7,counter		! delay slot, always runs

	SET_SIZE(__vatanf)
