/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vrhypot.S"

#include "libm.h"

	RO_DATA
	.align	64

!
! .CONST_TBL layout
!
!   bytes 0 .. 511	128 one-word entries.  __vrhypot indexes them with
!			((high word of dres) >> 11) & 0x1fc and feeds the
!			loaded word to fpsub32 to seed the reciprocal
!			refinement.
!			NOTE(review): each entry looks like the high word of
!			2**1023 / (1 + i/128) - confirm against the generator.
!   bytes 512 ..	double-word constants, read as TBL+TBL_SHIFT+n.
!
.CONST_TBL:
	.word	0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
	.word	0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
	.word	0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
	.word	0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
	.word	0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
	.word	0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
	.word	0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
	.word	0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
	.word	0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
	.word	0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
	.word	0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
	.word	0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
	.word	0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
	.word	0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
	.word	0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
	.word	0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
	.word	0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
	.word	0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
	.word	0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
	.word	0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
	.word	0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
	.word	0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
	.word	0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
	.word	0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
	.word	0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
	.word	0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
	.word	0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
	.word	0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
	.word	0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
	.word	0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
	.word	0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
	.word	0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,

! Double-word constants (high word, low word); the first five are loaded
! into the D2ON36/DA0/DA1/DONE/DTWO registers at function entry.
	.word	0x42300000, 0		! D2ON36 = 2**36
	.word	0xffffff00, 0		! DA0 (mask: sign, exponent, top 12 mantissa bits)
	.word	0xfff00000, 0		! DA1 (mask: sign and exponent)
	.word	0x3ff00000, 0		! DONE = 1.0
	.word	0x40000000, 0		! DTWO = 2.0
	.word	0x7fd00000, 0		! D2ON1022 = 2**1022
	.word	0x3cb00000, 0		! D2ONM52 = 2**-52
	.word	0x43200000, 0		! D2ON51 = 2**51
	.word	0x0007ffff, 0xffffffff	! 0x0007ffffffffffff

! Element strides, in bytes after the sll-by-3 at function entry.
#define stridex		%l2
#define stridey		%l3
#define stridez		%l5

! Byte size of the 128-word table; offset of the double constants.
#define TBL_SHIFT	512

#define TBL		%l1
#define counter		%l4

! Integer registers holding the constant their name spells out.
#define _0x7ff00000	%l0
#define _0x00100000	%o5
#define _0x7fffffff	%l6

! Floating-point registers holding the constants loaded from the table.
#define D2ON36		%f4
#define DTWO		%f6
#define DONE		%f8
#define DA0		%f58
#define DA1		%f56

! Scratch slots in the local frame, used as offsets from %fp.
#define dtmp0		STACK_BIAS-0x80
#define dtmp1		STACK_BIAS-0x78
#define dtmp2		STACK_BIAS-0x70
#define dtmp3		STACK_BIAS-0x68
#define dtmp4		STACK_BIAS-0x60
#define dtmp5		STACK_BIAS-0x58
#define dtmp6		STACK_BIAS-0x50
#define dtmp7		STACK_BIAS-0x48
#define dtmp8		STACK_BIAS-0x40
#define dtmp9		STACK_BIAS-0x38
#define dtmp10		STACK_BIAS-0x30
#define dtmp11		STACK_BIAS-0x28
#define dtmp12		STACK_BIAS-0x20
#define dtmp13		STACK_BIAS-0x18
#define dtmp14		STACK_BIAS-0x10
#define dtmp15		STACK_BIAS-0x08

#define ftmp0		STACK_BIAS-0x100
#define tmp_px		STACK_BIAS-0x98
#define tmp_py		STACK_BIAS-0x90
#define tmp_counter	STACK_BIAS-0x88

! sizeof temp storage - must be a multiple of 16 for V9
! (subtracted from the stack pointer by the save instruction at entry)
#define tmps		0x100

! Reference description of the per-element computation, written as
! C-like pseudocode.  The (7_0), (0_0), ... tags on the instruction
! comments further down refer back to statements of this pseudocode,
! presumably one tag per in-flight element.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!      !!!!!   algorithm   !!!!!
!  hx0 = *(int*)px;
!  hy0 = *(int*)py;
!
!  ((float*)&x0)[0] = ((float*)px)[0];
!  ((float*)&x0)[1] = ((float*)px)[1];
!  ((float*)&y0)[0] = ((float*)py)[0];
!  ((float*)&y0)[1] = ((float*)py)[1];
!
!  hx0 &= 0x7fffffff;
!  hy0 &= 0x7fffffff;
!
!  diff0 = hy0 - hx0;
!  j0 = diff0 >> 31;
!  j0 &= diff0;
!  j0 = hy0 - j0;
!  j0 &= 0x7ff00000;
!
!  j0 = 0x7ff00000 - j0;
!  ll = (long long)j0 << 32;
!  *(long long*)&scl0 = ll;
!
!  if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 )
!  {
!    lx = ((int*)px)[1];
!    ly = ((int*)py)[1];
!
!    if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0;
!    else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0;
!    else res0 = fabs(x0) * fabs(y0);
!
!    ((float*)pz)[0] = ((float*)&res0)[0];
!    ((float*)pz)[1] = ((float*)&res0)[1];
!
!    px += stridex;
!    py += stridey;
!    pz += stridez;
!    continue;
!  }
!  if ( hx0 < 0x00100000 && hy0 < 0x00100000 )
!  {
!    lx = ((int*)px)[1];
!    ly = ((int*)py)[1];
!    ii = hx0 | hy0;
!    ii |= lx;
!    ii |= ly;
!    if ( ii == 0 )
!    {
!      res0 = 1.0 / 0.0;
!      ((float*)pz)[0] = ((float*)&res0)[0];
!      ((float*)pz)[1] = ((float*)&res0)[1];
!
!      px += stridex;
!      py += stridey;
!      pz += stridez;
!      continue;
!    }
!    x0 = fabs(x0);
!    y0 = fabs(y0);
!    if ( hx0 < 0x00080000 )
!    {
!      x0 = *(long long*)&x0;
!    }
!    else
!    {
!      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
!      x0 = vis_fand(x0, dtmp0);
!      x0 = *(long long*)&x0;
!      x0 += D2ON51;
!    }
!    x0 *= D2ONM52;
!    if ( hy0 < 0x00080000 )
!    {
!      y0 = *(long long*)&y0;
!    }
!    else
!    {
!      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
!      y0 = vis_fand(y0, dtmp0);
!      y0 = *(long long*)&y0;
!      y0 += D2ON51;
!    }
!    y0 *= D2ONM52;
!    *(long long*)&scl0 = 0x7fd0000000000000ULL;
!  }
!  else
!  {
!    x0 *= scl0;
!    y0 *= scl0;
!  }
!
!  x_hi0 = x0 + D2ON36;
!  y_hi0 = y0 + D2ON36;
!  x_hi0 -= D2ON36;
!  y_hi0 -= D2ON36;
!  x_lo0 = x0 - x_hi0;
!  y_lo0 = y0 - y_hi0;
!  res0_hi = x_hi0 * x_hi0;
!  dtmp0 = y_hi0 * y_hi0;
!  res0_hi += dtmp0;
!  res0_lo = x0 + x_hi0;
!  res0_lo *= x_lo0;
!  dtmp1 = y0 + y_hi0;
!  dtmp1 *= y_lo0;
!  res0_lo += dtmp1;
!
!  dres = res0_hi + res0_lo;
!  dexp0 = vis_fand(dres,DA1);
!  iarr = ((int*)&dres)[0];
!
!  iarr >>= 11;
!  iarr &= 0x1fc;
!  dtmp0 = ((double*)((char*)dll1 + iarr))[0];
!  dd = vis_fpsub32(dtmp0, dexp0);
!
!  dtmp0 = dd * dres;
!  dtmp0 = DTWO - dtmp0;
!  dd *= dtmp0;
!  dtmp1 = dd * dres;
!  dtmp1 = DTWO - dtmp1;
!  dd *= dtmp1;
!  dtmp2 = dd * dres;
!  dtmp2 = DTWO - dtmp2;
!  dres = dd * dtmp2;
!
!  res0 = vis_fand(dres,DA0);
!
!  dtmp0 = res0_hi * res0;
!  dtmp0 = DONE - dtmp0;
!  dtmp1 = res0_lo * res0;
!  dtmp0 -= dtmp1;
!  dtmp0 *= dres;
!  res0 += dtmp0;
!
!  res0 = sqrt ( res0 );
!
!  res0 = scl0 * res0;
!
!  ((float*)pz)[0] = ((float*)&res0)[0];
!  ((float*)pz)[1] = ((float*)&res0)[1];
!
!  px += stridex;
!  py += stridey;
!  pz += stridez;
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
270*5b2ba9d3SPiotr Jasiukajtis 271*5b2ba9d3SPiotr Jasiukajtis ENTRY(__vrhypot) 272*5b2ba9d3SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-tmps,%sp 273*5b2ba9d3SPiotr Jasiukajtis PIC_SETUP(l7) 274*5b2ba9d3SPiotr Jasiukajtis PIC_SET(l7,.CONST_TBL,l1) 275*5b2ba9d3SPiotr Jasiukajtis wr %g0,0x82,%asi 276*5b2ba9d3SPiotr Jasiukajtis 277*5b2ba9d3SPiotr Jasiukajtis#ifdef __sparcv9 278*5b2ba9d3SPiotr Jasiukajtis ldx [%fp+STACK_BIAS+176],stridez 279*5b2ba9d3SPiotr Jasiukajtis#else 280*5b2ba9d3SPiotr Jasiukajtis ld [%fp+STACK_BIAS+92],stridez 281*5b2ba9d3SPiotr Jasiukajtis#endif 282*5b2ba9d3SPiotr Jasiukajtis 283*5b2ba9d3SPiotr Jasiukajtis sll %i2,3,stridex 284*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x7ff00000),_0x7ff00000 285*5b2ba9d3SPiotr Jasiukajtis st %i0,[%fp+tmp_counter] 286*5b2ba9d3SPiotr Jasiukajtis 287*5b2ba9d3SPiotr Jasiukajtis sll %i4,3,stridey 288*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x00100000),_0x00100000 289*5b2ba9d3SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 290*5b2ba9d3SPiotr Jasiukajtis 291*5b2ba9d3SPiotr Jasiukajtis sll stridez,3,stridez 292*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x7ffffc00),_0x7fffffff 293*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 294*5b2ba9d3SPiotr Jasiukajtis 295*5b2ba9d3SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT],D2ON36 296*5b2ba9d3SPiotr Jasiukajtis add _0x7fffffff,1023,_0x7fffffff 297*5b2ba9d3SPiotr Jasiukajtis 298*5b2ba9d3SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+8],DA0 299*5b2ba9d3SPiotr Jasiukajtis 300*5b2ba9d3SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+16],DA1 301*5b2ba9d3SPiotr Jasiukajtis 302*5b2ba9d3SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+24],DONE 303*5b2ba9d3SPiotr Jasiukajtis 304*5b2ba9d3SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+32],DTWO 305*5b2ba9d3SPiotr Jasiukajtis 306*5b2ba9d3SPiotr Jasiukajtis.begin: 307*5b2ba9d3SPiotr Jasiukajtis ld [%fp+tmp_counter],counter 308*5b2ba9d3SPiotr Jasiukajtis ldx [%fp+tmp_px],%i4 309*5b2ba9d3SPiotr Jasiukajtis ldx [%fp+tmp_py],%i3 310*5b2ba9d3SPiotr Jasiukajtis st %g0,[%fp+tmp_counter] 311*5b2ba9d3SPiotr 
Jasiukajtis.begin1: 312*5b2ba9d3SPiotr Jasiukajtis cmp counter,0 313*5b2ba9d3SPiotr Jasiukajtis ble,pn %icc,.exit 314*5b2ba9d3SPiotr Jasiukajtis 315*5b2ba9d3SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; 316*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i1 317*5b2ba9d3SPiotr Jasiukajtis 318*5b2ba9d3SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; 319*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 320*5b2ba9d3SPiotr Jasiukajtis 321*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; 322*5b2ba9d3SPiotr Jasiukajtis 323*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 324*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.spec0 ! (7_0) if ( hx0 >= 0x7ff00000 ) 325*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; 326*5b2ba9d3SPiotr Jasiukajtis 327*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 328*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.spec0 ! (7_0) if ( hy0 >= 0x7ff00000 ) 329*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; 330*5b2ba9d3SPiotr Jasiukajtis 331*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; 332*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 333*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.spec1 ! (7_0) if ( hx0 < 0x00100000 ) 334*5b2ba9d3SPiotr Jasiukajtis 335*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (7_0) j0 &= diff0; 336*5b2ba9d3SPiotr Jasiukajtis.cont_spec0: 337*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; 338*5b2ba9d3SPiotr Jasiukajtis 339*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; 340*5b2ba9d3SPiotr Jasiukajtis 341*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; 342*5b2ba9d3SPiotr Jasiukajtis 343*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 344*5b2ba9d3SPiotr Jasiukajtis 345*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp15] ! 
(7_0) *(long long*)&scl0 = ll; 346*5b2ba9d3SPiotr Jasiukajtis 347*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; 348*5b2ba9d3SPiotr Jasiukajtis.cont_spec1: 349*5b2ba9d3SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; 350*5b2ba9d3SPiotr Jasiukajtis mov %i1,%i2 351*5b2ba9d3SPiotr Jasiukajtis 352*5b2ba9d3SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; 353*5b2ba9d3SPiotr Jasiukajtis 354*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; 355*5b2ba9d3SPiotr Jasiukajtis mov %i0,%o0 356*5b2ba9d3SPiotr Jasiukajtis 357*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 358*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update0 ! (0_0) if ( hx0 >= 0x7ff00000 ) 359*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; 360*5b2ba9d3SPiotr Jasiukajtis 361*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 362*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; 363*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update0 ! (0_0) if ( hy0 >= 0x7ff00000 ) 364*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; 365*5b2ba9d3SPiotr Jasiukajtis 366*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 367*5b2ba9d3SPiotr Jasiukajtis 368*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (0_0) j0 &= diff0; 369*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update1 ! (0_0) if ( hx0 < 0x00100000 ) 370*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; 371*5b2ba9d3SPiotr Jasiukajtis.cont0: 372*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; 373*5b2ba9d3SPiotr Jasiukajtis 374*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; 375*5b2ba9d3SPiotr Jasiukajtis.cont1: 376*5b2ba9d3SPiotr Jasiukajtis sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; 377*5b2ba9d3SPiotr Jasiukajtis stx %o4,[%fp+dtmp1] ! 
(0_0) *(long long*)&scl0 = ll; 378*5b2ba9d3SPiotr Jasiukajtis 379*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; 380*5b2ba9d3SPiotr Jasiukajtis 381*5b2ba9d3SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; 382*5b2ba9d3SPiotr Jasiukajtis 383*5b2ba9d3SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; 384*5b2ba9d3SPiotr Jasiukajtis 385*5b2ba9d3SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; 386*5b2ba9d3SPiotr Jasiukajtis 387*5b2ba9d3SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 388*5b2ba9d3SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; 389*5b2ba9d3SPiotr Jasiukajtis 390*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; 391*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 392*5b2ba9d3SPiotr Jasiukajtis 393*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; 394*5b2ba9d3SPiotr Jasiukajtis 395*5b2ba9d3SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; 396*5b2ba9d3SPiotr Jasiukajtis 397*5b2ba9d3SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 398*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; 399*5b2ba9d3SPiotr Jasiukajtis 400*5b2ba9d3SPiotr Jasiukajtis lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; 401*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 402*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; 403*5b2ba9d3SPiotr Jasiukajtis 404*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; 405*5b2ba9d3SPiotr Jasiukajtis 406*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 407*5b2ba9d3SPiotr Jasiukajtis stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; 408*5b2ba9d3SPiotr Jasiukajtis 409*5b2ba9d3SPiotr Jasiukajtis and %g1,_0x7fffffff,%l7 ! 
(1_0) hy0 &= 0x7fffffff; 410*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update2 ! (1_0) if ( hx0 >= 0x7ff00000 ) 411*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; 412*5b2ba9d3SPiotr Jasiukajtis 413*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 414*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; 415*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update3 ! (1_0) if ( hy0 >= 0x7ff00000 ) 416*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; 417*5b2ba9d3SPiotr Jasiukajtis 418*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; 419*5b2ba9d3SPiotr Jasiukajtis 420*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (1_0) j0 &= diff0; 421*5b2ba9d3SPiotr Jasiukajtis 422*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; 423*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; 424*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 425*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; 426*5b2ba9d3SPiotr Jasiukajtis 427*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; 428*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; 429*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update4 ! (1_0) if ( hx0 < 0x00100000 ) 430*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; 431*5b2ba9d3SPiotr Jasiukajtis 432*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; 433*5b2ba9d3SPiotr Jasiukajtis.cont4: 434*5b2ba9d3SPiotr Jasiukajtis sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; 435*5b2ba9d3SPiotr Jasiukajtis stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; 436*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; 437*5b2ba9d3SPiotr Jasiukajtis 438*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! 
(7_1) y_lo0 = y0 - y_hi0; 439*5b2ba9d3SPiotr Jasiukajtis 440*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; 441*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; 442*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; 443*5b2ba9d3SPiotr Jasiukajtis 444*5b2ba9d3SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; 445*5b2ba9d3SPiotr Jasiukajtis 446*5b2ba9d3SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; 447*5b2ba9d3SPiotr Jasiukajtis 448*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; 449*5b2ba9d3SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; 450*5b2ba9d3SPiotr Jasiukajtis 451*5b2ba9d3SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; 452*5b2ba9d3SPiotr Jasiukajtis 453*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; 454*5b2ba9d3SPiotr Jasiukajtis 455*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; 456*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; 457*5b2ba9d3SPiotr Jasiukajtis 458*5b2ba9d3SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; 459*5b2ba9d3SPiotr Jasiukajtis mov %i1,%i2 460*5b2ba9d3SPiotr Jasiukajtis 461*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; 462*5b2ba9d3SPiotr Jasiukajtis 463*5b2ba9d3SPiotr Jasiukajtis lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; 464*5b2ba9d3SPiotr Jasiukajtis mov %i0,%o0 465*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; 466*5b2ba9d3SPiotr Jasiukajtis 467*5b2ba9d3SPiotr Jasiukajtis faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; 468*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; 469*5b2ba9d3SPiotr Jasiukajtis 470*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 
0x7ff00000 471*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update5 ! (2_0) if ( hx0 >= 0x7ff00000 ) 472*5b2ba9d3SPiotr Jasiukajtis stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; 473*5b2ba9d3SPiotr Jasiukajtis 474*5b2ba9d3SPiotr Jasiukajtis and %g1,_0x7fffffff,%l7 ! (2_0) hx0 &= 0x7fffffff; 475*5b2ba9d3SPiotr Jasiukajtis st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; 476*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; 477*5b2ba9d3SPiotr Jasiukajtis 478*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; 479*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 480*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update6 ! (2_0) if ( hy0 >= 0x7ff00000 ) 481*5b2ba9d3SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; 482*5b2ba9d3SPiotr Jasiukajtis 483*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; 484*5b2ba9d3SPiotr Jasiukajtis 485*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (2_0) j0 &= diff0; 486*5b2ba9d3SPiotr Jasiukajtis 487*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; 488*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 489*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; 490*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; 491*5b2ba9d3SPiotr Jasiukajtis 492*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; 493*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; 494*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update7 ! (2_0) if ( hx0 < 0x00100000 ) 495*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; 496*5b2ba9d3SPiotr Jasiukajtis.cont7: 497*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; 498*5b2ba9d3SPiotr Jasiukajtis 499*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! 
(2_0) ll = (long long)j0 << 32; 500*5b2ba9d3SPiotr Jasiukajtis.cont8: 501*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; 502*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; 503*5b2ba9d3SPiotr Jasiukajtis 504*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; 505*5b2ba9d3SPiotr Jasiukajtis 506*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; 507*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; 508*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; 509*5b2ba9d3SPiotr Jasiukajtis 510*5b2ba9d3SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; 511*5b2ba9d3SPiotr Jasiukajtis 512*5b2ba9d3SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; 513*5b2ba9d3SPiotr Jasiukajtis 514*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; 515*5b2ba9d3SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; 516*5b2ba9d3SPiotr Jasiukajtis 517*5b2ba9d3SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 518*5b2ba9d3SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; 519*5b2ba9d3SPiotr Jasiukajtis 520*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; 521*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 522*5b2ba9d3SPiotr Jasiukajtis fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); 523*5b2ba9d3SPiotr Jasiukajtis 524*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; 525*5b2ba9d3SPiotr Jasiukajtis 526*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; 527*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%i3 ! (7_1) iarr >>= 11; 528*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; 529*5b2ba9d3SPiotr Jasiukajtis 530*5b2ba9d3SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! 
(7_1) iarr &= 0x1fc; 531*5b2ba9d3SPiotr Jasiukajtis 532*5b2ba9d3SPiotr Jasiukajtis add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr 533*5b2ba9d3SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; 534*5b2ba9d3SPiotr Jasiukajtis 535*5b2ba9d3SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 536*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 537*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; 538*5b2ba9d3SPiotr Jasiukajtis 539*5b2ba9d3SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; 540*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 541*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; 542*5b2ba9d3SPiotr Jasiukajtis 543*5b2ba9d3SPiotr Jasiukajtis faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; 544*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; 545*5b2ba9d3SPiotr Jasiukajtis 546*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 547*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; 548*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update9 ! (3_0) if ( hx0 >= 0x7ff00000 ) 549*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); 550*5b2ba9d3SPiotr Jasiukajtis 551*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; 552*5b2ba9d3SPiotr Jasiukajtis st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; 553*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; 554*5b2ba9d3SPiotr Jasiukajtis 555*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; 556*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 557*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update10 ! (3_0) if ( hy0 >= 0x7ff00000 ) 558*5b2ba9d3SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! 
(1_0) y_hi0 -= D2ON36; 559*5b2ba9d3SPiotr Jasiukajtis 560*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; 561*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; 562*5b2ba9d3SPiotr Jasiukajtis 563*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (3_0) j0 &= diff0; 564*5b2ba9d3SPiotr Jasiukajtis 565*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; 566*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 567*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; 568*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; 569*5b2ba9d3SPiotr Jasiukajtis 570*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; 571*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; 572*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update11 ! (3_0) if ( hx0 < 0x00100000 ) 573*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; 574*5b2ba9d3SPiotr Jasiukajtis.cont11: 575*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; 576*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; 577*5b2ba9d3SPiotr Jasiukajtis.cont12: 578*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; 579*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; 580*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; 581*5b2ba9d3SPiotr Jasiukajtis 582*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 583*5b2ba9d3SPiotr Jasiukajtis 584*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; 585*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; 586*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; 587*5b2ba9d3SPiotr Jasiukajtis 588*5b2ba9d3SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! 
(2_0) ((float*)&x0)[0] = ((float*)px)[0]; 589*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; 590*5b2ba9d3SPiotr Jasiukajtis 591*5b2ba9d3SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; 592*5b2ba9d3SPiotr Jasiukajtis 593*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; 594*5b2ba9d3SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; 595*5b2ba9d3SPiotr Jasiukajtis 596*5b2ba9d3SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; 597*5b2ba9d3SPiotr Jasiukajtis 598*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; 599*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; 600*5b2ba9d3SPiotr Jasiukajtis fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); 601*5b2ba9d3SPiotr Jasiukajtis 602*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; 603*5b2ba9d3SPiotr Jasiukajtis 604*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; 605*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%o4 ! (0_0) iarr >>= 11; 606*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; 607*5b2ba9d3SPiotr Jasiukajtis 608*5b2ba9d3SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; 609*5b2ba9d3SPiotr Jasiukajtis 610*5b2ba9d3SPiotr Jasiukajtis add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr 611*5b2ba9d3SPiotr Jasiukajtis mov %i1,%i2 612*5b2ba9d3SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; 613*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; 614*5b2ba9d3SPiotr Jasiukajtis 615*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 616*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; 617*5b2ba9d3SPiotr Jasiukajtis 618*5b2ba9d3SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! 
(4_0) hy0 = *(int*)py; 619*5b2ba9d3SPiotr Jasiukajtis mov %i0,%o0 620*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; 621*5b2ba9d3SPiotr Jasiukajtis 622*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; 623*5b2ba9d3SPiotr Jasiukajtis faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; 624*5b2ba9d3SPiotr Jasiukajtis 625*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; 626*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 627*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; 628*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); 629*5b2ba9d3SPiotr Jasiukajtis 630*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; 631*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update13 ! (4_0) if ( hx0 >= 0x7ff00000 ) 632*5b2ba9d3SPiotr Jasiukajtis st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; 633*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; 634*5b2ba9d3SPiotr Jasiukajtis 635*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; 636*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 637*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update14 ! (4_0) if ( hy0 >= 0x7ff00000 ) 638*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; 639*5b2ba9d3SPiotr Jasiukajtis 640*5b2ba9d3SPiotr Jasiukajtis fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; 641*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; 642*5b2ba9d3SPiotr Jasiukajtis 643*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (4_0) j0 &= diff0; 644*5b2ba9d3SPiotr Jasiukajtis 645*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 646*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; 647*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (4_0) hx0 ? 
0x00100000 648*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 649*5b2ba9d3SPiotr Jasiukajtis 650*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 651*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; 652*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update15 ! (4_0) if ( hx0 < 0x00100000 ) 653*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; 654*5b2ba9d3SPiotr Jasiukajtis.cont15: 655*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; 656*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; 657*5b2ba9d3SPiotr Jasiukajtis.cont16: 658*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; 659*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; 660*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; 661*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; 662*5b2ba9d3SPiotr Jasiukajtis 663*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; 664*5b2ba9d3SPiotr Jasiukajtis 665*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; 666*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; 667*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; 668*5b2ba9d3SPiotr Jasiukajtis 669*5b2ba9d3SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; 670*5b2ba9d3SPiotr Jasiukajtis fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; 671*5b2ba9d3SPiotr Jasiukajtis 672*5b2ba9d3SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; 673*5b2ba9d3SPiotr Jasiukajtis 674*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; 675*5b2ba9d3SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; 676*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f14,%f20 ! 
(7_1) dtmp2 = DTWO - dtmp2; 677*5b2ba9d3SPiotr Jasiukajtis 678*5b2ba9d3SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; 679*5b2ba9d3SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 680*5b2ba9d3SPiotr Jasiukajtis 681*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; 682*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; 683*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 684*5b2ba9d3SPiotr Jasiukajtis fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); 685*5b2ba9d3SPiotr Jasiukajtis 686*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; 687*5b2ba9d3SPiotr Jasiukajtis 688*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; 689*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%i3 ! (1_0) iarr >>= 11; 690*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; 691*5b2ba9d3SPiotr Jasiukajtis 692*5b2ba9d3SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; 693*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; 694*5b2ba9d3SPiotr Jasiukajtis 695*5b2ba9d3SPiotr Jasiukajtis add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr 696*5b2ba9d3SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; 697*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; 698*5b2ba9d3SPiotr Jasiukajtis 699*5b2ba9d3SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 700*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 701*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; 702*5b2ba9d3SPiotr Jasiukajtis 703*5b2ba9d3SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; 704*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 705*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; 706*5b2ba9d3SPiotr Jasiukajtis 707*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! 
(5_0) hx0 &= 0x7fffffff; 708*5b2ba9d3SPiotr Jasiukajtis faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; 709*5b2ba9d3SPiotr Jasiukajtis 710*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; 711*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 712*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; 713*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); 714*5b2ba9d3SPiotr Jasiukajtis 715*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; 716*5b2ba9d3SPiotr Jasiukajtis st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; 717*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update17 ! (5_0) if ( hx0 >= 0x7ff00000 ) 718*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; 719*5b2ba9d3SPiotr Jasiukajtis 720*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; 721*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 722*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update18 ! (5_0) if ( hy0 >= 0x7ff00000 ) 723*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; 724*5b2ba9d3SPiotr Jasiukajtis 725*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; 726*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; 727*5b2ba9d3SPiotr Jasiukajtis 728*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (5_0) j0 &= diff0; 729*5b2ba9d3SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); 730*5b2ba9d3SPiotr Jasiukajtis 731*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 732*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; 733*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 734*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; 735*5b2ba9d3SPiotr Jasiukajtis 736*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! 
(3_0) dtmp0 = y_hi0 * y_hi0; 737*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; 738*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update19 ! (5_0) if ( hx0 < 0x00100000 ) 739*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 740*5b2ba9d3SPiotr Jasiukajtis.cont19a: 741*5b2ba9d3SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 742*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; 743*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; 744*5b2ba9d3SPiotr Jasiukajtis.cont19b: 745*5b2ba9d3SPiotr Jasiukajtis fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; 746*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; 747*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; 748*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; 749*5b2ba9d3SPiotr Jasiukajtis 750*5b2ba9d3SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; 751*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; 752*5b2ba9d3SPiotr Jasiukajtis.cont20: 753*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; 754*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; 755*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; 756*5b2ba9d3SPiotr Jasiukajtis 757*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; 758*5b2ba9d3SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; 759*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; 760*5b2ba9d3SPiotr Jasiukajtis 761*5b2ba9d3SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; 762*5b2ba9d3SPiotr Jasiukajtis 763*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; 764*5b2ba9d3SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! 
(4_0) ((float*)&y0)[0] = ((float*)py)[0]; 765*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; 766*5b2ba9d3SPiotr Jasiukajtis 767*5b2ba9d3SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; 768*5b2ba9d3SPiotr Jasiukajtis 769*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; 770*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; 771*5b2ba9d3SPiotr Jasiukajtis fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); 772*5b2ba9d3SPiotr Jasiukajtis 773*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; 774*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; 775*5b2ba9d3SPiotr Jasiukajtis 776*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; 777*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%o4 ! (2_0) iarr >>= 11; 778*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; 779*5b2ba9d3SPiotr Jasiukajtis 780*5b2ba9d3SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; 781*5b2ba9d3SPiotr Jasiukajtis fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; 782*5b2ba9d3SPiotr Jasiukajtis 783*5b2ba9d3SPiotr Jasiukajtis add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr 784*5b2ba9d3SPiotr Jasiukajtis mov %i1,%i2 785*5b2ba9d3SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; 786*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; 787*5b2ba9d3SPiotr Jasiukajtis 788*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; 789*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 790*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; 791*5b2ba9d3SPiotr Jasiukajtis 792*5b2ba9d3SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; 793*5b2ba9d3SPiotr Jasiukajtis mov %i0,%o0 794*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! 
(4_0) y_hi0 = y0 + D2ON36; 795*5b2ba9d3SPiotr Jasiukajtis 796*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; 797*5b2ba9d3SPiotr Jasiukajtis faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo; 798*5b2ba9d3SPiotr Jasiukajtis 799*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; 800*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 801*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; 802*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); 803*5b2ba9d3SPiotr Jasiukajtis 804*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; 805*5b2ba9d3SPiotr Jasiukajtis st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; 806*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update21 ! (6_0) if ( hx0 >= 0x7ff00000 ) 807*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; 808*5b2ba9d3SPiotr Jasiukajtis 809*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; 810*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 811*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update22 ! (6_0) if ( hy0 >= 0x7ff00000 ) 812*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; 813*5b2ba9d3SPiotr Jasiukajtis 814*5b2ba9d3SPiotr Jasiukajtis fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; 815*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; 816*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; 817*5b2ba9d3SPiotr Jasiukajtis 818*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (6_0) j0 &= diff0; 819*5b2ba9d3SPiotr Jasiukajtis fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); 820*5b2ba9d3SPiotr Jasiukajtis 821*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; 822*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; 823*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (6_0) hx0 ? 
0x00100000 824*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; 825*5b2ba9d3SPiotr Jasiukajtis 826*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; 827*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; 828*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update23 ! (6_0) if ( hx0 < 0x00100000 ) 829*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; 830*5b2ba9d3SPiotr Jasiukajtis.cont23a: 831*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; 832*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; 833*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; 834*5b2ba9d3SPiotr Jasiukajtis.cont23b: 835*5b2ba9d3SPiotr Jasiukajtis fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; 836*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; 837*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; 838*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; 839*5b2ba9d3SPiotr Jasiukajtis 840*5b2ba9d3SPiotr Jasiukajtis fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; 841*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; 842*5b2ba9d3SPiotr Jasiukajtis.cont24: 843*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; 844*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; 845*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; 846*5b2ba9d3SPiotr Jasiukajtis 847*5b2ba9d3SPiotr Jasiukajtis lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; 848*5b2ba9d3SPiotr Jasiukajtis fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; 849*5b2ba9d3SPiotr Jasiukajtis 850*5b2ba9d3SPiotr Jasiukajtis lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; 851*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f50,%f52 ! 
(0_0) dtmp0 = DONE - dtmp0; 852*5b2ba9d3SPiotr Jasiukajtis 853*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; 854*5b2ba9d3SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; 855*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; 856*5b2ba9d3SPiotr Jasiukajtis 857*5b2ba9d3SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 858*5b2ba9d3SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; 859*5b2ba9d3SPiotr Jasiukajtis 860*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; 861*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 862*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; 863*5b2ba9d3SPiotr Jasiukajtis fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); 864*5b2ba9d3SPiotr Jasiukajtis 865*5b2ba9d3SPiotr Jasiukajtis fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; 866*5b2ba9d3SPiotr Jasiukajtis fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; 867*5b2ba9d3SPiotr Jasiukajtis 868*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; 869*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%i3 ! (3_0) iarr >>= 11; 870*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; 871*5b2ba9d3SPiotr Jasiukajtis 872*5b2ba9d3SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; 873*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; 874*5b2ba9d3SPiotr Jasiukajtis 875*5b2ba9d3SPiotr Jasiukajtis fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); 876*5b2ba9d3SPiotr Jasiukajtis add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr 877*5b2ba9d3SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; 878*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; 879*5b2ba9d3SPiotr Jasiukajtis 880*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; 881*5b2ba9d3SPiotr Jasiukajtis add %i0,stridey,%i3 ! 
py += stridey 882*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 883*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; 884*5b2ba9d3SPiotr Jasiukajtis 885*5b2ba9d3SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; 886*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 887*5b2ba9d3SPiotr Jasiukajtis faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; 888*5b2ba9d3SPiotr Jasiukajtis 889*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; 890*5b2ba9d3SPiotr Jasiukajtis faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; 891*5b2ba9d3SPiotr Jasiukajtis 892*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; 893*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 894*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; 895*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); 896*5b2ba9d3SPiotr Jasiukajtis 897*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; 898*5b2ba9d3SPiotr Jasiukajtis st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; 899*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update25 ! (7_0) if ( hx0 >= 0x7ff00000 ) 900*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; 901*5b2ba9d3SPiotr Jasiukajtis 902*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; 903*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 904*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update26 ! (7_0) if ( hy0 >= 0x7ff00000 ) 905*5b2ba9d3SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; 906*5b2ba9d3SPiotr Jasiukajtis 907*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; 908*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; 909*5b2ba9d3SPiotr Jasiukajtis faddd %f28,%f48,%f48 ! 
(0_0) res0 += dtmp0; 910*5b2ba9d3SPiotr Jasiukajtis 911*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (7_0) j0 &= diff0; 912*5b2ba9d3SPiotr Jasiukajtis fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); 913*5b2ba9d3SPiotr Jasiukajtis 914*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 915*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; 916*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 917*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; 918*5b2ba9d3SPiotr Jasiukajtis 919*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; 920*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; 921*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update27 ! (7_0) if ( hx0 < 0x00100000 ) 922*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 923*5b2ba9d3SPiotr Jasiukajtis.cont27a: 924*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 925*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; 926*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 927*5b2ba9d3SPiotr Jasiukajtis.cont27b: 928*5b2ba9d3SPiotr Jasiukajtis fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; 929*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 930*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 931*5b2ba9d3SPiotr Jasiukajtis faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; 932*5b2ba9d3SPiotr Jasiukajtis 933*5b2ba9d3SPiotr Jasiukajtis fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; 934*5b2ba9d3SPiotr Jasiukajtis fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; 935*5b2ba9d3SPiotr Jasiukajtis.cont28: 936*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; 937*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll; 938*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f46,%f42 ! 
(5_0) res0_hi += dtmp0; 939*5b2ba9d3SPiotr Jasiukajtis 940*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; 941*5b2ba9d3SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; 942*5b2ba9d3SPiotr Jasiukajtis 943*5b2ba9d3SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; 944*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; 945*5b2ba9d3SPiotr Jasiukajtis 946*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; 947*5b2ba9d3SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; 948*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; 949*5b2ba9d3SPiotr Jasiukajtis 950*5b2ba9d3SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; 951*5b2ba9d3SPiotr Jasiukajtis 952*5b2ba9d3SPiotr Jasiukajtis fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; 953*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; 954*5b2ba9d3SPiotr Jasiukajtis fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); 955*5b2ba9d3SPiotr Jasiukajtis 956*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; 957*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; 958*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; 959*5b2ba9d3SPiotr Jasiukajtis 960*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; 961*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%o4 ! (4_0) iarr >>= 11; 962*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; 963*5b2ba9d3SPiotr Jasiukajtis 964*5b2ba9d3SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; 965*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f14,%f26 ! 
(2_0) dres = dd * dtmp2; 966*5b2ba9d3SPiotr Jasiukajtis 967*5b2ba9d3SPiotr Jasiukajtis cmp counter,8 968*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.tail 969*5b2ba9d3SPiotr Jasiukajtis nop 970*5b2ba9d3SPiotr Jasiukajtis 971*5b2ba9d3SPiotr Jasiukajtis ba .main_loop 972*5b2ba9d3SPiotr Jasiukajtis sub counter,8,counter 973*5b2ba9d3SPiotr Jasiukajtis 974*5b2ba9d3SPiotr Jasiukajtis .align 16 975*5b2ba9d3SPiotr Jasiukajtis.main_loop: 976*5b2ba9d3SPiotr Jasiukajtis fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); 977*5b2ba9d3SPiotr Jasiukajtis add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr 978*5b2ba9d3SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; 979*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; 980*5b2ba9d3SPiotr Jasiukajtis 981*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; 982*5b2ba9d3SPiotr Jasiukajtis mov %i1,%i2 983*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 984*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f50 ! (6_1) x_hi0 = x0 + D2ON36; 985*5b2ba9d3SPiotr Jasiukajtis 986*5b2ba9d3SPiotr Jasiukajtis nop 987*5b2ba9d3SPiotr Jasiukajtis mov %i0,%o0 988*5b2ba9d3SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; 989*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f2 ! (6_1) y_hi0 = y0 + D2ON36; 990*5b2ba9d3SPiotr Jasiukajtis 991*5b2ba9d3SPiotr Jasiukajtis faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; 992*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; 993*5b2ba9d3SPiotr Jasiukajtis st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; 994*5b2ba9d3SPiotr Jasiukajtis fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; 995*5b2ba9d3SPiotr Jasiukajtis 996*5b2ba9d3SPiotr Jasiukajtis fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; 997*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 998*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! 
(7_2) ((float*)pz)[0] = ((float*)&res0)[0]; 999*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); 1000*5b2ba9d3SPiotr Jasiukajtis 1001*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; 1002*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1]; 1003*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update29 ! (0_0) if ( hx0 >= 0x7ff00000 ) 1004*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f20 ! (6_1) x_hi0 -= D2ON36; 1005*5b2ba9d3SPiotr Jasiukajtis 1006*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 1007*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; 1008*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update30 ! (0_0) if ( hy0 >= 0x7ff00000 ) 1009*5b2ba9d3SPiotr Jasiukajtis fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; 1010*5b2ba9d3SPiotr Jasiukajtis 1011*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; 1012*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; 1013*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; 1014*5b2ba9d3SPiotr Jasiukajtis faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; 1015*5b2ba9d3SPiotr Jasiukajtis 1016*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (0_0) j0 &= diff0; 1017*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 1018*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update31 ! (0_0) if ( hx0 < 0x00100000 ) 1019*5b2ba9d3SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); 1020*5b2ba9d3SPiotr Jasiukajtis.cont31: 1021*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; 1022*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; 1023*5b2ba9d3SPiotr Jasiukajtis nop 1024*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; 1025*5b2ba9d3SPiotr Jasiukajtis 1026*5b2ba9d3SPiotr Jasiukajtis fmuld %f2,%f2,%f46 ! 
(6_1) dtmp0 = y_hi0 * y_hi0; 1027*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1028*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; 1029*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; 1030*5b2ba9d3SPiotr Jasiukajtis 1031*5b2ba9d3SPiotr Jasiukajtis fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; 1032*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; 1033*5b2ba9d3SPiotr Jasiukajtis nop 1034*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; 1035*5b2ba9d3SPiotr Jasiukajtis.cont32: 1036*5b2ba9d3SPiotr Jasiukajtis fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; 1037*5b2ba9d3SPiotr Jasiukajtis sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; 1038*5b2ba9d3SPiotr Jasiukajtis stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; 1039*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f2,%f50 ! (6_1) dtmp1 = y0 + y_hi0; 1040*5b2ba9d3SPiotr Jasiukajtis 1041*5b2ba9d3SPiotr Jasiukajtis fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; 1042*5b2ba9d3SPiotr Jasiukajtis nop 1043*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1044*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f2,%f2 ! (6_1) y_lo0 = y0 - y_hi0; 1045*5b2ba9d3SPiotr Jasiukajtis 1046*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f28,%f28 ! (6_1) res0_lo *= x_lo0; 1047*5b2ba9d3SPiotr Jasiukajtis nop 1048*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; 1049*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f46,%f30 ! (6_1) res0_hi += dtmp0; 1050*5b2ba9d3SPiotr Jasiukajtis 1051*5b2ba9d3SPiotr Jasiukajtis nop 1052*5b2ba9d3SPiotr Jasiukajtis nop 1053*5b2ba9d3SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; 1054*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; 1055*5b2ba9d3SPiotr Jasiukajtis 1056*5b2ba9d3SPiotr Jasiukajtis nop 1057*5b2ba9d3SPiotr Jasiukajtis nop 1058*5b2ba9d3SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! 
(7_1) ((float*)&x0)[1] = ((float*)px)[1]; 1059*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; 1060*5b2ba9d3SPiotr Jasiukajtis 1061*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f2,%f46 ! (6_1) dtmp1 *= y_lo0; 1062*5b2ba9d3SPiotr Jasiukajtis nop 1063*5b2ba9d3SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; 1064*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; 1065*5b2ba9d3SPiotr Jasiukajtis 1066*5b2ba9d3SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 1067*5b2ba9d3SPiotr Jasiukajtis nop 1068*5b2ba9d3SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; 1069*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1070*5b2ba9d3SPiotr Jasiukajtis 1071*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; 1072*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 1073*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; 1074*5b2ba9d3SPiotr Jasiukajtis fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); 1075*5b2ba9d3SPiotr Jasiukajtis 1076*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; 1077*5b2ba9d3SPiotr Jasiukajtis nop 1078*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; 1079*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; 1080*5b2ba9d3SPiotr Jasiukajtis 1081*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; 1082*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%i3 ! (5_1) iarr >>= 11; 1083*5b2ba9d3SPiotr Jasiukajtis nop 1084*5b2ba9d3SPiotr Jasiukajtis faddd %f28,%f46,%f40 ! (6_1) res0_lo += dtmp1; 1085*5b2ba9d3SPiotr Jasiukajtis 1086*5b2ba9d3SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; 1087*5b2ba9d3SPiotr Jasiukajtis nop 1088*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1089*5b2ba9d3SPiotr Jasiukajtis fmuld %f18,%f22,%f28 ! 
(3_1) dres = dd * dtmp2; 1090*5b2ba9d3SPiotr Jasiukajtis 1091*5b2ba9d3SPiotr Jasiukajtis fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); 1092*5b2ba9d3SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; 1093*5b2ba9d3SPiotr Jasiukajtis add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr 1094*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; 1095*5b2ba9d3SPiotr Jasiukajtis 1096*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; 1097*5b2ba9d3SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 1098*5b2ba9d3SPiotr Jasiukajtis ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1099*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; 1100*5b2ba9d3SPiotr Jasiukajtis 1101*5b2ba9d3SPiotr Jasiukajtis nop 1102*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 1103*5b2ba9d3SPiotr Jasiukajtis lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; 1104*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; 1105*5b2ba9d3SPiotr Jasiukajtis 1106*5b2ba9d3SPiotr Jasiukajtis faddd %f30,%f40,%f18 ! (6_1) dres = res0_hi + res0_lo; 1107*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; 1108*5b2ba9d3SPiotr Jasiukajtis st %f18,[%fp+ftmp0] ! (6_1) iarr = ((int*)&dres)[0]; 1109*5b2ba9d3SPiotr Jasiukajtis fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; 1110*5b2ba9d3SPiotr Jasiukajtis 1111*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1; 1112*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 1113*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1114*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); 1115*5b2ba9d3SPiotr Jasiukajtis 1116*5b2ba9d3SPiotr Jasiukajtis and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; 1117*5b2ba9d3SPiotr Jasiukajtis nop 1118*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update33 ! 
(1_0) if ( hx0 >= 0x7ff00000 ) 1119*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; 1120*5b2ba9d3SPiotr Jasiukajtis 1121*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 1122*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; 1123*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1124*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; 1125*5b2ba9d3SPiotr Jasiukajtis 1126*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; 1127*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; 1128*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update34 ! (1_0) if ( hy0 >= 0x7ff00000 ) 1129*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; 1130*5b2ba9d3SPiotr Jasiukajtis 1131*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (1_0) j0 &= diff0; 1132*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1133*5b2ba9d3SPiotr Jasiukajtis stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; 1134*5b2ba9d3SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); 1135*5b2ba9d3SPiotr Jasiukajtis 1136*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; 1137*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; 1138*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 1139*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; 1140*5b2ba9d3SPiotr Jasiukajtis 1141*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; 1142*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; 1143*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update35 ! (1_0) if ( hx0 < 0x00100000 ) 1144*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; 1145*5b2ba9d3SPiotr Jasiukajtis.cont35a: 1146*5b2ba9d3SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! 
(3_1) dtmp0 = res0_hi * res0; 1147*5b2ba9d3SPiotr Jasiukajtis nop 1148*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; 1149*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; 1150*5b2ba9d3SPiotr Jasiukajtis.cont35b: 1151*5b2ba9d3SPiotr Jasiukajtis fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; 1152*5b2ba9d3SPiotr Jasiukajtis sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; 1153*5b2ba9d3SPiotr Jasiukajtis stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; 1154*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; 1155*5b2ba9d3SPiotr Jasiukajtis 1156*5b2ba9d3SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; 1157*5b2ba9d3SPiotr Jasiukajtis nop 1158*5b2ba9d3SPiotr Jasiukajtis nop 1159*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; 1160*5b2ba9d3SPiotr Jasiukajtis.cont36: 1161*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; 1162*5b2ba9d3SPiotr Jasiukajtis nop 1163*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; 1164*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; 1165*5b2ba9d3SPiotr Jasiukajtis 1166*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; 1167*5b2ba9d3SPiotr Jasiukajtis nop 1168*5b2ba9d3SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; 1169*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; 1170*5b2ba9d3SPiotr Jasiukajtis 1171*5b2ba9d3SPiotr Jasiukajtis nop 1172*5b2ba9d3SPiotr Jasiukajtis nop 1173*5b2ba9d3SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; 1174*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1175*5b2ba9d3SPiotr Jasiukajtis 1176*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; 1177*5b2ba9d3SPiotr Jasiukajtis nop 1178*5b2ba9d3SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! 
(0_0) ((float*)&y0)[0] = ((float*)py)[0]; 1179*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; 1180*5b2ba9d3SPiotr Jasiukajtis 1181*5b2ba9d3SPiotr Jasiukajtis nop 1182*5b2ba9d3SPiotr Jasiukajtis nop 1183*5b2ba9d3SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; 1184*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1185*5b2ba9d3SPiotr Jasiukajtis 1186*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; 1187*5b2ba9d3SPiotr Jasiukajtis nop 1188*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (6_1) iarr = ((int*)&dres)[0]; 1189*5b2ba9d3SPiotr Jasiukajtis fand %f18,DA1,%f2 ! (6_1) dexp0 = vis_fand(dres,DA1); 1190*5b2ba9d3SPiotr Jasiukajtis 1191*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; 1192*5b2ba9d3SPiotr Jasiukajtis nop 1193*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; 1194*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; 1195*5b2ba9d3SPiotr Jasiukajtis 1196*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; 1197*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%g1 ! (6_1) iarr >>= 11; 1198*5b2ba9d3SPiotr Jasiukajtis nop 1199*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; 1200*5b2ba9d3SPiotr Jasiukajtis 1201*5b2ba9d3SPiotr Jasiukajtis nop 1202*5b2ba9d3SPiotr Jasiukajtis and %g1,0x1fc,%g1 ! (6_1) iarr &= 0x1fc; 1203*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1204*5b2ba9d3SPiotr Jasiukajtis fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; 1205*5b2ba9d3SPiotr Jasiukajtis 1206*5b2ba9d3SPiotr Jasiukajtis fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); 1207*5b2ba9d3SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; 1208*5b2ba9d3SPiotr Jasiukajtis add %g1,TBL,%g1 ! (6_1) (char*)dll1 + iarr 1209*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; 1210*5b2ba9d3SPiotr Jasiukajtis 1211*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f28,%f52 ! 
(3_1) dtmp0 *= dres; 1212*5b2ba9d3SPiotr Jasiukajtis mov %i1,%i2 1213*5b2ba9d3SPiotr Jasiukajtis ld [%g1],%f28 ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1214*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; 1215*5b2ba9d3SPiotr Jasiukajtis 1216*5b2ba9d3SPiotr Jasiukajtis nop 1217*5b2ba9d3SPiotr Jasiukajtis mov %i0,%o0 1218*5b2ba9d3SPiotr Jasiukajtis lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; 1219*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; 1220*5b2ba9d3SPiotr Jasiukajtis 1221*5b2ba9d3SPiotr Jasiukajtis faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; 1222*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; 1223*5b2ba9d3SPiotr Jasiukajtis st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; 1224*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; 1225*5b2ba9d3SPiotr Jasiukajtis 1226*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; 1227*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000 1228*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1229*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f28,%f2,%f28 ! (6_1) dd = vis_fpsub32(dtmp0, dexp0); 1230*5b2ba9d3SPiotr Jasiukajtis 1231*5b2ba9d3SPiotr Jasiukajtis and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff; 1232*5b2ba9d3SPiotr Jasiukajtis nop 1233*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update37 ! (2_0) if ( hx0 >= 0x7ff00000 ) 1234*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; 1235*5b2ba9d3SPiotr Jasiukajtis 1236*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; 1237*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 1238*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1239*5b2ba9d3SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! 
(0_0) y_hi0 -= D2ON36; 1240*5b2ba9d3SPiotr Jasiukajtis 1241*5b2ba9d3SPiotr Jasiukajtis fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; 1242*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; 1243*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update38 ! (2_0) if ( hy0 >= 0x7ff00000 ) 1244*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; 1245*5b2ba9d3SPiotr Jasiukajtis 1246*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (2_0) j0 &= diff0; 1247*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1248*5b2ba9d3SPiotr Jasiukajtis stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; 1249*5b2ba9d3SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); 1250*5b2ba9d3SPiotr Jasiukajtis 1251*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; 1252*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 1253*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; 1254*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; 1255*5b2ba9d3SPiotr Jasiukajtis 1256*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; 1257*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; 1258*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update39 ! (2_0) if ( hx0 < 0x00100000 ) 1259*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; 1260*5b2ba9d3SPiotr Jasiukajtis.cont39a: 1261*5b2ba9d3SPiotr Jasiukajtis fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; 1262*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; 1263*5b2ba9d3SPiotr Jasiukajtis nop 1264*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; 1265*5b2ba9d3SPiotr Jasiukajtis.cont39b: 1266*5b2ba9d3SPiotr Jasiukajtis fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; 1267*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; 1268*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp5] ! 
(2_0) *(long long*)&scl0 = ll; 1269*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; 1270*5b2ba9d3SPiotr Jasiukajtis 1271*5b2ba9d3SPiotr Jasiukajtis fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; 1272*5b2ba9d3SPiotr Jasiukajtis nop 1273*5b2ba9d3SPiotr Jasiukajtis nop 1274*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; 1275*5b2ba9d3SPiotr Jasiukajtis.cont40: 1276*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; 1277*5b2ba9d3SPiotr Jasiukajtis nop 1278*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; 1279*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; 1280*5b2ba9d3SPiotr Jasiukajtis 1281*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; 1282*5b2ba9d3SPiotr Jasiukajtis nop 1283*5b2ba9d3SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; 1284*5b2ba9d3SPiotr Jasiukajtis fmuld %f28,%f20,%f54 ! (6_1) dd *= dtmp0; 1285*5b2ba9d3SPiotr Jasiukajtis 1286*5b2ba9d3SPiotr Jasiukajtis nop 1287*5b2ba9d3SPiotr Jasiukajtis nop 1288*5b2ba9d3SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; 1289*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1290*5b2ba9d3SPiotr Jasiukajtis 1291*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; 1292*5b2ba9d3SPiotr Jasiukajtis nop 1293*5b2ba9d3SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; 1294*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; 1295*5b2ba9d3SPiotr Jasiukajtis 1296*5b2ba9d3SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 1297*5b2ba9d3SPiotr Jasiukajtis nop 1298*5b2ba9d3SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; 1299*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1300*5b2ba9d3SPiotr Jasiukajtis 1301*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f18,%f46 ! 
(6_1) dtmp1 = dd * dres; 1302*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 1303*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; 1304*5b2ba9d3SPiotr Jasiukajtis fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); 1305*5b2ba9d3SPiotr Jasiukajtis 1306*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; 1307*5b2ba9d3SPiotr Jasiukajtis nop 1308*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; 1309*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; 1310*5b2ba9d3SPiotr Jasiukajtis 1311*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; 1312*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%i3 ! (7_1) iarr >>= 11; 1313*5b2ba9d3SPiotr Jasiukajtis nop 1314*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; 1315*5b2ba9d3SPiotr Jasiukajtis 1316*5b2ba9d3SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; 1317*5b2ba9d3SPiotr Jasiukajtis nop 1318*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1319*5b2ba9d3SPiotr Jasiukajtis fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; 1320*5b2ba9d3SPiotr Jasiukajtis 1321*5b2ba9d3SPiotr Jasiukajtis fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); 1322*5b2ba9d3SPiotr Jasiukajtis add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr 1323*5b2ba9d3SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; 1324*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f46,%f62 ! (6_1) dtmp1 = DTWO - dtmp1; 1325*5b2ba9d3SPiotr Jasiukajtis 1326*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; 1327*5b2ba9d3SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 1328*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1329*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; 1330*5b2ba9d3SPiotr Jasiukajtis 1331*5b2ba9d3SPiotr Jasiukajtis nop 1332*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i0 ! 
py += stridey 1333*5b2ba9d3SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; 1334*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; 1335*5b2ba9d3SPiotr Jasiukajtis 1336*5b2ba9d3SPiotr Jasiukajtis faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; 1337*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; 1338*5b2ba9d3SPiotr Jasiukajtis st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; 1339*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0; 1340*5b2ba9d3SPiotr Jasiukajtis 1341*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f62,%f24 ! (6_1) dd *= dtmp1; 1342*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 1343*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1344*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); 1345*5b2ba9d3SPiotr Jasiukajtis 1346*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; 1347*5b2ba9d3SPiotr Jasiukajtis nop 1348*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update41 ! (3_0) if ( hx0 >= 0x7ff00000 ) 1349*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; 1350*5b2ba9d3SPiotr Jasiukajtis 1351*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; 1352*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 1353*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1354*5b2ba9d3SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; 1355*5b2ba9d3SPiotr Jasiukajtis 1356*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; 1357*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; 1358*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update42 ! (3_0) if ( hy0 >= 0x7ff00000 ) 1359*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! 
(4_1) res0 += dtmp0; 1360*5b2ba9d3SPiotr Jasiukajtis 1361*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (3_0) j0 &= diff0; 1362*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1363*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; 1364*5b2ba9d3SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); 1365*5b2ba9d3SPiotr Jasiukajtis 1366*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; 1367*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 1368*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; 1369*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; 1370*5b2ba9d3SPiotr Jasiukajtis 1371*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; 1372*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; 1373*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update43 ! (3_0) if ( hx0 < 0x00100000 ) 1374*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; 1375*5b2ba9d3SPiotr Jasiukajtis.cont43a: 1376*5b2ba9d3SPiotr Jasiukajtis fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; 1377*5b2ba9d3SPiotr Jasiukajtis nop 1378*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; 1379*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; 1380*5b2ba9d3SPiotr Jasiukajtis.cont43b: 1381*5b2ba9d3SPiotr Jasiukajtis fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; 1382*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; 1383*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; 1384*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; 1385*5b2ba9d3SPiotr Jasiukajtis 1386*5b2ba9d3SPiotr Jasiukajtis fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; 1387*5b2ba9d3SPiotr Jasiukajtis nop 1388*5b2ba9d3SPiotr Jasiukajtis nop 1389*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! 
(1_0) y_lo0 = y0 - y_hi0 1390*5b2ba9d3SPiotr Jasiukajtis.cont44: 1391*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; 1392*5b2ba9d3SPiotr Jasiukajtis nop 1393*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; 1394*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; 1395*5b2ba9d3SPiotr Jasiukajtis 1396*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; 1397*5b2ba9d3SPiotr Jasiukajtis nop 1398*5b2ba9d3SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; 1399*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; 1400*5b2ba9d3SPiotr Jasiukajtis 1401*5b2ba9d3SPiotr Jasiukajtis nop 1402*5b2ba9d3SPiotr Jasiukajtis nop 1403*5b2ba9d3SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; 1404*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1405*5b2ba9d3SPiotr Jasiukajtis 1406*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; 1407*5b2ba9d3SPiotr Jasiukajtis nop 1408*5b2ba9d3SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; 1409*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f18,%f20 ! (6_1) dtmp2 = DTWO - dtmp2; 1410*5b2ba9d3SPiotr Jasiukajtis 1411*5b2ba9d3SPiotr Jasiukajtis nop 1412*5b2ba9d3SPiotr Jasiukajtis nop 1413*5b2ba9d3SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; 1414*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1415*5b2ba9d3SPiotr Jasiukajtis 1416*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; 1417*5b2ba9d3SPiotr Jasiukajtis nop 1418*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; 1419*5b2ba9d3SPiotr Jasiukajtis fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); 1420*5b2ba9d3SPiotr Jasiukajtis 1421*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; 1422*5b2ba9d3SPiotr Jasiukajtis nop 1423*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp8],%f18 ! 
(3_1) *(long long*)&scl0 = ll; 1424*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; 1425*5b2ba9d3SPiotr Jasiukajtis 1426*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; 1427*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%o4 ! (0_0) iarr >>= 11; 1428*5b2ba9d3SPiotr Jasiukajtis nop 1429*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; 1430*5b2ba9d3SPiotr Jasiukajtis 1431*5b2ba9d3SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; 1432*5b2ba9d3SPiotr Jasiukajtis nop 1433*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1434*5b2ba9d3SPiotr Jasiukajtis fmuld %f24,%f20,%f26 ! (6_1) dres = dd * dtmp2; 1435*5b2ba9d3SPiotr Jasiukajtis 1436*5b2ba9d3SPiotr Jasiukajtis fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); 1437*5b2ba9d3SPiotr Jasiukajtis add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr 1438*5b2ba9d3SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; 1439*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; 1440*5b2ba9d3SPiotr Jasiukajtis 1441*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres; 1442*5b2ba9d3SPiotr Jasiukajtis mov %i1,%i2 1443*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1444*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; 1445*5b2ba9d3SPiotr Jasiukajtis 1446*5b2ba9d3SPiotr Jasiukajtis nop 1447*5b2ba9d3SPiotr Jasiukajtis mov %i0,%o0 1448*5b2ba9d3SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; 1449*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; 1450*5b2ba9d3SPiotr Jasiukajtis 1451*5b2ba9d3SPiotr Jasiukajtis fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; 1452*5b2ba9d3SPiotr Jasiukajtis nop 1453*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; 1454*5b2ba9d3SPiotr Jasiukajtis faddd %f42,%f34,%f18 ! 
(1_0) dres = res0_hi + res0_lo; 1455*5b2ba9d3SPiotr Jasiukajtis 1456*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; 1457*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 1458*5b2ba9d3SPiotr Jasiukajtis st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; 1459*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); 1460*5b2ba9d3SPiotr Jasiukajtis 1461*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; 1462*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1463*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update45 ! (4_0) if ( hx0 >= 0x7ff00000 ) 1464*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; 1465*5b2ba9d3SPiotr Jasiukajtis 1466*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; 1467*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 1468*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update46 ! (4_0) if ( hy0 >= 0x7ff00000 ) 1469*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; 1470*5b2ba9d3SPiotr Jasiukajtis 1471*5b2ba9d3SPiotr Jasiukajtis fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; 1472*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; 1473*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1474*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; 1475*5b2ba9d3SPiotr Jasiukajtis 1476*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (4_0) j0 &= diff0; 1477*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 1478*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update47 ! (4_0) if ( hx0 < 0x00100000 ) 1479*5b2ba9d3SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); 1480*5b2ba9d3SPiotr Jasiukajtis.cont47a: 1481*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 1482*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! 
(4_0) j0 = hy0 - j0; 1483*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; 1484*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 1485*5b2ba9d3SPiotr Jasiukajtis 1486*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 1487*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; 1488*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1489*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; 1490*5b2ba9d3SPiotr Jasiukajtis 1491*5b2ba9d3SPiotr Jasiukajtis fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; 1492*5b2ba9d3SPiotr Jasiukajtis nop 1493*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; 1494*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; 1495*5b2ba9d3SPiotr Jasiukajtis.cont47b: 1496*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; 1497*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; 1498*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; 1499*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; 1500*5b2ba9d3SPiotr Jasiukajtis 1501*5b2ba9d3SPiotr Jasiukajtis fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; 1502*5b2ba9d3SPiotr Jasiukajtis nop 1503*5b2ba9d3SPiotr Jasiukajtis nop 1504*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; 1505*5b2ba9d3SPiotr Jasiukajtis.cont48: 1506*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; 1507*5b2ba9d3SPiotr Jasiukajtis nop 1508*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; 1509*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; 1510*5b2ba9d3SPiotr Jasiukajtis 1511*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! 
(6_1) dtmp0 = DONE - dtmp0; 1512*5b2ba9d3SPiotr Jasiukajtis nop 1513*5b2ba9d3SPiotr Jasiukajtis lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; 1514*5b2ba9d3SPiotr Jasiukajtis fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; 1515*5b2ba9d3SPiotr Jasiukajtis 1516*5b2ba9d3SPiotr Jasiukajtis nop 1517*5b2ba9d3SPiotr Jasiukajtis nop 1518*5b2ba9d3SPiotr Jasiukajtis lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; 1519*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1520*5b2ba9d3SPiotr Jasiukajtis 1521*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; 1522*5b2ba9d3SPiotr Jasiukajtis nop 1523*5b2ba9d3SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; 1524*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; 1525*5b2ba9d3SPiotr Jasiukajtis 1526*5b2ba9d3SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; 1527*5b2ba9d3SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 1528*5b2ba9d3SPiotr Jasiukajtis nop 1529*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1530*5b2ba9d3SPiotr Jasiukajtis 1531*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; 1532*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 1533*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; 1534*5b2ba9d3SPiotr Jasiukajtis fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); 1535*5b2ba9d3SPiotr Jasiukajtis 1536*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; 1537*5b2ba9d3SPiotr Jasiukajtis nop 1538*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; 1539*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f40,%f46 ! (6_1) dtmp0 -= dtmp1; 1540*5b2ba9d3SPiotr Jasiukajtis 1541*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; 1542*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%i3 ! 
(1_0) iarr >>= 11; 1543*5b2ba9d3SPiotr Jasiukajtis nop 1544*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; 1545*5b2ba9d3SPiotr Jasiukajtis 1546*5b2ba9d3SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; 1547*5b2ba9d3SPiotr Jasiukajtis nop 1548*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1549*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; 1550*5b2ba9d3SPiotr Jasiukajtis 1551*5b2ba9d3SPiotr Jasiukajtis fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); 1552*5b2ba9d3SPiotr Jasiukajtis add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr 1553*5b2ba9d3SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; 1554*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; 1555*5b2ba9d3SPiotr Jasiukajtis 1556*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f26,%f52 ! (6_1) dtmp0 *= dres; 1557*5b2ba9d3SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 1558*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1559*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; 1560*5b2ba9d3SPiotr Jasiukajtis 1561*5b2ba9d3SPiotr Jasiukajtis nop 1562*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 1563*5b2ba9d3SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; 1564*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; 1565*5b2ba9d3SPiotr Jasiukajtis 1566*5b2ba9d3SPiotr Jasiukajtis fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; 1567*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; 1568*5b2ba9d3SPiotr Jasiukajtis nop 1569*5b2ba9d3SPiotr Jasiukajtis faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; 1570*5b2ba9d3SPiotr Jasiukajtis 1571*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; 1572*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 1573*5b2ba9d3SPiotr Jasiukajtis st %f14,[%fp+ftmp0] ! 
(2_0) iarr = ((int*)&dres)[0]; 1574*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); 1575*5b2ba9d3SPiotr Jasiukajtis 1576*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; 1577*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1578*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update49 ! (5_0) if ( hx0 >= 0x7ff00000 ) 1579*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; 1580*5b2ba9d3SPiotr Jasiukajtis 1581*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; 1582*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 1583*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update50 ! (5_0) if ( hy0 >= 0x7ff00000 ) 1584*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; 1585*5b2ba9d3SPiotr Jasiukajtis 1586*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; 1587*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; 1588*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1589*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; 1590*5b2ba9d3SPiotr Jasiukajtis 1591*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (5_0) j0 &= diff0; 1592*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 1593*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update51 ! (5_0) if ( hx0 < 0x00100000 ) 1594*5b2ba9d3SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); 1595*5b2ba9d3SPiotr Jasiukajtis.cont51a: 1596*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 1597*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; 1598*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; 1599*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! 
(3_0) x_lo0 = x0 - x_hi0; 1600*5b2ba9d3SPiotr Jasiukajtis 1601*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; 1602*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; 1603*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1604*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 1605*5b2ba9d3SPiotr Jasiukajtis 1606*5b2ba9d3SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 1607*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; 1608*5b2ba9d3SPiotr Jasiukajtis nop 1609*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; 1610*5b2ba9d3SPiotr Jasiukajtis.cont51b: 1611*5b2ba9d3SPiotr Jasiukajtis fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; 1612*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; 1613*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; 1614*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; 1615*5b2ba9d3SPiotr Jasiukajtis 1616*5b2ba9d3SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; 1617*5b2ba9d3SPiotr Jasiukajtis nop 1618*5b2ba9d3SPiotr Jasiukajtis nop 1619*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; 1620*5b2ba9d3SPiotr Jasiukajtis.cont52: 1621*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; 1622*5b2ba9d3SPiotr Jasiukajtis nop 1623*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; 1624*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; 1625*5b2ba9d3SPiotr Jasiukajtis 1626*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; 1627*5b2ba9d3SPiotr Jasiukajtis nop 1628*5b2ba9d3SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; 1629*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f20,%f54 ! 
(1_0) dd *= dtmp0; 1630*5b2ba9d3SPiotr Jasiukajtis 1631*5b2ba9d3SPiotr Jasiukajtis nop 1632*5b2ba9d3SPiotr Jasiukajtis nop 1633*5b2ba9d3SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; 1634*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1635*5b2ba9d3SPiotr Jasiukajtis 1636*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; 1637*5b2ba9d3SPiotr Jasiukajtis nop 1638*5b2ba9d3SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; 1639*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; 1640*5b2ba9d3SPiotr Jasiukajtis 1641*5b2ba9d3SPiotr Jasiukajtis nop 1642*5b2ba9d3SPiotr Jasiukajtis nop 1643*5b2ba9d3SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; 1644*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1645*5b2ba9d3SPiotr Jasiukajtis 1646*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; 1647*5b2ba9d3SPiotr Jasiukajtis nop 1648*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; 1649*5b2ba9d3SPiotr Jasiukajtis fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); 1650*5b2ba9d3SPiotr Jasiukajtis 1651*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; 1652*5b2ba9d3SPiotr Jasiukajtis nop 1653*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; 1654*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; 1655*5b2ba9d3SPiotr Jasiukajtis 1656*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; 1657*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%o4 ! (2_0) iarr >>= 11; 1658*5b2ba9d3SPiotr Jasiukajtis nop 1659*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; 1660*5b2ba9d3SPiotr Jasiukajtis 1661*5b2ba9d3SPiotr Jasiukajtis and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; 1662*5b2ba9d3SPiotr Jasiukajtis nop 1663*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1664*5b2ba9d3SPiotr Jasiukajtis fmuld %f24,%f20,%f26 ! 
(0_0) dres = dd * dtmp2; 1665*5b2ba9d3SPiotr Jasiukajtis 1666*5b2ba9d3SPiotr Jasiukajtis fsqrtd %f52,%f24 ! (6_1) res0 = sqrt ( res0 ); 1667*5b2ba9d3SPiotr Jasiukajtis add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr 1668*5b2ba9d3SPiotr Jasiukajtis lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; 1669*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; 1670*5b2ba9d3SPiotr Jasiukajtis 1671*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; 1672*5b2ba9d3SPiotr Jasiukajtis mov %i1,%i2 1673*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1674*5b2ba9d3SPiotr Jasiukajtis faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; 1675*5b2ba9d3SPiotr Jasiukajtis 1676*5b2ba9d3SPiotr Jasiukajtis nop 1677*5b2ba9d3SPiotr Jasiukajtis mov %i0,%o0 1678*5b2ba9d3SPiotr Jasiukajtis lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; 1679*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; 1680*5b2ba9d3SPiotr Jasiukajtis 1681*5b2ba9d3SPiotr Jasiukajtis fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; 1682*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; 1683*5b2ba9d3SPiotr Jasiukajtis nop 1684*5b2ba9d3SPiotr Jasiukajtis faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo; 1685*5b2ba9d3SPiotr Jasiukajtis 1686*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; 1687*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 1688*5b2ba9d3SPiotr Jasiukajtis st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; 1689*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); 1690*5b2ba9d3SPiotr Jasiukajtis 1691*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; 1692*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1693*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update53 ! 
(6_0) if ( hx0 >= 0x7ff00000 ) 1694*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; 1695*5b2ba9d3SPiotr Jasiukajtis 1696*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; 1697*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 1698*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update54 ! (6_0) if ( hy0 >= 0x7ff00000 ) 1699*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; 1700*5b2ba9d3SPiotr Jasiukajtis 1701*5b2ba9d3SPiotr Jasiukajtis fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; 1702*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; 1703*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1704*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; 1705*5b2ba9d3SPiotr Jasiukajtis 1706*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (6_0) j0 &= diff0; 1707*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 1708*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update55 ! (6_0) if ( hx0 < 0x00100000 ) 1709*5b2ba9d3SPiotr Jasiukajtis fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); 1710*5b2ba9d3SPiotr Jasiukajtis.cont55a: 1711*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; 1712*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; 1713*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; 1714*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; 1715*5b2ba9d3SPiotr Jasiukajtis 1716*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; 1717*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; 1718*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1719*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; 1720*5b2ba9d3SPiotr Jasiukajtis 1721*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f18,%f18 ! 
(1_0) dtmp2 = dd * dres; 1722*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; 1723*5b2ba9d3SPiotr Jasiukajtis nop 1724*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; 1725*5b2ba9d3SPiotr Jasiukajtis.cont55b: 1726*5b2ba9d3SPiotr Jasiukajtis fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; 1727*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; 1728*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; 1729*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; 1730*5b2ba9d3SPiotr Jasiukajtis 1731*5b2ba9d3SPiotr Jasiukajtis fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; 1732*5b2ba9d3SPiotr Jasiukajtis nop 1733*5b2ba9d3SPiotr Jasiukajtis nop 1734*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; 1735*5b2ba9d3SPiotr Jasiukajtis.cont56: 1736*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; 1737*5b2ba9d3SPiotr Jasiukajtis nop 1738*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; 1739*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; 1740*5b2ba9d3SPiotr Jasiukajtis 1741*5b2ba9d3SPiotr Jasiukajtis lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; 1742*5b2ba9d3SPiotr Jasiukajtis nop 1743*5b2ba9d3SPiotr Jasiukajtis nop 1744*5b2ba9d3SPiotr Jasiukajtis fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; 1745*5b2ba9d3SPiotr Jasiukajtis 1746*5b2ba9d3SPiotr Jasiukajtis lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; 1747*5b2ba9d3SPiotr Jasiukajtis nop 1748*5b2ba9d3SPiotr Jasiukajtis nop 1749*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; 1750*5b2ba9d3SPiotr Jasiukajtis 1751*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; 1752*5b2ba9d3SPiotr Jasiukajtis nop 1753*5b2ba9d3SPiotr Jasiukajtis lda [%i3]%asi,%f12 ! 
(5_0) ((float*)&y0)[0] = ((float*)py)[0]; 1754*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; 1755*5b2ba9d3SPiotr Jasiukajtis 1756*5b2ba9d3SPiotr Jasiukajtis nop 1757*5b2ba9d3SPiotr Jasiukajtis add %i1,stridex,%i4 ! px += stridex 1758*5b2ba9d3SPiotr Jasiukajtis lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; 1759*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1760*5b2ba9d3SPiotr Jasiukajtis 1761*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; 1762*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i1 ! px += stridex 1763*5b2ba9d3SPiotr Jasiukajtis ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; 1764*5b2ba9d3SPiotr Jasiukajtis fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); 1765*5b2ba9d3SPiotr Jasiukajtis 1766*5b2ba9d3SPiotr Jasiukajtis fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; 1767*5b2ba9d3SPiotr Jasiukajtis nop 1768*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp14],%f0 ! (6_1) *(long long*)&scl0 = ll; 1769*5b2ba9d3SPiotr Jasiukajtis fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; 1770*5b2ba9d3SPiotr Jasiukajtis 1771*5b2ba9d3SPiotr Jasiukajtis fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; 1772*5b2ba9d3SPiotr Jasiukajtis sra %o2,11,%i3 ! (3_0) iarr >>= 11; 1773*5b2ba9d3SPiotr Jasiukajtis nop 1774*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; 1775*5b2ba9d3SPiotr Jasiukajtis 1776*5b2ba9d3SPiotr Jasiukajtis and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; 1777*5b2ba9d3SPiotr Jasiukajtis nop 1778*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1779*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; 1780*5b2ba9d3SPiotr Jasiukajtis 1781*5b2ba9d3SPiotr Jasiukajtis fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); 1782*5b2ba9d3SPiotr Jasiukajtis add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr 1783*5b2ba9d3SPiotr Jasiukajtis lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; 1784*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f46 ! 
(2_0) dtmp1 = DTWO - dtmp1; 1785*5b2ba9d3SPiotr Jasiukajtis 1786*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; 1787*5b2ba9d3SPiotr Jasiukajtis add %i0,stridey,%i3 ! py += stridey 1788*5b2ba9d3SPiotr Jasiukajtis ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1789*5b2ba9d3SPiotr Jasiukajtis faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; 1790*5b2ba9d3SPiotr Jasiukajtis 1791*5b2ba9d3SPiotr Jasiukajtis nop 1792*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i0 ! py += stridey 1793*5b2ba9d3SPiotr Jasiukajtis lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; 1794*5b2ba9d3SPiotr Jasiukajtis faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; 1795*5b2ba9d3SPiotr Jasiukajtis 1796*5b2ba9d3SPiotr Jasiukajtis fmuld %f0,%f24,%f2 ! (6_1) res0 = scl0 * res0; 1797*5b2ba9d3SPiotr Jasiukajtis and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; 1798*5b2ba9d3SPiotr Jasiukajtis nop 1799*5b2ba9d3SPiotr Jasiukajtis faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; 1800*5b2ba9d3SPiotr Jasiukajtis 1801*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; 1802*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 1803*5b2ba9d3SPiotr Jasiukajtis st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; 1804*5b2ba9d3SPiotr Jasiukajtis fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); 1805*5b2ba9d3SPiotr Jasiukajtis 1806*5b2ba9d3SPiotr Jasiukajtis and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; 1807*5b2ba9d3SPiotr Jasiukajtis st %f2,[%i5] ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1808*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update57 ! (7_0) if ( hx0 >= 0x7ff00000 ) 1809*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; 1810*5b2ba9d3SPiotr Jasiukajtis 1811*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; 1812*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 1813*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.update58 ! 
(7_0) if ( hy0 >= 0x7ff00000 ) 1814*5b2ba9d3SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; 1815*5b2ba9d3SPiotr Jasiukajtis 1816*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; 1817*5b2ba9d3SPiotr Jasiukajtis sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; 1818*5b2ba9d3SPiotr Jasiukajtis st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1819*5b2ba9d3SPiotr Jasiukajtis faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; 1820*5b2ba9d3SPiotr Jasiukajtis 1821*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 ! (7_0) j0 &= diff0; 1822*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 1823*5b2ba9d3SPiotr Jasiukajtis bl,pn %icc,.update59 ! (7_0) if ( hx0 < 0x00100000 ) 1824*5b2ba9d3SPiotr Jasiukajtis fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); 1825*5b2ba9d3SPiotr Jasiukajtis.cont59a: 1826*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 1827*5b2ba9d3SPiotr Jasiukajtis sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; 1828*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; 1829*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; 1830*5b2ba9d3SPiotr Jasiukajtis 1831*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; 1832*5b2ba9d3SPiotr Jasiukajtis and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; 1833*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 1834*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 1835*5b2ba9d3SPiotr Jasiukajtis 1836*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 1837*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; 1838*5b2ba9d3SPiotr Jasiukajtis nop 1839*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 1840*5b2ba9d3SPiotr Jasiukajtis.cont59b: 1841*5b2ba9d3SPiotr Jasiukajtis fmuld %f42,%f28,%f60 ! 
(1_0) dtmp0 = res0_hi * res0; 1842*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 1843*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 1844*5b2ba9d3SPiotr Jasiukajtis faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; 1845*5b2ba9d3SPiotr Jasiukajtis 1846*5b2ba9d3SPiotr Jasiukajtis fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; 1847*5b2ba9d3SPiotr Jasiukajtis nop 1848*5b2ba9d3SPiotr Jasiukajtis nop 1849*5b2ba9d3SPiotr Jasiukajtis fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; 1850*5b2ba9d3SPiotr Jasiukajtis.cont60: 1851*5b2ba9d3SPiotr Jasiukajtis fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; 1852*5b2ba9d3SPiotr Jasiukajtis nop 1853*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll; 1854*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; 1855*5b2ba9d3SPiotr Jasiukajtis 1856*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; 1857*5b2ba9d3SPiotr Jasiukajtis nop 1858*5b2ba9d3SPiotr Jasiukajtis lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; 1859*5b2ba9d3SPiotr Jasiukajtis bn,pn %icc,.exit 1860*5b2ba9d3SPiotr Jasiukajtis 1861*5b2ba9d3SPiotr Jasiukajtis lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; 1862*5b2ba9d3SPiotr Jasiukajtis nop 1863*5b2ba9d3SPiotr Jasiukajtis nop 1864*5b2ba9d3SPiotr Jasiukajtis fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; 1865*5b2ba9d3SPiotr Jasiukajtis 1866*5b2ba9d3SPiotr Jasiukajtis fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; 1867*5b2ba9d3SPiotr Jasiukajtis nop 1868*5b2ba9d3SPiotr Jasiukajtis lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; 1869*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; 1870*5b2ba9d3SPiotr Jasiukajtis 1871*5b2ba9d3SPiotr Jasiukajtis nop 1872*5b2ba9d3SPiotr Jasiukajtis nop 1873*5b2ba9d3SPiotr Jasiukajtis lda [%o0+4]%asi,%f13 ! 
(6_0) ((float*)&y0)[1] = ((float*)py)[1];
	bn,pn	%icc,.exit

	fmuld	%f52,%f22,%f50		! (3_0) dtmp1 = dd * dres;
	nop
	ld	[%fp+ftmp0],%o2		! (4_0) iarr = ((int*)&dres)[0];
	fand	%f24,DA1,%f54		! (4_0) dexp0 = vis_fand(dres,DA1);

	fmuld	%f10,%f62,%f10		! (6_0) x0 *= scl0;
	nop
	ldd	[%fp+dtmp0],%f0		! (7_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f34,%f20		! (1_0) dtmp0 -= dtmp1;

	fmuld	%f12,%f62,%f60		! (6_0) y0 *= scl0;
	sra	%o2,11,%o4		! (4_0) iarr >>= 11;
	nop
	faddd	%f2,%f46,%f34		! (5_0) res0_lo += dtmp1;

	and	%o4,0x1fc,%o4		! (4_0) iarr &= 0x1fc;
	subcc	counter,8,counter	! counter -= 8;
	bpos,pt	%icc,.main_loop
	fmuld	%f26,%f14,%f26		! (2_0) dres = dd * dtmp2;

	add	counter,8,counter	! undo the last subtraction of 8

!
! .tail: pipeline drain, entered by fall-through from .main_loop
! (any other entry points lie outside this part of the file).
!
! Results still in flight are completed in order.  This part handles
! stages (7_2), (0_1), (1_1), (2_1) and (3_1); stages (4_1) and (5_1)
! follow directly below.  Each result gets an fsqrtd, a multiply by its
! saved scl0 (reloaded from a dtmpN stack slot) and a two-word store to
! pz.  counter is decremented before each result and control leaves to
! .begin as soon as it goes negative.  The bneg branches are not
! annulled, so the instruction in each delay slot runs on both paths.
!
.tail:
	subcc	counter,1,counter
	bneg	.begin			! nothing left to drain
	nop

	fsqrtd	%f48,%f14		! (0_1) res0 = sqrt ( res0 );
	add	%o4,TBL,%o4		! (4_1) (char*)dll1 + iarr
	fsubd	DTWO,%f50,%f46		! (3_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f16,%f48		! (1_1) dtmp0 *= dres;
	ld	[%o4],%f20		! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];

	fmuld	%f0,%f18,%f0		! (7_2) res0 = scl0 * res0;
	st	%f0,[%i5]		! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
	faddd	%f42,%f34,%f16		! (5_1) dres = res0_hi + res0_lo;

	subcc	counter,1,counter
	st	%f1,[%i5+4]		! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
	bneg	.begin
	add	%i5,stridez,%i5		! pz += stridez

	fmuld	%f52,%f46,%f18		! (3_1) dd *= dtmp1;
	st	%f16,[%fp+ftmp0]	! (5_1) iarr = ((int*)&dres)[0];
	fpsub32	%f20,%f54,%f54		! (4_1) dd = vis_fpsub32(dtmp0, dexp0);

	fmuld	%f54,%f24,%f50		! (4_1) dtmp0 = dd * dres;
	faddd	%f28,%f48,%f52		! (1_1) res0 += dtmp0;


	fand	%f26,DA0,%f48		! (2_1) res0 = vis_fand(dres,DA0);

	fmuld	%f18,%f22,%f22		! (3_1) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f20		! (4_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f30,%f48,%f12		! (2_1) dtmp0 = res0_hi * res0;

	fmuld	%f40,%f48,%f40		! (2_1) dtmp1 = res0_lo * res0;

	fmuld	%f54,%f20,%f54		! (4_1) dd *= dtmp0;

	fsubd	DONE,%f12,%f60		! (2_1) dtmp0 = DONE - dtmp0;

	fsubd	DTWO,%f22,%f22		! (3_1) dtmp2 = DTWO - dtmp2;

	fmuld	%f54,%f24,%f50		! (4_1) dtmp1 = dd * dres;
	ld	[%fp+ftmp0],%o2		! (5_1) iarr = ((int*)&dres)[0];
	fand	%f16,DA1,%f2		! (5_1) dexp0 = vis_fand(dres,DA1);

	ldd	[%fp+dtmp2],%f0		! (0_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f40,%f20		! (2_1) dtmp0 -= dtmp1;

	sra	%o2,11,%i3		! (5_1) iarr >>= 11;

	and	%i3,0x1fc,%i3		! (5_1) iarr &= 0x1fc;
	fmuld	%f18,%f22,%f28		! (3_1) dres = dd * dtmp2;

	fsqrtd	%f52,%f22		! (1_1) res0 = sqrt ( res0 );
	add	%i3,TBL,%g1		! (5_1) (char*)dll1 + iarr
	fsubd	DTWO,%f50,%f62		! (4_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f26,%f52		! (2_1) dtmp0 *= dres;
	ld	[%g1],%f26		! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];

	fmuld	%f0,%f14,%f0		! (0_1) res0 = scl0 * res0;

	fmuld	%f54,%f62,%f14		! (4_1) dd *= dtmp1;
	fpsub32	%f26,%f2,%f26		! (5_1) dd = vis_fpsub32(dtmp0, dexp0);

	st	%f0,[%i5]		! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];

	fmuld	%f26,%f16,%f50		! (5_1) dtmp0 = dd * dres;
	st	%f1,[%i5+4]		! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52		! (2_1) res0 += dtmp0;

	subcc	counter,1,counter
	bneg	.begin
	add	%i5,stridez,%i5		! pz += stridez

	fand	%f28,DA0,%f48		! (3_1) res0 = vis_fand(dres,DA0);

	fmuld	%f44,%f48,%f10		! (3_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20		! (5_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f14,%f24,%f24		! (4_1) dtmp2 = dd * dres;

	fmuld	%f38,%f48,%f38		! (3_1) dtmp1 = res0_lo * res0;

	fsubd	DONE,%f10,%f60		! (3_1) dtmp0 = DONE - dtmp0;
	fmuld	%f26,%f20,%f54		! (5_1) dd *= dtmp0;

	fsubd	DTWO,%f24,%f24		! (4_1) dtmp2 = DTWO - dtmp2;

	fmuld	%f54,%f16,%f46		! (5_1) dtmp1 = dd * dres;

	ldd	[%fp+dtmp4],%f50	! (1_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f38,%f20		! (3_1) dtmp0 -= dtmp1;

	fmuld	%f14,%f24,%f26		! (4_1) dres = dd * dtmp2;

	fsqrtd	%f52,%f24		! (2_1) res0 = sqrt ( res0 );
	fsubd	DTWO,%f46,%f62		! (5_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f28,%f52		! (3_1) dtmp0 *= dres;

	fmuld	%f50,%f22,%f0		! (1_1) res0 = scl0 * res0;

	fmuld	%f54,%f62,%f22		! (5_1) dd *= dtmp1;

	st	%f0,[%i5]		! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];

	subcc	counter,1,counter
	st	%f1,[%i5+4]		! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
	bneg	.begin
	add	%i5,stridez,%i5		! pz += stridez

	faddd	%f48,%f52,%f52		! (3_1) res0 += dtmp0;

	fand	%f26,DA0,%f48		! (4_1) res0 = vis_fand(dres,DA0);

	fmuld	%f32,%f48,%f10		! (4_1) dtmp0 = res0_hi * res0;

	fmuld	%f22,%f16,%f16		! (5_1) dtmp2 = dd * dres;

	fmuld	%f36,%f48,%f36		! (4_1) dtmp1 = res0_lo * res0;

	fsubd	DONE,%f10,%f60		! (4_1) dtmp0 = DONE - dtmp0;

	fsubd	DTWO,%f16,%f16		! (5_1) dtmp2 = DTWO - dtmp2;

	ldd	[%fp+dtmp6],%f50	! (2_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f36,%f20		! (4_1) dtmp0 -= dtmp1;

	fmuld	%f22,%f16,%f28		! (5_1) dres = dd * dtmp2;

	fsqrtd	%f52,%f16		! (3_1) res0 = sqrt ( res0 );

	fmuld	%f20,%f26,%f52		! (4_1) dtmp0 *= dres;

	fmuld	%f50,%f24,%f0		! (2_1) res0 = scl0 * res0;

	st	%f0,[%i5]		! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];

	st	%f1,[%i5+4]		! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52		! (4_1) res0 += dtmp0;

	subcc	counter,1,counter
	bneg	.begin
	add	%i5,stridez,%i5		! pz += stridez

	fand	%f28,DA0,%f48		! (5_1) res0 = vis_fand(dres,DA0);

	fmuld	%f42,%f48,%f10		! (5_1) dtmp0 = res0_hi * res0;

	fmuld	%f34,%f48,%f34		! (5_1) dtmp1 = res0_lo * res0;

	fsubd	DONE,%f10,%f60		! (5_1) dtmp0 = DONE - dtmp0;

	ldd	[%fp+dtmp8],%f18	! (3_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f34,%f46		! (5_1) dtmp0 -= dtmp1;

	fsqrtd	%f52,%f24		! (4_1) res0 = sqrt ( res0 );

	fmuld	%f46,%f28,%f52		! (5_1) dtmp0 *= dres;

	fmuld	%f18,%f16,%f0		! (3_1) res0 = scl0 * res0;
	st	%f0,[%i5]		! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
	st	%f1,[%i5+4]		! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52		! (5_1) res0 += dtmp0;

	subcc	counter,1,counter
	bneg	.begin
	add	%i5,stridez,%i5		! 
pz += stridez 2070*5b2ba9d3SPiotr Jasiukajtis 2071*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; 2072*5b2ba9d3SPiotr Jasiukajtis 2073*5b2ba9d3SPiotr Jasiukajtis fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); 2074*5b2ba9d3SPiotr Jasiukajtis 2075*5b2ba9d3SPiotr Jasiukajtis fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0 2076*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; 2077*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; 2078*5b2ba9d3SPiotr Jasiukajtis 2079*5b2ba9d3SPiotr Jasiukajtis subcc counter,1,counter 2080*5b2ba9d3SPiotr Jasiukajtis bneg .begin 2081*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 2082*5b2ba9d3SPiotr Jasiukajtis 2083*5b2ba9d3SPiotr Jasiukajtis ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; 2084*5b2ba9d3SPiotr Jasiukajtis 2085*5b2ba9d3SPiotr Jasiukajtis fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; 2086*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; 2087*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; 2088*5b2ba9d3SPiotr Jasiukajtis 2089*5b2ba9d3SPiotr Jasiukajtis ba .begin 2090*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 2091*5b2ba9d3SPiotr Jasiukajtis 2092*5b2ba9d3SPiotr Jasiukajtis .align 16 2093*5b2ba9d3SPiotr Jasiukajtis.spec0: /* .spec0: special-case a single element (presumably entered when hx0 or hy0 is in the Inf/NaN range; the dispatching branch is outside this part of the file). If x0 or y0 has high word 0x7ff00000 and low word 0, i.e. is an infinity, the result is +0.0 (label 3). Otherwise the result is fabs(x0) * fabs(y0) (label 2). Both paths advance px, py and pz by one stride, decrement counter and return to .begin1. */ 2094*5b2ba9d3SPiotr Jasiukajtis cmp %o7,_0x7ff00000 ! hx0 ? 0x7ff00000 2095*5b2ba9d3SPiotr Jasiukajtis bne 1f ! if ( hx0 != 0x7ff00000 ) 2096*5b2ba9d3SPiotr Jasiukajtis ld [%i4+4],%i2 ! lx = ((int*)px)[1]; 2097*5b2ba9d3SPiotr Jasiukajtis 2098*5b2ba9d3SPiotr Jasiukajtis cmp %i2,0 ! lx ? 0 2099*5b2ba9d3SPiotr Jasiukajtis be 3f ! if ( lx == 0 ) 2100*5b2ba9d3SPiotr Jasiukajtis nop 2101*5b2ba9d3SPiotr Jasiukajtis1: 2102*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x7ff00000 ! hy0 ? 0x7ff00000 2103*5b2ba9d3SPiotr Jasiukajtis bne 2f ! if ( hy0 != 0x7ff00000 ) 2104*5b2ba9d3SPiotr Jasiukajtis ld [%i3+4],%o2 !
ly = ((int*)py)[1]; 2105*5b2ba9d3SPiotr Jasiukajtis 2106*5b2ba9d3SPiotr Jasiukajtis cmp %o2,0 ! ly ? 0 2107*5b2ba9d3SPiotr Jasiukajtis be 3f ! if ( ly == 0 ) /* the first load at label 2 is this branch's delay slot; it only touches %f0, which label 3 does not use */ 2108*5b2ba9d3SPiotr Jasiukajtis2: 2109*5b2ba9d3SPiotr Jasiukajtis ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; 2110*5b2ba9d3SPiotr Jasiukajtis ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; 2111*5b2ba9d3SPiotr Jasiukajtis 2112*5b2ba9d3SPiotr Jasiukajtis ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0]; 2113*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i4 ! px += stridex 2114*5b2ba9d3SPiotr Jasiukajtis ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1]; 2115*5b2ba9d3SPiotr Jasiukajtis 2116*5b2ba9d3SPiotr Jasiukajtis fabsd %f0,%f0 2117*5b2ba9d3SPiotr Jasiukajtis 2118*5b2ba9d3SPiotr Jasiukajtis fabsd %f2,%f2 2119*5b2ba9d3SPiotr Jasiukajtis 2120*5b2ba9d3SPiotr Jasiukajtis fmuld %f0,%f2,%f0 ! res0 = fabs(x0) * fabs(y0); 2121*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i3 ! py += stridey; 2122*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; 2123*5b2ba9d3SPiotr Jasiukajtis 2124*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; 2125*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 2126*5b2ba9d3SPiotr Jasiukajtis ba .begin1 2127*5b2ba9d3SPiotr Jasiukajtis sub counter,1,counter 2128*5b2ba9d3SPiotr Jasiukajtis3: /* Infinite operand: result is +0.0. Both 32-bit halves of *pz must be written BEFORE pz is advanced; storing the low word after the bump would leave the current result's low word stale and write into the next element's slot (out of bounds for the last element). */ 2129*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i4 ! px += stridex 2130*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i3 ! py += stridey 2131*5b2ba9d3SPiotr Jasiukajtis st %g0,[%i5] ! ((int*)pz)[0] = 0; 2132*5b2ba9d3SPiotr Jasiukajtis 2133*5b2ba9d3SPiotr Jasiukajtis st %g0,[%i5+4] ! ((int*)pz)[1] = 0; 2134*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez; 2135*5b2ba9d3SPiotr Jasiukajtis ba .begin1 2136*5b2ba9d3SPiotr Jasiukajtis sub counter,1,counter 2137*5b2ba9d3SPiotr Jasiukajtis 2138*5b2ba9d3SPiotr Jasiukajtis .align 16 2139*5b2ba9d3SPiotr Jasiukajtis.spec1: 2140*5b2ba9d3SPiotr Jasiukajtis and %o1,%o3,%o1 !
(7_0) j0 &= diff0; 2141*5b2ba9d3SPiotr Jasiukajtis /* .spec1 body: when hy0 >= 0x00100000, resume at .cont_spec0 (the lx load that follows the branch is its delay slot). Otherwise OR together hx0, hy0, lx and ly: if the result is zero, store 1.0/0.0 to *pz, step px/py/pz, decrement counter and go to .begin1; if it is non-zero, continue at label 1 with %i2 = 0x00080000 (set in the annulled delay slot, so only on the taken path). */ 2142*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (7_0) hy0 ? 0x00100000 2143*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont_spec0 ! (7_0) if ( hy0 < 0x00100000 ) 2144*5b2ba9d3SPiotr Jasiukajtis 2145*5b2ba9d3SPiotr Jasiukajtis ld [%i4+4],%i2 ! lx = ((int*)px)[1]; 2146*5b2ba9d3SPiotr Jasiukajtis or %o7,%l7,%g5 ! ii = hx0 | hy0; 2147*5b2ba9d3SPiotr Jasiukajtis fzero %f0 2148*5b2ba9d3SPiotr Jasiukajtis 2149*5b2ba9d3SPiotr Jasiukajtis ld [%i3+4],%o2 ! ly = ((int*)py)[1]; 2150*5b2ba9d3SPiotr Jasiukajtis or %i2,%g5,%g5 ! ii |= lx; 2151*5b2ba9d3SPiotr Jasiukajtis 2152*5b2ba9d3SPiotr Jasiukajtis orcc %o2,%g5,%g5 ! ii |= ly; 2153*5b2ba9d3SPiotr Jasiukajtis bnz,a,pn %icc,1f ! if ( ii != 0 ) 2154*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x00080000),%i2 2155*5b2ba9d3SPiotr Jasiukajtis 2156*5b2ba9d3SPiotr Jasiukajtis fdivd DONE,%f0,%f0 ! res0 = 1.0 / 0.0; 2157*5b2ba9d3SPiotr Jasiukajtis 2158*5b2ba9d3SPiotr Jasiukajtis st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; 2159*5b2ba9d3SPiotr Jasiukajtis 2160*5b2ba9d3SPiotr Jasiukajtis add %i4,stridex,%i4 ! px += stridex; 2161*5b2ba9d3SPiotr Jasiukajtis add %i3,stridey,%i3 ! py += stridey; 2162*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; 2163*5b2ba9d3SPiotr Jasiukajtis 2164*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez; 2165*5b2ba9d3SPiotr Jasiukajtis ba .begin1 2166*5b2ba9d3SPiotr Jasiukajtis sub counter,1,counter 2167*5b2ba9d3SPiotr Jasiukajtis1: /* Operands are not both zero: load x0 and y0, take absolute values and convert each from its raw 64-bit pattern with fxtod (operands whose high word is >= 0x00080000 are first masked with 0x0007ffffffffffff and then get D2ON51 added). The converted values are stored in the stack slots dtmp2/dtmp3 and %i4/%i3 (px/py) are repointed at those slots - presumably so the regular path at .cont_spec1 can consume them; confirm there. */ 2168*5b2ba9d3SPiotr Jasiukajtis ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; 2169*5b2ba9d3SPiotr Jasiukajtis 2170*5b2ba9d3SPiotr Jasiukajtis ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; 2171*5b2ba9d3SPiotr Jasiukajtis 2172*5b2ba9d3SPiotr Jasiukajtis ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0]; 2173*5b2ba9d3SPiotr Jasiukajtis 2174*5b2ba9d3SPiotr Jasiukajtis fabsd %f0,%f0 ! x0 = fabs(x0); 2175*5b2ba9d3SPiotr Jasiukajtis ld [%i3+4],%f3 !
((float*)&y0)[1] = ((float*)py)[1]; 2176*5b2ba9d3SPiotr Jasiukajtis 2177*5b2ba9d3SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+64],%f12 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; 2178*5b2ba9d3SPiotr Jasiukajtis add %fp,dtmp2,%i4 2179*5b2ba9d3SPiotr Jasiukajtis add %fp,dtmp3,%i3 2180*5b2ba9d3SPiotr Jasiukajtis 2181*5b2ba9d3SPiotr Jasiukajtis fabsd %f2,%f2 ! y0 = fabs(y0); 2182*5b2ba9d3SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+56],%f10 ! D2ON51 2183*5b2ba9d3SPiotr Jasiukajtis 2184*5b2ba9d3SPiotr Jasiukajtis ldx [TBL+TBL_SHIFT+48],%g5 ! D2ONM52 2185*5b2ba9d3SPiotr Jasiukajtis /* bl,a is annulled: the fxtod in its delay slot runs only when the branch is taken, so the fand/fxtod/faddd sequence below is the not-taken (high word >= 0x00080000) path. The same shape is repeated for y0. */ cmp %o7,%i2 ! hx0 ? 0x00080000 2186*5b2ba9d3SPiotr Jasiukajtis bl,a 1f ! if ( hx0 < 0x00080000 ) 2187*5b2ba9d3SPiotr Jasiukajtis fxtod %f0,%f0 ! x0 = *(long long*)&x0; 2188*5b2ba9d3SPiotr Jasiukajtis 2189*5b2ba9d3SPiotr Jasiukajtis fand %f0,%f12,%f0 ! x0 = vis_fand(x0, dtmp0); 2190*5b2ba9d3SPiotr Jasiukajtis fxtod %f0,%f0 ! x0 = *(long long*)&x0; 2191*5b2ba9d3SPiotr Jasiukajtis faddd %f0,%f10,%f0 ! x0 += D2ON51; 2192*5b2ba9d3SPiotr Jasiukajtis1: 2193*5b2ba9d3SPiotr Jasiukajtis std %f0,[%i4] 2194*5b2ba9d3SPiotr Jasiukajtis 2195*5b2ba9d3SPiotr Jasiukajtis ldx [TBL+TBL_SHIFT+40],%g1 ! D2ON1022 2196*5b2ba9d3SPiotr Jasiukajtis cmp %l7,%i2 ! hy0 ? 0x00080000 2197*5b2ba9d3SPiotr Jasiukajtis bl,a 1f ! if ( hy0 < 0x00080000 ) 2198*5b2ba9d3SPiotr Jasiukajtis fxtod %f2,%f2 ! y0 = *(long long*)&y0; 2199*5b2ba9d3SPiotr Jasiukajtis 2200*5b2ba9d3SPiotr Jasiukajtis fand %f2,%f12,%f2 ! y0 = vis_fand(y0, dtmp0); 2201*5b2ba9d3SPiotr Jasiukajtis fxtod %f2,%f2 ! y0 = *(long long*)&y0; 2202*5b2ba9d3SPiotr Jasiukajtis faddd %f2,%f10,%f2 ! y0 += D2ON51; 2203*5b2ba9d3SPiotr Jasiukajtis1: 2204*5b2ba9d3SPiotr Jasiukajtis std %f2,[%i3] 2205*5b2ba9d3SPiotr Jasiukajtis 2206*5b2ba9d3SPiotr Jasiukajtis stx %g5,[%fp+dtmp15] ! D2ONM52 2207*5b2ba9d3SPiotr Jasiukajtis 2208*5b2ba9d3SPiotr Jasiukajtis ba .cont_spec1 2209*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp0] !
D2ON1022 2210*5b2ba9d3SPiotr Jasiukajtis 2211*5b2ba9d3SPiotr Jasiukajtis .align 16 2212*5b2ba9d3SPiotr Jasiukajtis.update0: /* .update0: if counter > 1, save counter-1 to tmp_counter and %i2/%o0 to tmp_px/tmp_py and set counter = 1. In every case load %o4 with %hi(0x3ff00000), aim %i2/%o0 at TBL+TBL_SHIFT+24 and rejoin at .cont1. */ 2213*5b2ba9d3SPiotr Jasiukajtis cmp counter,1 2214*5b2ba9d3SPiotr Jasiukajtis ble 1f 2215*5b2ba9d3SPiotr Jasiukajtis nop 2216*5b2ba9d3SPiotr Jasiukajtis 2217*5b2ba9d3SPiotr Jasiukajtis sub counter,1,counter 2218*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2219*5b2ba9d3SPiotr Jasiukajtis 2220*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2221*5b2ba9d3SPiotr Jasiukajtis 2222*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2223*5b2ba9d3SPiotr Jasiukajtis 2224*5b2ba9d3SPiotr Jasiukajtis mov 1,counter 2225*5b2ba9d3SPiotr Jasiukajtis1: 2226*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 2227*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2228*5b2ba9d3SPiotr Jasiukajtis ba .cont1 2229*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 2230*5b2ba9d3SPiotr Jasiukajtis 2231*5b2ba9d3SPiotr Jasiukajtis .align 16 2232*5b2ba9d3SPiotr Jasiukajtis.update1: /* .update1: when hy0 >= 0x00100000, return to .cont0 (the cmp counter,1 after the branch is its delay slot). Otherwise, if counter > 1, save counter-1 to tmp_counter and %i2/%o0 to tmp_px/tmp_py and set counter = 1; then load %o4 with %hi(0x3ff00000), aim %i2/%o0 at TBL+TBL_SHIFT+24 and go to .cont1. */ 2233*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 2234*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont0 !
(0_0) if ( hy0 < 0x00100000 ) 2235*5b2ba9d3SPiotr Jasiukajtis 2236*5b2ba9d3SPiotr Jasiukajtis cmp counter,1 2237*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 2238*5b2ba9d3SPiotr Jasiukajtis nop 2239*5b2ba9d3SPiotr Jasiukajtis 2240*5b2ba9d3SPiotr Jasiukajtis sub counter,1,counter 2241*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2242*5b2ba9d3SPiotr Jasiukajtis 2243*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2244*5b2ba9d3SPiotr Jasiukajtis 2245*5b2ba9d3SPiotr Jasiukajtis mov 1,counter 2246*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2247*5b2ba9d3SPiotr Jasiukajtis1: 2248*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 2249*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2250*5b2ba9d3SPiotr Jasiukajtis ba .cont1 2251*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 2252*5b2ba9d3SPiotr Jasiukajtis 2253*5b2ba9d3SPiotr Jasiukajtis .align 16 2254*5b2ba9d3SPiotr Jasiukajtis.update2: /* .update2: if counter > 2, save counter-2 to tmp_counter and %i4/%i3 to tmp_px/tmp_py and set counter = 2. Then run the (7_1) y_hi0/res0_hi/x_lo0/dtmp0/res0_lo computations below (presumably copies of main-loop work bypassed on the way here - confirm at the branch site), load %o4 with %hi(0x3ff00000), aim %i4/%i3 at TBL+TBL_SHIFT+24 and rejoin at .cont4. */ 2255*5b2ba9d3SPiotr Jasiukajtis cmp counter,2 2256*5b2ba9d3SPiotr Jasiukajtis ble 1f 2257*5b2ba9d3SPiotr Jasiukajtis nop 2258*5b2ba9d3SPiotr Jasiukajtis 2259*5b2ba9d3SPiotr Jasiukajtis sub counter,2,counter 2260*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2261*5b2ba9d3SPiotr Jasiukajtis 2262*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2263*5b2ba9d3SPiotr Jasiukajtis 2264*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2265*5b2ba9d3SPiotr Jasiukajtis 2266*5b2ba9d3SPiotr Jasiukajtis mov 2,counter 2267*5b2ba9d3SPiotr Jasiukajtis1: 2268*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; 2269*5b2ba9d3SPiotr Jasiukajtis 2270*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; 2271*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; 2272*5b2ba9d3SPiotr Jasiukajtis 2273*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; 2274*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 !
(7_1) res0_lo = x0 + x_hi0; 2275*5b2ba9d3SPiotr Jasiukajtis 2276*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 2277*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2278*5b2ba9d3SPiotr Jasiukajtis ba .cont4 2279*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2280*5b2ba9d3SPiotr Jasiukajtis 2281*5b2ba9d3SPiotr Jasiukajtis .align 16 2282*5b2ba9d3SPiotr Jasiukajtis.update3: /* .update3: same bookkeeping as the code at this label shows - if counter > 2, save counter-2 to tmp_counter and %i4/%i3 to tmp_px/tmp_py and set counter = 2 - followed by the (7_1) res0_hi/x_lo0/dtmp0/res0_lo computations (no y_hi0 adjustment here), %o4 = %hi(0x3ff00000), %i4/%i3 aimed at TBL+TBL_SHIFT+24, and a jump to .cont4. */ 2283*5b2ba9d3SPiotr Jasiukajtis cmp counter,2 2284*5b2ba9d3SPiotr Jasiukajtis ble 1f 2285*5b2ba9d3SPiotr Jasiukajtis nop 2286*5b2ba9d3SPiotr Jasiukajtis 2287*5b2ba9d3SPiotr Jasiukajtis sub counter,2,counter 2288*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2289*5b2ba9d3SPiotr Jasiukajtis 2290*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2291*5b2ba9d3SPiotr Jasiukajtis 2292*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2293*5b2ba9d3SPiotr Jasiukajtis 2294*5b2ba9d3SPiotr Jasiukajtis mov 2,counter 2295*5b2ba9d3SPiotr Jasiukajtis1: 2296*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; 2297*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; 2298*5b2ba9d3SPiotr Jasiukajtis 2299*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; 2300*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; 2301*5b2ba9d3SPiotr Jasiukajtis 2302*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 2303*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2304*5b2ba9d3SPiotr Jasiukajtis ba .cont4 2305*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2306*5b2ba9d3SPiotr Jasiukajtis 2307*5b2ba9d3SPiotr Jasiukajtis .align 16 2308*5b2ba9d3SPiotr Jasiukajtis.update4: /* .update4: when hy0 >= 0x00100000, return to .cont4 (bge,a: the sub in the delay slot executes only on that taken path). Otherwise, if counter > 2, save counter-2 to tmp_counter and %i4/%i3 to tmp_px/tmp_py and set counter = 2; then load %o4 with %hi(0x3ff00000), aim %i4/%i3 at TBL+TBL_SHIFT+24 and go to .cont4. */ 2309*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 2310*5b2ba9d3SPiotr Jasiukajtis bge,a,pn %icc,.cont4 ! (0_0) if ( hy0 < 0x00100000 ) 2311*5b2ba9d3SPiotr Jasiukajtis sub %l0,%o4,%o4 !
(1_0) j0 = 0x7ff00000 - j0; 2312*5b2ba9d3SPiotr Jasiukajtis 2313*5b2ba9d3SPiotr Jasiukajtis cmp counter,2 2314*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 2315*5b2ba9d3SPiotr Jasiukajtis nop 2316*5b2ba9d3SPiotr Jasiukajtis 2317*5b2ba9d3SPiotr Jasiukajtis sub counter,2,counter 2318*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2319*5b2ba9d3SPiotr Jasiukajtis 2320*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2321*5b2ba9d3SPiotr Jasiukajtis 2322*5b2ba9d3SPiotr Jasiukajtis mov 2,counter 2323*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2324*5b2ba9d3SPiotr Jasiukajtis1: 2325*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%o4 2326*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2327*5b2ba9d3SPiotr Jasiukajtis ba .cont4 2328*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2329*5b2ba9d3SPiotr Jasiukajtis 2330*5b2ba9d3SPiotr Jasiukajtis .align 16 2331*5b2ba9d3SPiotr Jasiukajtis.update5: /* .update5: if counter > 3, save counter-3 to tmp_counter and %i2/%o0 to tmp_px/tmp_py and set counter = 3. Then store the high word of %f14 to ftmp0, run the (0_0) x_hi0/y_hi0/res0_hi/x_lo0/dtmp0/res0_lo computations, set %g1 = 0x3ff00000 << 32, aim %i2/%o0 at TBL+TBL_SHIFT+24 and rejoin at .cont8. */ 2332*5b2ba9d3SPiotr Jasiukajtis cmp counter,3 2333*5b2ba9d3SPiotr Jasiukajtis ble 1f 2334*5b2ba9d3SPiotr Jasiukajtis nop 2335*5b2ba9d3SPiotr Jasiukajtis 2336*5b2ba9d3SPiotr Jasiukajtis sub counter,3,counter 2337*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2338*5b2ba9d3SPiotr Jasiukajtis 2339*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2340*5b2ba9d3SPiotr Jasiukajtis 2341*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2342*5b2ba9d3SPiotr Jasiukajtis 2343*5b2ba9d3SPiotr Jasiukajtis mov 3,counter 2344*5b2ba9d3SPiotr Jasiukajtis1: 2345*5b2ba9d3SPiotr Jasiukajtis st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; 2346*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; 2347*5b2ba9d3SPiotr Jasiukajtis 2348*5b2ba9d3SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; 2349*5b2ba9d3SPiotr Jasiukajtis 2350*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; 2351*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 !
(0_0) x_lo0 = x0 - x_hi0; 2352*5b2ba9d3SPiotr Jasiukajtis 2353*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; 2354*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; 2355*5b2ba9d3SPiotr Jasiukajtis 2356*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2357*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2358*5b2ba9d3SPiotr Jasiukajtis 2359*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 2360*5b2ba9d3SPiotr Jasiukajtis ba .cont8 2361*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 2362*5b2ba9d3SPiotr Jasiukajtis 2363*5b2ba9d3SPiotr Jasiukajtis .align 16 2364*5b2ba9d3SPiotr Jasiukajtis.update6: /* .update6: if counter > 3, save counter-3 to tmp_counter and %i2/%o0 to tmp_px/tmp_py and set counter = 3. Then run the (0_0) res0_hi/x_lo0/dtmp0/res0_lo computations, set %g1 = 0x3ff00000 << 32, aim %i2/%o0 at TBL+TBL_SHIFT+24 and rejoin at .cont8. */ 2365*5b2ba9d3SPiotr Jasiukajtis cmp counter,3 2366*5b2ba9d3SPiotr Jasiukajtis ble 1f 2367*5b2ba9d3SPiotr Jasiukajtis nop 2368*5b2ba9d3SPiotr Jasiukajtis 2369*5b2ba9d3SPiotr Jasiukajtis sub counter,3,counter 2370*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2371*5b2ba9d3SPiotr Jasiukajtis 2372*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2373*5b2ba9d3SPiotr Jasiukajtis 2374*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2375*5b2ba9d3SPiotr Jasiukajtis 2376*5b2ba9d3SPiotr Jasiukajtis mov 3,counter 2377*5b2ba9d3SPiotr Jasiukajtis1: 2378*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; 2379*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; 2380*5b2ba9d3SPiotr Jasiukajtis 2381*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; 2382*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 !
(0_0) res0_lo = x0 + x_hi0; 2383*5b2ba9d3SPiotr Jasiukajtis 2384*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2385*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2386*5b2ba9d3SPiotr Jasiukajtis 2387*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 2388*5b2ba9d3SPiotr Jasiukajtis ba .cont8 2389*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 2390*5b2ba9d3SPiotr Jasiukajtis 2391*5b2ba9d3SPiotr Jasiukajtis .align 16 2392*5b2ba9d3SPiotr Jasiukajtis.update7: /* .update7: when hy0 >= 0x00100000, return to .cont7 (the cmp counter,3 after the branch is its delay slot). Otherwise, if counter > 3, save counter-3 to tmp_counter and %i2/%o0 to tmp_px/tmp_py and set counter = 3; then set %g1 = 0x3ff00000 << 32, aim %i2/%o0 at TBL+TBL_SHIFT+24 and go to .cont8. */ 2393*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 2394*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont7 ! (0_0) if ( hy0 < 0x00100000 ) 2395*5b2ba9d3SPiotr Jasiukajtis 2396*5b2ba9d3SPiotr Jasiukajtis cmp counter,3 2397*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 2398*5b2ba9d3SPiotr Jasiukajtis nop 2399*5b2ba9d3SPiotr Jasiukajtis 2400*5b2ba9d3SPiotr Jasiukajtis sub counter,3,counter 2401*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2402*5b2ba9d3SPiotr Jasiukajtis 2403*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2404*5b2ba9d3SPiotr Jasiukajtis 2405*5b2ba9d3SPiotr Jasiukajtis mov 3,counter 2406*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2407*5b2ba9d3SPiotr Jasiukajtis1: 2408*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2409*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2410*5b2ba9d3SPiotr Jasiukajtis 2411*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 2412*5b2ba9d3SPiotr Jasiukajtis ba .cont8 2413*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 2414*5b2ba9d3SPiotr Jasiukajtis 2415*5b2ba9d3SPiotr Jasiukajtis .align 16 2416*5b2ba9d3SPiotr Jasiukajtis.update9: /* .update9: if counter > 4, save counter-4 to tmp_counter and %i4/%i3 to tmp_px/tmp_py and set counter = 4; the code after local label 1 then stores the high word of %f22 to ftmp0, runs the listed (1_0)/(7_1) computations, loads %g1 with %hi(0x3ff00000), aims %i4/%i3 at TBL+TBL_SHIFT+24 and rejoins at .cont12. */ 2417*5b2ba9d3SPiotr Jasiukajtis cmp counter,4 2418*5b2ba9d3SPiotr Jasiukajtis ble 1f 2419*5b2ba9d3SPiotr Jasiukajtis nop 2420*5b2ba9d3SPiotr Jasiukajtis 2421*5b2ba9d3SPiotr Jasiukajtis sub counter,4,counter 2422*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2423*5b2ba9d3SPiotr Jasiukajtis 2424*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2425*5b2ba9d3SPiotr Jasiukajtis 2426*5b2ba9d3SPiotr
Jasiukajtis stx %i3,[%fp+tmp_py] 2427*5b2ba9d3SPiotr Jasiukajtis 2428*5b2ba9d3SPiotr Jasiukajtis mov 4,counter 2429*5b2ba9d3SPiotr Jasiukajtis1: /* common tail of .update9: counter is now at most 4; redo the FP steps listed below, then redirect %i4/%i3 to TBL+TBL_SHIFT+24 and continue at .cont12 */ 2430*5b2ba9d3SPiotr Jasiukajtis st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; 2431*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; 2432*5b2ba9d3SPiotr Jasiukajtis 2433*5b2ba9d3SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; 2434*5b2ba9d3SPiotr Jasiukajtis 2435*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; 2436*5b2ba9d3SPiotr Jasiukajtis 2437*5b2ba9d3SPiotr Jasiukajtis 2438*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; 2439*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; 2440*5b2ba9d3SPiotr Jasiukajtis 2441*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; 2442*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; 2443*5b2ba9d3SPiotr Jasiukajtis 2444*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 !
(7_1) dtmp0 = DTWO - dtmp0; 2445*5b2ba9d3SPiotr Jasiukajtis 2446*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2447*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2448*5b2ba9d3SPiotr Jasiukajtis ba .cont12 2449*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2450*5b2ba9d3SPiotr Jasiukajtis 2451*5b2ba9d3SPiotr Jasiukajtis .align 16 2452*5b2ba9d3SPiotr Jasiukajtis.update10: /* .update10: if counter > 4, save counter-4 to tmp_counter and %i4/%i3 to tmp_px/tmp_py and set counter = 4. Then run the (7_1) dd*dres product, the (1_0) res0_hi/x_lo0/dtmp0/res0_lo computations and the (7_1) DTWO - dtmp0 step, load %g1 with %hi(0x3ff00000), aim %i4/%i3 at TBL+TBL_SHIFT+24 and rejoin at .cont12. */ 2453*5b2ba9d3SPiotr Jasiukajtis cmp counter,4 2454*5b2ba9d3SPiotr Jasiukajtis ble 1f 2455*5b2ba9d3SPiotr Jasiukajtis nop 2456*5b2ba9d3SPiotr Jasiukajtis 2457*5b2ba9d3SPiotr Jasiukajtis sub counter,4,counter 2458*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2459*5b2ba9d3SPiotr Jasiukajtis 2460*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2461*5b2ba9d3SPiotr Jasiukajtis 2462*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2463*5b2ba9d3SPiotr Jasiukajtis 2464*5b2ba9d3SPiotr Jasiukajtis mov 4,counter 2465*5b2ba9d3SPiotr Jasiukajtis1: 2466*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; 2467*5b2ba9d3SPiotr Jasiukajtis 2468*5b2ba9d3SPiotr Jasiukajtis 2469*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; 2470*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; 2471*5b2ba9d3SPiotr Jasiukajtis 2472*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; 2473*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; 2474*5b2ba9d3SPiotr Jasiukajtis 2475*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; 2476*5b2ba9d3SPiotr Jasiukajtis 2477*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2478*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2479*5b2ba9d3SPiotr Jasiukajtis ba .cont12 2480*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2481*5b2ba9d3SPiotr Jasiukajtis 2482*5b2ba9d3SPiotr Jasiukajtis .align 16 2483*5b2ba9d3SPiotr Jasiukajtis.update11: /* .update11: when hy0 >= 0x00100000, return to .cont11 (the cmp counter,4 after the branch is its delay slot). Otherwise, if counter > 4, save counter-4 to tmp_counter and %i4/%i3 to tmp_px/tmp_py and set counter = 4; then load %g1 with %hi(0x3ff00000), compute DTWO - %f50 into %f20, aim %i4/%i3 at TBL+TBL_SHIFT+24 and go to .cont12. */ 2484*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ?
0x00100000 2485*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont11 ! (0_0) if ( hy0 < 0x00100000 ) 2486*5b2ba9d3SPiotr Jasiukajtis 2487*5b2ba9d3SPiotr Jasiukajtis cmp counter,4 2488*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 2489*5b2ba9d3SPiotr Jasiukajtis nop 2490*5b2ba9d3SPiotr Jasiukajtis 2491*5b2ba9d3SPiotr Jasiukajtis sub counter,4,counter 2492*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2493*5b2ba9d3SPiotr Jasiukajtis 2494*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2495*5b2ba9d3SPiotr Jasiukajtis 2496*5b2ba9d3SPiotr Jasiukajtis mov 4,counter 2497*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2498*5b2ba9d3SPiotr Jasiukajtis1: 2499*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2500*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2501*5b2ba9d3SPiotr Jasiukajtis 2502*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; 2503*5b2ba9d3SPiotr Jasiukajtis ba .cont12 2504*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2505*5b2ba9d3SPiotr Jasiukajtis 2506*5b2ba9d3SPiotr Jasiukajtis .align 16 2507*5b2ba9d3SPiotr Jasiukajtis.update13: /* .update13: if counter > 5, save counter-5 to tmp_counter and %i2/%o0 to tmp_px/tmp_py and set counter = 5. Then run the (2_0) x_hi0/y_hi0 adjustments, the (0_0) dd*dres product, the (2_0) res0_hi/x_lo0/dtmp0/res0_lo computations and the (0_0) DTWO - dtmp0 step, load %g1 with %hi(0x3ff00000), aim %i2/%o0 at TBL+TBL_SHIFT+24 and rejoin at .cont16. */ 2508*5b2ba9d3SPiotr Jasiukajtis cmp counter,5 2509*5b2ba9d3SPiotr Jasiukajtis ble 1f 2510*5b2ba9d3SPiotr Jasiukajtis nop 2511*5b2ba9d3SPiotr Jasiukajtis 2512*5b2ba9d3SPiotr Jasiukajtis sub counter,5,counter 2513*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2514*5b2ba9d3SPiotr Jasiukajtis 2515*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2516*5b2ba9d3SPiotr Jasiukajtis 2517*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2518*5b2ba9d3SPiotr Jasiukajtis 2519*5b2ba9d3SPiotr Jasiukajtis mov 5,counter 2520*5b2ba9d3SPiotr Jasiukajtis1: 2521*5b2ba9d3SPiotr Jasiukajtis fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; 2522*5b2ba9d3SPiotr Jasiukajtis 2523*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; 2524*5b2ba9d3SPiotr Jasiukajtis 2525*5b2ba9d3SPiotr Jasiukajtis fmuld %f28,%f22,%f50 !
(0_0) dtmp0 = dd * dres; 2526*5b2ba9d3SPiotr Jasiukajtis 2527*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 2528*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 2529*5b2ba9d3SPiotr Jasiukajtis 2530*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 2531*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; 2532*5b2ba9d3SPiotr Jasiukajtis 2533*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; 2534*5b2ba9d3SPiotr Jasiukajtis 2535*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2536*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2537*5b2ba9d3SPiotr Jasiukajtis ba .cont16 2538*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 2539*5b2ba9d3SPiotr Jasiukajtis 2540*5b2ba9d3SPiotr Jasiukajtis .align 16 2541*5b2ba9d3SPiotr Jasiukajtis.update14: /* .update14: if counter > 5, save counter-5 to tmp_counter and %i2/%o0 to tmp_px/tmp_py and set counter = 5. Then run the (0_0) dd*dres product, the (2_0) res0_hi/x_lo0/dtmp0/res0_lo computations and the (0_0) DTWO - dtmp0 step, load %g1 with %hi(0x3ff00000), aim %i2/%o0 at TBL+TBL_SHIFT+24 and rejoin at .cont16. */ 2542*5b2ba9d3SPiotr Jasiukajtis cmp counter,5 2543*5b2ba9d3SPiotr Jasiukajtis ble 1f 2544*5b2ba9d3SPiotr Jasiukajtis nop 2545*5b2ba9d3SPiotr Jasiukajtis 2546*5b2ba9d3SPiotr Jasiukajtis sub counter,5,counter 2547*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2548*5b2ba9d3SPiotr Jasiukajtis 2549*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2550*5b2ba9d3SPiotr Jasiukajtis 2551*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2552*5b2ba9d3SPiotr Jasiukajtis 2553*5b2ba9d3SPiotr Jasiukajtis mov 5,counter 2554*5b2ba9d3SPiotr Jasiukajtis1: 2555*5b2ba9d3SPiotr Jasiukajtis fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; 2556*5b2ba9d3SPiotr Jasiukajtis 2557*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 2558*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 2559*5b2ba9d3SPiotr Jasiukajtis 2560*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 2561*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 !
(2_0) res0_lo = x0 + x_hi0; 2562*5b2ba9d3SPiotr Jasiukajtis 2563*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; 2564*5b2ba9d3SPiotr Jasiukajtis 2565*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2566*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2567*5b2ba9d3SPiotr Jasiukajtis ba .cont16 2568*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 2569*5b2ba9d3SPiotr Jasiukajtis 2570*5b2ba9d3SPiotr Jasiukajtis .align 16 2571*5b2ba9d3SPiotr Jasiukajtis.update15: /* .update15: when hy0 >= 0x00100000, return to .cont15 (the cmp counter,5 after the branch is its delay slot). Otherwise, if counter > 5, save counter-5 to tmp_counter and %i2/%o0 to tmp_px/tmp_py and set counter = 5; then load %g1 with %hi(0x3ff00000), compute DTWO - %f50 into %f20, aim %i2/%o0 at TBL+TBL_SHIFT+24 and go to .cont16. */ 2572*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 2573*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont15 ! (0_0) if ( hy0 < 0x00100000 ) 2574*5b2ba9d3SPiotr Jasiukajtis 2575*5b2ba9d3SPiotr Jasiukajtis cmp counter,5 2576*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 2577*5b2ba9d3SPiotr Jasiukajtis nop 2578*5b2ba9d3SPiotr Jasiukajtis 2579*5b2ba9d3SPiotr Jasiukajtis sub counter,5,counter 2580*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2581*5b2ba9d3SPiotr Jasiukajtis 2582*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2583*5b2ba9d3SPiotr Jasiukajtis 2584*5b2ba9d3SPiotr Jasiukajtis mov 5,counter 2585*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2586*5b2ba9d3SPiotr Jasiukajtis1: 2587*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2588*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 2589*5b2ba9d3SPiotr Jasiukajtis 2590*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 !
(0_0) dtmp0 = DTWO - dtmp0; 2591*5b2ba9d3SPiotr Jasiukajtis ba .cont16 2592*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 2593*5b2ba9d3SPiotr Jasiukajtis 2594*5b2ba9d3SPiotr Jasiukajtis .align 16 2595*5b2ba9d3SPiotr Jasiukajtis.update17: /* .update17: if counter > 6, save counter-6 to tmp_counter and %i4/%i3 to tmp_px/tmp_py and set counter = 6. Then run the (3_0)/(1_0)/(7_1)/(0_0) FP steps listed below, store 0x3ff00000 << 32 to dtmp11, aim %i4/%i3 at TBL+TBL_SHIFT+24 and rejoin at .cont20. */ 2596*5b2ba9d3SPiotr Jasiukajtis cmp counter,6 2597*5b2ba9d3SPiotr Jasiukajtis ble 1f 2598*5b2ba9d3SPiotr Jasiukajtis nop 2599*5b2ba9d3SPiotr Jasiukajtis 2600*5b2ba9d3SPiotr Jasiukajtis sub counter,6,counter 2601*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2602*5b2ba9d3SPiotr Jasiukajtis 2603*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2604*5b2ba9d3SPiotr Jasiukajtis 2605*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2606*5b2ba9d3SPiotr Jasiukajtis 2607*5b2ba9d3SPiotr Jasiukajtis mov 6,counter 2608*5b2ba9d3SPiotr Jasiukajtis1: 2609*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; 2610*5b2ba9d3SPiotr Jasiukajtis 2611*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; 2612*5b2ba9d3SPiotr Jasiukajtis 2613*5b2ba9d3SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); 2614*5b2ba9d3SPiotr Jasiukajtis 2615*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 2616*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; 2617*5b2ba9d3SPiotr Jasiukajtis 2618*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; 2619*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 2620*5b2ba9d3SPiotr Jasiukajtis 2621*5b2ba9d3SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 2622*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; 2623*5b2ba9d3SPiotr Jasiukajtis 2624*5b2ba9d3SPiotr Jasiukajtis fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; 2625*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; 2626*5b2ba9d3SPiotr Jasiukajtis 2627*5b2ba9d3SPiotr Jasiukajtis fmuld %f38,%f48,%f38 !
(7_1) dtmp1 = res0_lo * res0; 2628*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; 2629*5b2ba9d3SPiotr Jasiukajtis 2630*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2631*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2632*5b2ba9d3SPiotr Jasiukajtis 2633*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; 2634*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; 2635*5b2ba9d3SPiotr Jasiukajtis ba .cont20 2636*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2637*5b2ba9d3SPiotr Jasiukajtis 2638*5b2ba9d3SPiotr Jasiukajtis .align 16 2639*5b2ba9d3SPiotr Jasiukajtis.update18: /* .update18: if counter > 6, save counter-6 to tmp_counter and %i4/%i3 to tmp_px/tmp_py and set counter = 6. Then run the FP steps listed below (this entry does not perform the (3_0) y_hi0 -= D2ON36 adjustment), store 0x3ff00000 << 32 to dtmp11, aim %i4/%i3 at TBL+TBL_SHIFT+24 and rejoin at .cont20. */ 2640*5b2ba9d3SPiotr Jasiukajtis cmp counter,6 2641*5b2ba9d3SPiotr Jasiukajtis ble 1f 2642*5b2ba9d3SPiotr Jasiukajtis nop 2643*5b2ba9d3SPiotr Jasiukajtis 2644*5b2ba9d3SPiotr Jasiukajtis sub counter,6,counter 2645*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2646*5b2ba9d3SPiotr Jasiukajtis 2647*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2648*5b2ba9d3SPiotr Jasiukajtis 2649*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2650*5b2ba9d3SPiotr Jasiukajtis 2651*5b2ba9d3SPiotr Jasiukajtis mov 6,counter 2652*5b2ba9d3SPiotr Jasiukajtis1: 2653*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; 2654*5b2ba9d3SPiotr Jasiukajtis 2655*5b2ba9d3SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); 2656*5b2ba9d3SPiotr Jasiukajtis 2657*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 2658*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; 2659*5b2ba9d3SPiotr Jasiukajtis 2660*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; 2661*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 2662*5b2ba9d3SPiotr Jasiukajtis 2663*5b2ba9d3SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 2664*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 !
(1_0) dtmp0 = DTWO - dtmp0; 2665*5b2ba9d3SPiotr Jasiukajtis 2666*5b2ba9d3SPiotr Jasiukajtis fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; 2667*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; 2668*5b2ba9d3SPiotr Jasiukajtis 2669*5b2ba9d3SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; 2670*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; 2671*5b2ba9d3SPiotr Jasiukajtis 2672*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2673*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2674*5b2ba9d3SPiotr Jasiukajtis 2675*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; 2676*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; 2677*5b2ba9d3SPiotr Jasiukajtis ba .cont20 2678*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2679*5b2ba9d3SPiotr Jasiukajtis 2680*5b2ba9d3SPiotr Jasiukajtis .align 16 2681*5b2ba9d3SPiotr Jasiukajtis.update19: /* .update19: when hy0 >= 0x00100000, return to .cont19a (the cmp counter,6 after the branch is its delay slot). Otherwise, if counter > 6, save counter-6 to tmp_counter and %i4/%i3 to tmp_px/tmp_py and set counter = 6; then compute %f44*%f48 into %f10 and DTWO - %f50 into %f20, load %g1 with %hi(0x3ff00000), aim %i4/%i3 at TBL+TBL_SHIFT+24 and go to .cont19b. */ 2682*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 2683*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont19a ! (0_0) if ( hy0 < 0x00100000 ) 2684*5b2ba9d3SPiotr Jasiukajtis 2685*5b2ba9d3SPiotr Jasiukajtis cmp counter,6 2686*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 2687*5b2ba9d3SPiotr Jasiukajtis nop 2688*5b2ba9d3SPiotr Jasiukajtis 2689*5b2ba9d3SPiotr Jasiukajtis sub counter,6,counter 2690*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2691*5b2ba9d3SPiotr Jasiukajtis 2692*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 2693*5b2ba9d3SPiotr Jasiukajtis 2694*5b2ba9d3SPiotr Jasiukajtis mov 6,counter 2695*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 2696*5b2ba9d3SPiotr Jasiukajtis1: 2697*5b2ba9d3SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 2698*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 2699*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 2700*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 !
(1_0) dtmp0 = DTWO - dtmp0; 2701*5b2ba9d3SPiotr Jasiukajtis 2702*5b2ba9d3SPiotr Jasiukajtis ba .cont19b 2703*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 2704*5b2ba9d3SPiotr Jasiukajtis 2705*5b2ba9d3SPiotr Jasiukajtis .align 16 2706*5b2ba9d3SPiotr Jasiukajtis.update21: 2707*5b2ba9d3SPiotr Jasiukajtis cmp counter,7 2708*5b2ba9d3SPiotr Jasiukajtis ble 1f 2709*5b2ba9d3SPiotr Jasiukajtis nop 2710*5b2ba9d3SPiotr Jasiukajtis 2711*5b2ba9d3SPiotr Jasiukajtis sub counter,7,counter 2712*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 2713*5b2ba9d3SPiotr Jasiukajtis 2714*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 2715*5b2ba9d3SPiotr Jasiukajtis 2716*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 2717*5b2ba9d3SPiotr Jasiukajtis 2718*5b2ba9d3SPiotr Jasiukajtis mov 7,counter 2719*5b2ba9d3SPiotr Jasiukajtis1: 2720*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; 2721*5b2ba9d3SPiotr Jasiukajtis 2722*5b2ba9d3SPiotr Jasiukajtis fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; 2723*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; 2724*5b2ba9d3SPiotr Jasiukajtis 2725*5b2ba9d3SPiotr Jasiukajtis fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); 2726*5b2ba9d3SPiotr Jasiukajtis 2727*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; 2728*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; 2729*5b2ba9d3SPiotr Jasiukajtis 2730*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; 2731*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; 2732*5b2ba9d3SPiotr Jasiukajtis 2733*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; 2734*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; 2735*5b2ba9d3SPiotr Jasiukajtis 2736*5b2ba9d3SPiotr Jasiukajtis fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; 2737*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f46 ! 
(4_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f28,%f36	! (0_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
	fsubd	%f60,%f54,%f60	! (4_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (6_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
	ba	.cont24		! resume at .cont24
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = TBL + TBL_SHIFT + 24

! .update22 - out-of-line fix-up path; resumes at .cont24.
! Same save-and-clamp sequence as used with the value 7 elsewhere in
! this file: when counter > 7 (signed compare), counter - 7 goes to
! tmp_counter, %i2 and %o0 go to tmp_px and tmp_py, and counter is cut
! to 7.  The FP sequence at 1: starts one instruction later than the
! one under .update21 (no y_hi0 -= D2ON36 step).  On exit %i2 and %o0
! equal TBL+TBL_SHIFT+24 and dtmp13 holds 0x3ff0000000000000 (1.0).
! NOTE(review): entry condition is outside this chunk - confirm.
	.align	16
.update22:
	cmp	counter,7	! counter ? 7
	ble	1f		! if ( counter <= 7 ) skip the save
	nop			! delay slot

	sub	counter,7,counter	! counter -= 7;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2;

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0;

	mov	7,counter	! counter = 7;
1:
	fmuld	%f52,%f14,%f50	! (2_0) dtmp0 = dd * dres;
	faddd	%f48,%f28,%f48	! (7_1) res0 += dtmp0;

	fand	%f26,DA0,%f28	! (0_0) res0 = vis_fand(dres,DA0);

	fmuld	%f46,%f46,%f0	! (4_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f46,%f2	! (4_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f20	! (4_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f46,%f62	!
(4_0) res0_lo = x0 + x_hi0;

	fmuld	%f16,%f18,%f18	! (1_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f10	! (2_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f32,%f28,%f50	! (0_0) dtmp0 = res0_hi * res0;
	faddd	%f60,%f54,%f46	! (4_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f28,%f36	! (0_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
	fsubd	%f60,%f54,%f60	! (4_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (6_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
	ba	.cont24		! resume at .cont24
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = TBL + TBL_SHIFT + 24

! .update23 - out-of-line fix-up path.
! If %l7 (hy0 per the inline comment) >= _0x00100000 (signed compare)
! control returns to .cont23a at once; the cmp in the delay slot of
! that branch still executes.  Otherwise, when counter > 7, the value
! counter - 7 is parked in tmp_counter, %i2 and %o0 are parked in
! tmp_px and tmp_py, and counter is cut to 7.  Finally %i2 and %o0 are
! both pointed at TBL+TBL_SHIFT+24, %g1 is loaded with 0x3ff00000 and
! control resumes at .cont23b.
! NOTE(review): the branch into this path is outside this chunk -
! confirm the entry condition against the main loop.
	.align	16
.update23:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont23a	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,7	! (delay slot of bge) counter ? 7
	ble,a	1f		! if ( counter <= 7 ) skip the save
	nop			! annulled delay slot, a nop either way

	sub	counter,7,counter	! counter -= 7;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2;

	mov	7,counter	! counter = 7;
	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0;
1:
	fmuld	%f16,%f18,%f18	!
(1_0) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
	fsubd	DTWO,%f50,%f10	! (2_0) dtmp0 = DTWO - dtmp0;

	ba	.cont23b	! resume at .cont23b
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = TBL + TBL_SHIFT + 24

! .update25 - out-of-line fix-up path; resumes at .cont28.
! When counter > 8 (signed compare), counter - 8 is parked in
! tmp_counter, %i4 and %i3 are parked in tmp_px and tmp_py, and counter
! is cut to 8.  After the FP sequence at 1: both %i4 and %i3 are
! pointed at TBL+TBL_SHIFT+24 and 0x3ff0000000000000 (the IEEE-754 bit
! pattern of 1.0) is stored to dtmp15.
! NOTE(review): the FP sequence presumably duplicates main-loop
! instructions skipped by the branch into this path, and the entry
! condition is outside this chunk - confirm.
	.align	16
.update25:
	cmp	counter,8	! counter ? 8
	ble	1f		! if ( counter <= 8 ) skip the save
	nop			! delay slot

	sub	counter,8,counter	! counter -= 8;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4;

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3;

	mov	8,counter	! counter = 8;
1:
	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;

	fmuld	%f10,%f22,%f50	! (3_0) dtmp0 = dd * dres;
	faddd	%f28,%f48,%f48	! (0_0) res0 += dtmp0;

	fand	%f16,DA0,%f28	! (1_0) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0	! (5_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f60,%f20,%f2	! (5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (5_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f60,%f20,%f62	! (5_0) res0_lo = x0 + x_hi0;

	fmuld	%f26,%f14,%f14	! (2_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f20	!
(3_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f42,%f28,%f60	! (1_0) dtmp0 = res0_hi * res0;
	faddd	%f52,%f54,%f50	! (5_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f28,%f34	! (1_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
	fsubd	%f52,%f54,%f54	! (5_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (7_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
	ba	.cont28		! resume at .cont28
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = TBL + TBL_SHIFT + 24

! .update26 - out-of-line fix-up path; resumes at .cont28.
! When counter > 8 (signed compare), counter - 8 is parked in
! tmp_counter, %i4 and %i3 are parked in tmp_px and tmp_py, and counter
! is cut to 8.  The FP sequence at 1: starts one instruction later
! than the one under .update25 (no y_hi0 -= D2ON36 step).  On exit %i4
! and %i3 equal TBL+TBL_SHIFT+24 and dtmp15 holds 0x3ff0000000000000
! (the IEEE-754 bit pattern of 1.0).
! NOTE(review): entry condition is outside this chunk - confirm.
	.align	16
.update26:
	cmp	counter,8	! counter ? 8
	ble	1f		! if ( counter <= 8 ) skip the save
	nop			! delay slot

	sub	counter,8,counter	! counter -= 8;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4;

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3;

	mov	8,counter	! counter = 8;
1:
	fmuld	%f10,%f22,%f50	! (3_0) dtmp0 = dd * dres;
	faddd	%f28,%f48,%f48	! (0_0) res0 += dtmp0;

	fand	%f16,DA0,%f28	! (1_0) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0	! (5_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f60,%f20,%f2	!
(5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (5_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f60,%f20,%f62	! (5_0) res0_lo = x0 + x_hi0;

	fmuld	%f26,%f14,%f14	! (2_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f20	! (3_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f42,%f28,%f60	! (1_0) dtmp0 = res0_hi * res0;
	faddd	%f52,%f54,%f50	! (5_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f28,%f34	! (1_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
	fsubd	%f52,%f54,%f54	! (5_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (7_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
	ba	.cont28		! resume at .cont28
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = TBL + TBL_SHIFT + 24

! .update27 - out-of-line fix-up path.
! If %l7 (hy0 per the inline comment) >= _0x00100000 (signed compare)
! control returns to .cont27a at once; the cmp in the delay slot of
! that branch still executes.  Otherwise, when counter > 8, the value
! counter - 8 is parked in tmp_counter, %i4 and %i3 are parked in
! tmp_px and tmp_py, and counter is cut to 8.  Finally %i4 and %i3 are
! both pointed at TBL+TBL_SHIFT+24, %g1 is loaded with 0x3ff00000 and
! control resumes at .cont27b.
! NOTE(review): the branch into this path is outside this chunk -
! confirm the entry condition against the main loop.
	.align	16
.update27:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont27a	!
(0_0) if ( hy0 < 0x00100000 )

	cmp	counter,8	! (delay slot of the bge above) counter ? 8
	ble,a	1f		! if ( counter <= 8 ) skip the save
	nop			! annulled delay slot, a nop either way

	sub	counter,8,counter	! counter -= 8;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4;

	mov	8,counter	! counter = 8;
	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3;
1:
	fmuld	%f26,%f14,%f14	! (2_0) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
	fsubd	DTWO,%f50,%f20	! (3_0) dtmp0 = DTWO - dtmp0;

	ba	.cont27b	! resume at .cont27b
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = TBL + TBL_SHIFT + 24

! .update29 - out-of-line fix-up path; resumes at .cont32.
! When counter > 1 (signed compare), counter - 1 is parked in
! tmp_counter, %i2 and %o0 are parked in tmp_px and tmp_py, and counter
! is cut to 1.  The sequence at 1: stores the incoming %g1 to dtmp0,
! advances %i5 (pz) by stridez, loads %o4 with 0x3ff00000 and points
! both %i2 and %o0 at TBL+TBL_SHIFT+24.
! NOTE(review): the FP sequence presumably duplicates main-loop
! instructions skipped by the branch into this path, and the entry
! condition is outside this chunk - confirm.
	.align	16
.update29:
	cmp	counter,1	! counter ? 1
	ble	1f		! if ( counter <= 1 ) skip the save
	nop			! delay slot

	sub	counter,1,counter	! counter -= 1;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2;

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0;

	mov	1,counter	! counter = 1;
1:
	fsubd	%f2,D2ON36,%f2	! (6_1) y_hi0 -= D2ON36;

	fmuld	%f54,%f24,%f50	! (4_1) dtmp0 = dd * dres;
	stx	%g1,[%fp+dtmp0]	! (7_1) *(long long*)&scl0 = ll;
	faddd	%f28,%f48,%f52	!
(1_1) res0 += dtmp0;

	fand	%f26,DA0,%f48	! (2_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0	! (6_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f28	! (6_1) x_lo0 = x0 - x_hi0;

	fmuld	%f2,%f2,%f46	! (6_1) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (6_1) res0_lo = x0 + x_hi0;

	fmuld	%f18,%f22,%f22	! (3_1) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
	fsubd	DTWO,%f50,%f20	! (4_1) dtmp0 = DTWO - dtmp0;

	ba	.cont32		! resume at .cont32
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = TBL + TBL_SHIFT + 24

! .update30 - out-of-line fix-up path; resumes at .cont32.
! When counter > 1 (signed compare), counter - 1 is parked in
! tmp_counter, %i2 and %o0 are parked in tmp_px and tmp_py, and counter
! is cut to 1.  The sequence at 1: starts one instruction later than
! the one under .update29 (no y_hi0 -= D2ON36 step); it stores the
! incoming %g1 to dtmp0, advances %i5 (pz) by stridez, loads %o4 with
! 0x3ff00000 and points both %i2 and %o0 at TBL+TBL_SHIFT+24.
! NOTE(review): entry condition is outside this chunk - confirm.
	.align	16
.update30:
	cmp	counter,1	! counter ? 1
	ble	1f		! if ( counter <= 1 ) skip the save
	nop			! delay slot

	sub	counter,1,counter	! counter -= 1;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2;

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0;

	mov	1,counter	! counter = 1;
1:
	fmuld	%f54,%f24,%f50	! (4_1) dtmp0 = dd * dres;
	stx	%g1,[%fp+dtmp0]	! (7_1) *(long long*)&scl0 = ll;
	faddd	%f28,%f48,%f52	!
(1_1) res0 += dtmp0;

	fand	%f26,DA0,%f48	! (2_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0	! (6_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f28	! (6_1) x_lo0 = x0 - x_hi0;

	fmuld	%f2,%f2,%f46	! (6_1) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (6_1) res0_lo = x0 + x_hi0;

	fmuld	%f18,%f22,%f22	! (3_1) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
	fsubd	DTWO,%f50,%f20	! (4_1) dtmp0 = DTWO - dtmp0;

	ba	.cont32		! resume at .cont32
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = TBL + TBL_SHIFT + 24

! .update31 - out-of-line fix-up path.
! If %l7 (hy0 per the inline comment) >= _0x00100000 (signed compare)
! control returns to .cont31 at once; the cmp in the delay slot of
! that branch still executes.  Otherwise, when counter > 1, the value
! counter - 1 is parked in tmp_counter, %i2 and %o0 are parked in
! tmp_px and tmp_py, and counter is cut to 1.  The sequence at 1: then
! advances %i5 (pz) by stridez, loads %o4 with 0x3ff00000, points %i2
! and %o0 at TBL+TBL_SHIFT+24 and resumes at .cont32.
! NOTE(review): the branch into this path is outside this chunk -
! confirm the entry condition against the main loop.
	.align	16
.update31:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont31	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,1	! (delay slot of bge) counter ? 1
	ble,a	1f		! if ( counter <= 1 ) skip the save
	nop			! annulled delay slot, a nop either way

	sub	counter,1,counter	! counter -= 1;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2;

	mov	1,counter	! counter = 1;
	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0;
1:
	fmuld	%f20,%f20,%f0	!
(6_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f28	! (6_1) x_lo0 = x0 - x_hi0;

	fmuld	%f2,%f2,%f46	! (6_1) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (6_1) res0_lo = x0 + x_hi0;

	fmuld	%f18,%f22,%f22	! (3_1) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
	fsubd	DTWO,%f50,%f20	! (4_1) dtmp0 = DTWO - dtmp0;

	ba	.cont32		! resume at .cont32
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = TBL + TBL_SHIFT + 24

! .update33 - out-of-line fix-up path; resumes at .cont36.
! When counter > 2 (signed compare), counter - 2 is parked in
! tmp_counter, %i4 and %i3 are parked in tmp_px and tmp_py, and counter
! is cut to 2.  The sequence at 1: stores %f1 to [%i5+4], advances %i5
! (pz) by stridez, stores the incoming %o4 to dtmp2, then points %i4
! and %i3 at TBL+TBL_SHIFT+24 and stores 0x3ff0000000000000 (the
! IEEE-754 bit pattern of 1.0) to dtmp3.
! NOTE(review): the FP sequence presumably duplicates main-loop
! instructions skipped by the branch into this path, and the entry
! condition is outside this chunk - confirm.
	.align	16
.update33:
	cmp	counter,2	! counter ? 2
	ble	1f		! if ( counter <= 2 ) skip the save
	nop			! delay slot

	sub	counter,2,counter	! counter -= 2;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4;

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3;

	mov	2,counter	! counter = 2;
1:
	st	%f1,[%i5+4]	! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;

	fmuld	%f26,%f16,%f50	! (5_1) dtmp0 = dd * dres;
	faddd	%f48,%f52,%f52	! (2_1) res0 += dtmp0;

	add	%i5,stridez,%i5	! pz += stridez
	stx	%o4,[%fp+dtmp2]	! (0_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48	! (3_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (7_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (7_1) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (7_1) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (7_1) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10	! (3_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (5_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f14,%f24,%f24	! (4_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (7_1) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38	! (3_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
	fsubd	%f60,%f54,%f12	! (7_1) y_lo0 = y0 - y_hi0;

	sllx	%o4,32,%o4	! (1_0) ll = (long long)j0 << 32;
	stx	%o4,[%fp+dtmp3]	!
(1_0) *(long long*)&scl0 = ll;
	ba	.cont36		! resume at .cont36
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = TBL + TBL_SHIFT + 24

! .update34 - out-of-line fix-up path; resumes at .cont36.
! When counter > 2 (signed compare), counter - 2 is parked in
! tmp_counter, %i4 and %i3 are parked in tmp_px and tmp_py, and counter
! is cut to 2.  The sequence at 1: starts later than the one under
! .update33 (it opens with pz += stridez); it stores the incoming %o4
! to dtmp2, then points %i4 and %i3 at TBL+TBL_SHIFT+24 and stores
! 0x3ff0000000000000 (the IEEE-754 bit pattern of 1.0) to dtmp3.
! NOTE(review): entry condition is outside this chunk - confirm.
	.align	16
.update34:
	cmp	counter,2	! counter ? 2
	ble	1f		! if ( counter <= 2 ) skip the save
	nop			! delay slot

	sub	counter,2,counter	! counter -= 2;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4;

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3;

	mov	2,counter	! counter = 2;
1:
	add	%i5,stridez,%i5	! pz += stridez
	stx	%o4,[%fp+dtmp2]	! (0_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48	! (3_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (7_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (7_1) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (7_1) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (7_1) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10	! (3_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (5_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f14,%f24,%f24	! (4_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (7_1) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38	!
(3_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
	fsubd	%f60,%f54,%f12	! (7_1) y_lo0 = y0 - y_hi0;

	sllx	%o4,32,%o4	! (1_0) ll = (long long)j0 << 32;
	stx	%o4,[%fp+dtmp3]	! (1_0) *(long long*)&scl0 = ll;
	ba	.cont36		! resume at .cont36
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = TBL + TBL_SHIFT + 24

! .update35 - out-of-line fix-up path.
! If %l7 (hy0 per the inline comment) >= _0x00100000 (signed compare)
! control returns to .cont35a at once; the cmp in the delay slot of
! that branch still executes.  Otherwise, when counter > 2, the value
! counter - 2 is parked in tmp_counter, %i4 and %i3 are parked in
! tmp_px and tmp_py, and counter is cut to 2.  Finally %i4 and %i3 are
! both pointed at TBL+TBL_SHIFT+24, %o4 is loaded with 0x3ff00000 and
! control resumes at .cont35b.
! NOTE(review): the branch into this path is outside this chunk -
! confirm the entry condition against the main loop.
	.align	16
.update35:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont35a	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,2	! (delay slot of bge) counter ? 2
	ble,a	1f		! if ( counter <= 2 ) skip the save
	nop			! annulled delay slot, a nop either way

	sub	counter,2,counter	! counter -= 2;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4;

	mov	2,counter	! counter = 2;
	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3;
1:
	fmuld	%f44,%f48,%f10	! (3_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
	fsubd	DTWO,%f50,%f20	!
(5_1) dtmp0 = DTWO - dtmp0;

	ba	.cont35b	! resume at .cont35b
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = TBL + TBL_SHIFT + 24

! .update37 - out-of-line fix-up path; resumes at .cont40.
! When counter > 3 (signed compare), counter - 3 is parked in
! tmp_counter, %i2 and %o0 are parked in tmp_px and tmp_py, and counter
! is cut to 3.  The sequence at 1: stores %f1 to [%i5+4], advances %i5
! (pz) by stridez, stores the incoming %o4 to dtmp4, then points %i2
! and %o0 at TBL+TBL_SHIFT+24 and stores 0x3ff0000000000000 (the
! IEEE-754 bit pattern of 1.0) to dtmp5.
! NOTE(review): the FP sequence presumably duplicates main-loop
! instructions skipped by the branch into this path, and the entry
! condition is outside this chunk - confirm.
	.align	16
.update37:
	cmp	counter,3	! counter ? 3
	ble	1f		! if ( counter <= 3 ) skip the save
	nop			! delay slot

	sub	counter,3,counter	! counter -= 3;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2;

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0;

	mov	3,counter	! counter = 3;
1:
	st	%f1,[%i5+4]	! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;

	fmuld	%f28,%f18,%f50	! (6_1) dtmp0 = dd * dres;
	faddd	%f48,%f52,%f52	! (3_1) res0 += dtmp0;

	add	%i5,stridez,%i5	! pz += stridez
	stx	%o4,[%fp+dtmp4]	! (1_0) *(long long*)&scl0 = ll;
	fand	%f26,DA0,%f48	! (4_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (0_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (0_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (0_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (0_0) res0_lo = x0 + x_hi0;

	fmuld	%f32,%f48,%f10	!
(4_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (6_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f22,%f16,%f16	! (5_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (0_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f48,%f36	! (4_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
	fsubd	%f60,%f54,%f12	! (0_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (2_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp5]	! (2_0) *(long long*)&scl0 = ll;
	ba	.cont40		! resume at .cont40
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = TBL + TBL_SHIFT + 24

! .update38 - out-of-line fix-up path; resumes at .cont40.
! When counter > 3 (signed compare), counter - 3 is parked in
! tmp_counter, %i2 and %o0 are parked in tmp_px and tmp_py, and counter
! is cut to 3.  The sequence at 1: starts later than the one under
! .update37 (it opens with pz += stridez); it stores the incoming %o4
! to dtmp4, then points %i2 and %o0 at TBL+TBL_SHIFT+24 and stores
! 0x3ff0000000000000 (the IEEE-754 bit pattern of 1.0) to dtmp5.
! NOTE(review): entry condition is outside this chunk - confirm.
	.align	16
.update38:
	cmp	counter,3	! counter ? 3
	ble	1f		! if ( counter <= 3 ) skip the save
	nop			! delay slot

	sub	counter,3,counter	! counter -= 3;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2;

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0;

	mov	3,counter	! counter = 3;
1:
	add	%i5,stridez,%i5	! pz += stridez
	stx	%o4,[%fp+dtmp4]	! (1_0) *(long long*)&scl0 = ll;
	fand	%f26,DA0,%f48	! (4_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (0_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	!
(0_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (0_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (0_0) res0_lo = x0 + x_hi0;

	fmuld	%f32,%f48,%f10	! (4_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (6_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f22,%f16,%f16	! (5_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (0_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f48,%f36	! (4_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
	fsubd	%f60,%f54,%f12	! (0_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (2_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp5]	! (2_0) *(long long*)&scl0 = ll;
	ba	.cont40		! resume at .cont40
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = TBL + TBL_SHIFT + 24

! .update39 - out-of-line fix-up path.
! If %l7 (hy0 per the inline comment) >= _0x00100000 (signed compare)
! control returns to .cont39a at once; the cmp in the delay slot of
! that branch still executes.  Otherwise, when counter > 3, the value
! counter - 3 is parked in tmp_counter, %i2 and %o0 are parked in
! tmp_px and tmp_py, and counter is cut to 3.  Finally %i2 and %o0 are
! both pointed at TBL+TBL_SHIFT+24, %g1 is loaded with 0x3ff00000 and
! control resumes at .cont39b.
! NOTE(review): the branch into this path is outside this chunk -
! confirm the entry condition against the main loop.
	.align	16
.update39:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont39a	!
(0_0) if ( hy0 < 0x00100000 )

	cmp	counter,3	! (delay slot of the bge above) counter ? 3
	ble,a	1f		! if ( counter <= 3 ) skip the save
	nop			! annulled delay slot, a nop either way

	sub	counter,3,counter	! counter -= 3;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2;

	mov	3,counter	! counter = 3;
	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0;
1:
	fmuld	%f32,%f48,%f10	! (4_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL + TBL_SHIFT + 24
	fsubd	DTWO,%f50,%f20	! (6_1) dtmp0 = DTWO - dtmp0;

	ba	.cont39b	! resume at .cont39b
	add	TBL,TBL_SHIFT+24,%o0	! (delay slot) %o0 = TBL + TBL_SHIFT + 24

! .update41 - out-of-line fix-up path; resumes at .cont44.
! When counter > 4 (signed compare), counter - 4 is parked in
! tmp_counter, %i4 and %i3 are parked in tmp_px and tmp_py, and counter
! is cut to 4.  The sequence at 1: stores %f1 to [%i5+4], advances %i5
! (pz) by stridez, stores the incoming %g1 to dtmp6, then points %i4
! and %i3 at TBL+TBL_SHIFT+24 and stores 0x3ff0000000000000 (the
! IEEE-754 bit pattern of 1.0) to dtmp7.
! NOTE(review): the FP sequence presumably duplicates main-loop
! instructions skipped by the branch into this path, and the entry
! condition is outside this chunk - confirm.
	.align	16
.update41:
	cmp	counter,4	! counter ? 4
	ble	1f		! if ( counter <= 4 ) skip the save
	nop			! delay slot

	sub	counter,4,counter	! counter -= 4;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4;

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3;

	mov	4,counter	! counter = 4;
1:
	st	%f1,[%i5+4]	! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;

	fmuld	%f26,%f14,%f50	! (7_1) dtmp0 = dd * dres;
	faddd	%f48,%f52,%f52	! (4_1) res0 += dtmp0;

	add	%i5,stridez,%i5	! pz += stridez
	stx	%g1,[%fp+dtmp6]	! (2_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48	! (5_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (1_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (1_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (1_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (1_0) res0_lo = x0 + x_hi0;

	fmuld	%f42,%f48,%f10	! (5_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (7_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f24,%f18,%f18	! (6_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (1_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f48,%f34	! (5_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL + TBL_SHIFT + 24
	fsubd	%f60,%f54,%f12	! (1_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (3_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp7]	!
(3_0) *(long long*)&scl0 = ll;
	ba	.cont44		! resume at .cont44
	add	TBL,TBL_SHIFT+24,%i3	! (delay slot) %i3 = TBL + TBL_SHIFT + 24

! .update42 - out-of-line fix-up path; resumes at .cont44.
! When counter > 4 (signed compare), counter - 4 is parked in
! tmp_counter, %i4 and %i3 are parked in tmp_px and tmp_py, and counter
! is cut to 4.  The sequence at 1: starts later than the one under
! .update41 (it opens with pz += stridez); it stores the incoming %g1
! to dtmp6, then points %i4 and %i3 at TBL+TBL_SHIFT+24 and stores
! 0x3ff0000000000000 (the IEEE-754 bit pattern of 1.0) to dtmp7.
! NOTE(review): entry condition is outside this chunk - confirm.
	.align	16
.update42:
	cmp	counter,4	! counter ? 4
	ble	1f		! if ( counter <= 4 ) skip the save
	nop			! delay slot

	sub	counter,4,counter	! counter -= 4;
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter;

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4;

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3;

	mov	4,counter	! counter = 4;
1:
	add	%i5,stridez,%i5	! pz += stridez
	stx	%g1,[%fp+dtmp6]	! (2_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48	! (5_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (1_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (1_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (1_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (1_0) res0_lo = x0 + x_hi0;

	fmuld	%f42,%f48,%f10	! (5_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (7_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f24,%f18,%f18	! (6_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (1_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f48,%f34	!
(5_1) dtmp1 = res0_lo * res0; 3338*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3339*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3340*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 3341*5b2ba9d3SPiotr Jasiukajtis 3342*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; 3343*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; 3344*5b2ba9d3SPiotr Jasiukajtis ba .cont44 3345*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3346*5b2ba9d3SPiotr Jasiukajtis 3347*5b2ba9d3SPiotr Jasiukajtis .align 16 3348*5b2ba9d3SPiotr Jasiukajtis.update43: 3349*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 3350*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont43a ! (0_0) if ( hy0 < 0x00100000 ) 3351*5b2ba9d3SPiotr Jasiukajtis 3352*5b2ba9d3SPiotr Jasiukajtis cmp counter,4 3353*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 3354*5b2ba9d3SPiotr Jasiukajtis nop 3355*5b2ba9d3SPiotr Jasiukajtis 3356*5b2ba9d3SPiotr Jasiukajtis sub counter,4,counter 3357*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3358*5b2ba9d3SPiotr Jasiukajtis 3359*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3360*5b2ba9d3SPiotr Jasiukajtis 3361*5b2ba9d3SPiotr Jasiukajtis mov 4,counter 3362*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3363*5b2ba9d3SPiotr Jasiukajtis1: 3364*5b2ba9d3SPiotr Jasiukajtis fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; 3365*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3366*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3367*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! 
(7_1) dtmp0 = DTWO - dtmp0; 3368*5b2ba9d3SPiotr Jasiukajtis 3369*5b2ba9d3SPiotr Jasiukajtis ba .cont43b 3370*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3371*5b2ba9d3SPiotr Jasiukajtis 3372*5b2ba9d3SPiotr Jasiukajtis .align 16 3373*5b2ba9d3SPiotr Jasiukajtis.update45: 3374*5b2ba9d3SPiotr Jasiukajtis cmp counter,5 3375*5b2ba9d3SPiotr Jasiukajtis ble 1f 3376*5b2ba9d3SPiotr Jasiukajtis nop 3377*5b2ba9d3SPiotr Jasiukajtis 3378*5b2ba9d3SPiotr Jasiukajtis sub counter,5,counter 3379*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3380*5b2ba9d3SPiotr Jasiukajtis 3381*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 3382*5b2ba9d3SPiotr Jasiukajtis 3383*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 3384*5b2ba9d3SPiotr Jasiukajtis 3385*5b2ba9d3SPiotr Jasiukajtis mov 5,counter 3386*5b2ba9d3SPiotr Jasiukajtis1: 3387*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; 3388*5b2ba9d3SPiotr Jasiukajtis 3389*5b2ba9d3SPiotr Jasiukajtis fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; 3390*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3391*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; 3392*5b2ba9d3SPiotr Jasiukajtis 3393*5b2ba9d3SPiotr Jasiukajtis fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); 3394*5b2ba9d3SPiotr Jasiukajtis 3395*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 3396*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; 3397*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 3398*5b2ba9d3SPiotr Jasiukajtis 3399*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 3400*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3401*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; 3402*5b2ba9d3SPiotr Jasiukajtis 3403*5b2ba9d3SPiotr Jasiukajtis fmuld %f30,%f48,%f10 ! 
(6_1) dtmp0 = res0_hi * res0; 3404*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; 3405*5b2ba9d3SPiotr Jasiukajtis 3406*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; 3407*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; 3408*5b2ba9d3SPiotr Jasiukajtis 3409*5b2ba9d3SPiotr Jasiukajtis fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; 3410*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3411*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 3412*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; 3413*5b2ba9d3SPiotr Jasiukajtis 3414*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; 3415*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; 3416*5b2ba9d3SPiotr Jasiukajtis ba .cont48 3417*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3418*5b2ba9d3SPiotr Jasiukajtis 3419*5b2ba9d3SPiotr Jasiukajtis .align 16 3420*5b2ba9d3SPiotr Jasiukajtis.update46: 3421*5b2ba9d3SPiotr Jasiukajtis cmp counter,5 3422*5b2ba9d3SPiotr Jasiukajtis ble 1f 3423*5b2ba9d3SPiotr Jasiukajtis nop 3424*5b2ba9d3SPiotr Jasiukajtis 3425*5b2ba9d3SPiotr Jasiukajtis sub counter,5,counter 3426*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3427*5b2ba9d3SPiotr Jasiukajtis 3428*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 3429*5b2ba9d3SPiotr Jasiukajtis 3430*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 3431*5b2ba9d3SPiotr Jasiukajtis 3432*5b2ba9d3SPiotr Jasiukajtis mov 5,counter 3433*5b2ba9d3SPiotr Jasiukajtis1: 3434*5b2ba9d3SPiotr Jasiukajtis fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; 3435*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3436*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; 3437*5b2ba9d3SPiotr Jasiukajtis 3438*5b2ba9d3SPiotr Jasiukajtis fand %f26,DA0,%f48 ! 
(6_1) res0 = vis_fand(dres,DA0); 3439*5b2ba9d3SPiotr Jasiukajtis 3440*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 3441*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; 3442*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 3443*5b2ba9d3SPiotr Jasiukajtis 3444*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 3445*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3446*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; 3447*5b2ba9d3SPiotr Jasiukajtis 3448*5b2ba9d3SPiotr Jasiukajtis fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; 3449*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; 3450*5b2ba9d3SPiotr Jasiukajtis 3451*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; 3452*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; 3453*5b2ba9d3SPiotr Jasiukajtis 3454*5b2ba9d3SPiotr Jasiukajtis fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; 3455*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3456*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 3457*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; 3458*5b2ba9d3SPiotr Jasiukajtis 3459*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; 3460*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; 3461*5b2ba9d3SPiotr Jasiukajtis ba .cont48 3462*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3463*5b2ba9d3SPiotr Jasiukajtis 3464*5b2ba9d3SPiotr Jasiukajtis .align 16 3465*5b2ba9d3SPiotr Jasiukajtis.update47: 3466*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 3467*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont47a ! 
(0_0) if ( hy0 < 0x00100000 ) 3468*5b2ba9d3SPiotr Jasiukajtis 3469*5b2ba9d3SPiotr Jasiukajtis cmp counter,5 3470*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 3471*5b2ba9d3SPiotr Jasiukajtis nop 3472*5b2ba9d3SPiotr Jasiukajtis 3473*5b2ba9d3SPiotr Jasiukajtis sub counter,5,counter 3474*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3475*5b2ba9d3SPiotr Jasiukajtis 3476*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 3477*5b2ba9d3SPiotr Jasiukajtis 3478*5b2ba9d3SPiotr Jasiukajtis mov 5,counter 3479*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 3480*5b2ba9d3SPiotr Jasiukajtis1: 3481*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 3482*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; 3483*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 3484*5b2ba9d3SPiotr Jasiukajtis 3485*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 3486*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3487*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; 3488*5b2ba9d3SPiotr Jasiukajtis 3489*5b2ba9d3SPiotr Jasiukajtis fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; 3490*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3491*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 3492*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! 
(0_0) dtmp0 = DTWO - dtmp0; 3493*5b2ba9d3SPiotr Jasiukajtis 3494*5b2ba9d3SPiotr Jasiukajtis ba .cont47b 3495*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3496*5b2ba9d3SPiotr Jasiukajtis 3497*5b2ba9d3SPiotr Jasiukajtis .align 16 3498*5b2ba9d3SPiotr Jasiukajtis.update49: 3499*5b2ba9d3SPiotr Jasiukajtis cmp counter,6 3500*5b2ba9d3SPiotr Jasiukajtis ble 1f 3501*5b2ba9d3SPiotr Jasiukajtis nop 3502*5b2ba9d3SPiotr Jasiukajtis 3503*5b2ba9d3SPiotr Jasiukajtis sub counter,6,counter 3504*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3505*5b2ba9d3SPiotr Jasiukajtis 3506*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3507*5b2ba9d3SPiotr Jasiukajtis 3508*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3509*5b2ba9d3SPiotr Jasiukajtis 3510*5b2ba9d3SPiotr Jasiukajtis mov 6,counter 3511*5b2ba9d3SPiotr Jasiukajtis1: 3512*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; 3513*5b2ba9d3SPiotr Jasiukajtis 3514*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; 3515*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3516*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; 3517*5b2ba9d3SPiotr Jasiukajtis 3518*5b2ba9d3SPiotr Jasiukajtis fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); 3519*5b2ba9d3SPiotr Jasiukajtis 3520*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 3521*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; 3522*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; 3523*5b2ba9d3SPiotr Jasiukajtis 3524*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; 3525*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3526*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 3527*5b2ba9d3SPiotr Jasiukajtis 3528*5b2ba9d3SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! 
(7_1) dtmp0 = res0_hi * res0; 3529*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; 3530*5b2ba9d3SPiotr Jasiukajtis 3531*5b2ba9d3SPiotr Jasiukajtis fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; 3532*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; 3533*5b2ba9d3SPiotr Jasiukajtis 3534*5b2ba9d3SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; 3535*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3536*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3537*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; 3538*5b2ba9d3SPiotr Jasiukajtis 3539*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; 3540*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; 3541*5b2ba9d3SPiotr Jasiukajtis ba .cont52 3542*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3543*5b2ba9d3SPiotr Jasiukajtis 3544*5b2ba9d3SPiotr Jasiukajtis .align 16 3545*5b2ba9d3SPiotr Jasiukajtis.update50: 3546*5b2ba9d3SPiotr Jasiukajtis cmp counter,6 3547*5b2ba9d3SPiotr Jasiukajtis ble 1f 3548*5b2ba9d3SPiotr Jasiukajtis nop 3549*5b2ba9d3SPiotr Jasiukajtis 3550*5b2ba9d3SPiotr Jasiukajtis sub counter,6,counter 3551*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3552*5b2ba9d3SPiotr Jasiukajtis 3553*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3554*5b2ba9d3SPiotr Jasiukajtis 3555*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3556*5b2ba9d3SPiotr Jasiukajtis 3557*5b2ba9d3SPiotr Jasiukajtis mov 6,counter 3558*5b2ba9d3SPiotr Jasiukajtis1: 3559*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; 3560*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3561*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; 3562*5b2ba9d3SPiotr Jasiukajtis 3563*5b2ba9d3SPiotr Jasiukajtis fand %f28,DA0,%f48 ! 
(7_1) res0 = vis_fand(dres,DA0); 3564*5b2ba9d3SPiotr Jasiukajtis 3565*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 3566*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; 3567*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; 3568*5b2ba9d3SPiotr Jasiukajtis 3569*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; 3570*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3571*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 3572*5b2ba9d3SPiotr Jasiukajtis 3573*5b2ba9d3SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 3574*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; 3575*5b2ba9d3SPiotr Jasiukajtis 3576*5b2ba9d3SPiotr Jasiukajtis fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; 3577*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; 3578*5b2ba9d3SPiotr Jasiukajtis 3579*5b2ba9d3SPiotr Jasiukajtis fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; 3580*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3581*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3582*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; 3583*5b2ba9d3SPiotr Jasiukajtis 3584*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; 3585*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; 3586*5b2ba9d3SPiotr Jasiukajtis ba .cont52 3587*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3588*5b2ba9d3SPiotr Jasiukajtis 3589*5b2ba9d3SPiotr Jasiukajtis .align 16 3590*5b2ba9d3SPiotr Jasiukajtis.update51: 3591*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 3592*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont51a ! 
(0_0) if ( hy0 < 0x00100000 ) 3593*5b2ba9d3SPiotr Jasiukajtis 3594*5b2ba9d3SPiotr Jasiukajtis cmp counter,6 3595*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 3596*5b2ba9d3SPiotr Jasiukajtis nop 3597*5b2ba9d3SPiotr Jasiukajtis 3598*5b2ba9d3SPiotr Jasiukajtis sub counter,6,counter 3599*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3600*5b2ba9d3SPiotr Jasiukajtis 3601*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3602*5b2ba9d3SPiotr Jasiukajtis 3603*5b2ba9d3SPiotr Jasiukajtis mov 6,counter 3604*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3605*5b2ba9d3SPiotr Jasiukajtis1: 3606*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 3607*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; 3608*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; 3609*5b2ba9d3SPiotr Jasiukajtis 3610*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; 3611*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3612*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 3613*5b2ba9d3SPiotr Jasiukajtis 3614*5b2ba9d3SPiotr Jasiukajtis fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 3615*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3616*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3617*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! 
(1_0) dtmp0 = DTWO - dtmp0; 3618*5b2ba9d3SPiotr Jasiukajtis 3619*5b2ba9d3SPiotr Jasiukajtis ba .cont51b 3620*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3621*5b2ba9d3SPiotr Jasiukajtis 3622*5b2ba9d3SPiotr Jasiukajtis .align 16 3623*5b2ba9d3SPiotr Jasiukajtis.update53: 3624*5b2ba9d3SPiotr Jasiukajtis cmp counter,7 3625*5b2ba9d3SPiotr Jasiukajtis ble 1f 3626*5b2ba9d3SPiotr Jasiukajtis nop 3627*5b2ba9d3SPiotr Jasiukajtis 3628*5b2ba9d3SPiotr Jasiukajtis sub counter,7,counter 3629*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3630*5b2ba9d3SPiotr Jasiukajtis 3631*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 3632*5b2ba9d3SPiotr Jasiukajtis 3633*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 3634*5b2ba9d3SPiotr Jasiukajtis 3635*5b2ba9d3SPiotr Jasiukajtis mov 7,counter 3636*5b2ba9d3SPiotr Jasiukajtis1: 3637*5b2ba9d3SPiotr Jasiukajtis fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; 3638*5b2ba9d3SPiotr Jasiukajtis 3639*5b2ba9d3SPiotr Jasiukajtis fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; 3640*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3641*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; 3642*5b2ba9d3SPiotr Jasiukajtis 3643*5b2ba9d3SPiotr Jasiukajtis fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); 3644*5b2ba9d3SPiotr Jasiukajtis 3645*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; 3646*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; 3647*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; 3648*5b2ba9d3SPiotr Jasiukajtis 3649*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; 3650*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3651*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; 3652*5b2ba9d3SPiotr Jasiukajtis 3653*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f18,%f18 ! 
(1_0) dtmp2 = dd * dres; 3654*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; 3655*5b2ba9d3SPiotr Jasiukajtis 3656*5b2ba9d3SPiotr Jasiukajtis fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; 3657*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; 3658*5b2ba9d3SPiotr Jasiukajtis 3659*5b2ba9d3SPiotr Jasiukajtis fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; 3660*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3661*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 3662*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; 3663*5b2ba9d3SPiotr Jasiukajtis 3664*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; 3665*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; 3666*5b2ba9d3SPiotr Jasiukajtis ba .cont56 3667*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3668*5b2ba9d3SPiotr Jasiukajtis 3669*5b2ba9d3SPiotr Jasiukajtis .align 16 3670*5b2ba9d3SPiotr Jasiukajtis.update54: 3671*5b2ba9d3SPiotr Jasiukajtis cmp counter,7 3672*5b2ba9d3SPiotr Jasiukajtis ble 1f 3673*5b2ba9d3SPiotr Jasiukajtis nop 3674*5b2ba9d3SPiotr Jasiukajtis 3675*5b2ba9d3SPiotr Jasiukajtis sub counter,7,counter 3676*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3677*5b2ba9d3SPiotr Jasiukajtis 3678*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 3679*5b2ba9d3SPiotr Jasiukajtis 3680*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 3681*5b2ba9d3SPiotr Jasiukajtis 3682*5b2ba9d3SPiotr Jasiukajtis mov 7,counter 3683*5b2ba9d3SPiotr Jasiukajtis1: 3684*5b2ba9d3SPiotr Jasiukajtis fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; 3685*5b2ba9d3SPiotr Jasiukajtis st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3686*5b2ba9d3SPiotr Jasiukajtis faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; 3687*5b2ba9d3SPiotr Jasiukajtis 3688*5b2ba9d3SPiotr Jasiukajtis fand %f26,DA0,%f28 ! 
(0_0) res0 = vis_fand(dres,DA0); 3689*5b2ba9d3SPiotr Jasiukajtis 3690*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; 3691*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; 3692*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; 3693*5b2ba9d3SPiotr Jasiukajtis 3694*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; 3695*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3696*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; 3697*5b2ba9d3SPiotr Jasiukajtis 3698*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; 3699*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; 3700*5b2ba9d3SPiotr Jasiukajtis 3701*5b2ba9d3SPiotr Jasiukajtis fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; 3702*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; 3703*5b2ba9d3SPiotr Jasiukajtis 3704*5b2ba9d3SPiotr Jasiukajtis fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; 3705*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3706*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 3707*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; 3708*5b2ba9d3SPiotr Jasiukajtis 3709*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; 3710*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; 3711*5b2ba9d3SPiotr Jasiukajtis ba .cont56 3712*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3713*5b2ba9d3SPiotr Jasiukajtis 3714*5b2ba9d3SPiotr Jasiukajtis .align 16 3715*5b2ba9d3SPiotr Jasiukajtis.update55: 3716*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 3717*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont55a ! 
(0_0) if ( hy0 < 0x00100000 ) 3718*5b2ba9d3SPiotr Jasiukajtis 3719*5b2ba9d3SPiotr Jasiukajtis cmp counter,7 3720*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 3721*5b2ba9d3SPiotr Jasiukajtis nop 3722*5b2ba9d3SPiotr Jasiukajtis 3723*5b2ba9d3SPiotr Jasiukajtis sub counter,7,counter 3724*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3725*5b2ba9d3SPiotr Jasiukajtis 3726*5b2ba9d3SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 3727*5b2ba9d3SPiotr Jasiukajtis 3728*5b2ba9d3SPiotr Jasiukajtis mov 7,counter 3729*5b2ba9d3SPiotr Jasiukajtis stx %o0,[%fp+tmp_py] 3730*5b2ba9d3SPiotr Jasiukajtis1: 3731*5b2ba9d3SPiotr Jasiukajtis fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; 3732*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; 3733*5b2ba9d3SPiotr Jasiukajtis fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; 3734*5b2ba9d3SPiotr Jasiukajtis 3735*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; 3736*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3737*5b2ba9d3SPiotr Jasiukajtis faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; 3738*5b2ba9d3SPiotr Jasiukajtis 3739*5b2ba9d3SPiotr Jasiukajtis fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; 3740*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3741*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i2 3742*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f10 ! 
(2_0) dtmp0 = DTWO - dtmp0; 3743*5b2ba9d3SPiotr Jasiukajtis 3744*5b2ba9d3SPiotr Jasiukajtis ba .cont55b 3745*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%o0 3746*5b2ba9d3SPiotr Jasiukajtis 3747*5b2ba9d3SPiotr Jasiukajtis .align 16 3748*5b2ba9d3SPiotr Jasiukajtis.update57: 3749*5b2ba9d3SPiotr Jasiukajtis cmp counter,8 3750*5b2ba9d3SPiotr Jasiukajtis ble 1f 3751*5b2ba9d3SPiotr Jasiukajtis nop 3752*5b2ba9d3SPiotr Jasiukajtis 3753*5b2ba9d3SPiotr Jasiukajtis sub counter,8,counter 3754*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3755*5b2ba9d3SPiotr Jasiukajtis 3756*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3757*5b2ba9d3SPiotr Jasiukajtis 3758*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3759*5b2ba9d3SPiotr Jasiukajtis 3760*5b2ba9d3SPiotr Jasiukajtis mov 8,counter 3761*5b2ba9d3SPiotr Jasiukajtis1: 3762*5b2ba9d3SPiotr Jasiukajtis fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; 3763*5b2ba9d3SPiotr Jasiukajtis 3764*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; 3765*5b2ba9d3SPiotr Jasiukajtis st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3766*5b2ba9d3SPiotr Jasiukajtis faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; 3767*5b2ba9d3SPiotr Jasiukajtis 3768*5b2ba9d3SPiotr Jasiukajtis fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); 3769*5b2ba9d3SPiotr Jasiukajtis 3770*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 3771*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; 3772*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; 3773*5b2ba9d3SPiotr Jasiukajtis 3774*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; 3775*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3776*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 3777*5b2ba9d3SPiotr Jasiukajtis 3778*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f14,%f14 ! 
(2_0) dtmp2 = dd * dres; 3779*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 3780*5b2ba9d3SPiotr Jasiukajtis 3781*5b2ba9d3SPiotr Jasiukajtis fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; 3782*5b2ba9d3SPiotr Jasiukajtis faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; 3783*5b2ba9d3SPiotr Jasiukajtis 3784*5b2ba9d3SPiotr Jasiukajtis fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; 3785*5b2ba9d3SPiotr Jasiukajtis fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; 3786*5b2ba9d3SPiotr Jasiukajtis 3787*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3788*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3789*5b2ba9d3SPiotr Jasiukajtis 3790*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 3791*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 3792*5b2ba9d3SPiotr Jasiukajtis ba .cont60 3793*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3794*5b2ba9d3SPiotr Jasiukajtis 3795*5b2ba9d3SPiotr Jasiukajtis .align 16 3796*5b2ba9d3SPiotr Jasiukajtis.update58: 3797*5b2ba9d3SPiotr Jasiukajtis cmp counter,8 3798*5b2ba9d3SPiotr Jasiukajtis ble 1f 3799*5b2ba9d3SPiotr Jasiukajtis nop 3800*5b2ba9d3SPiotr Jasiukajtis 3801*5b2ba9d3SPiotr Jasiukajtis sub counter,8,counter 3802*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3803*5b2ba9d3SPiotr Jasiukajtis 3804*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3805*5b2ba9d3SPiotr Jasiukajtis 3806*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3807*5b2ba9d3SPiotr Jasiukajtis 3808*5b2ba9d3SPiotr Jasiukajtis mov 8,counter 3809*5b2ba9d3SPiotr Jasiukajtis1: 3810*5b2ba9d3SPiotr Jasiukajtis fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; 3811*5b2ba9d3SPiotr Jasiukajtis st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; 3812*5b2ba9d3SPiotr Jasiukajtis faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; 3813*5b2ba9d3SPiotr Jasiukajtis 3814*5b2ba9d3SPiotr Jasiukajtis fand %f16,DA0,%f28 ! 
(1_0) res0 = vis_fand(dres,DA0); 3815*5b2ba9d3SPiotr Jasiukajtis 3816*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 3817*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; 3818*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; 3819*5b2ba9d3SPiotr Jasiukajtis 3820*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; 3821*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3822*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 3823*5b2ba9d3SPiotr Jasiukajtis 3824*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 3825*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 3826*5b2ba9d3SPiotr Jasiukajtis 3827*5b2ba9d3SPiotr Jasiukajtis fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; 3828*5b2ba9d3SPiotr Jasiukajtis faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; 3829*5b2ba9d3SPiotr Jasiukajtis 3830*5b2ba9d3SPiotr Jasiukajtis fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; 3831*5b2ba9d3SPiotr Jasiukajtis fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; 3832*5b2ba9d3SPiotr Jasiukajtis 3833*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3834*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3835*5b2ba9d3SPiotr Jasiukajtis 3836*5b2ba9d3SPiotr Jasiukajtis sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 3837*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 3838*5b2ba9d3SPiotr Jasiukajtis ba .cont60 3839*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3840*5b2ba9d3SPiotr Jasiukajtis 3841*5b2ba9d3SPiotr Jasiukajtis .align 16 3842*5b2ba9d3SPiotr Jasiukajtis.update59: 3843*5b2ba9d3SPiotr Jasiukajtis cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 3844*5b2ba9d3SPiotr Jasiukajtis bge,pn %icc,.cont59a ! 
(0_0) if ( hy0 < 0x00100000 ) 3845*5b2ba9d3SPiotr Jasiukajtis 3846*5b2ba9d3SPiotr Jasiukajtis cmp counter,8 3847*5b2ba9d3SPiotr Jasiukajtis ble,a 1f 3848*5b2ba9d3SPiotr Jasiukajtis nop 3849*5b2ba9d3SPiotr Jasiukajtis 3850*5b2ba9d3SPiotr Jasiukajtis sub counter,8,counter 3851*5b2ba9d3SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 3852*5b2ba9d3SPiotr Jasiukajtis 3853*5b2ba9d3SPiotr Jasiukajtis stx %i4,[%fp+tmp_px] 3854*5b2ba9d3SPiotr Jasiukajtis 3855*5b2ba9d3SPiotr Jasiukajtis mov 8,counter 3856*5b2ba9d3SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 3857*5b2ba9d3SPiotr Jasiukajtis1: 3858*5b2ba9d3SPiotr Jasiukajtis fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 3859*5b2ba9d3SPiotr Jasiukajtis stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; 3860*5b2ba9d3SPiotr Jasiukajtis fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; 3861*5b2ba9d3SPiotr Jasiukajtis 3862*5b2ba9d3SPiotr Jasiukajtis fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; 3863*5b2ba9d3SPiotr Jasiukajtis add %i5,stridez,%i5 ! pz += stridez 3864*5b2ba9d3SPiotr Jasiukajtis faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 3865*5b2ba9d3SPiotr Jasiukajtis 3866*5b2ba9d3SPiotr Jasiukajtis fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 3867*5b2ba9d3SPiotr Jasiukajtis sethi %hi(0x3ff00000),%g1 3868*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i4 3869*5b2ba9d3SPiotr Jasiukajtis fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 3870*5b2ba9d3SPiotr Jasiukajtis 3871*5b2ba9d3SPiotr Jasiukajtis ba .cont59b 3872*5b2ba9d3SPiotr Jasiukajtis add TBL,TBL_SHIFT+24,%i3 3873*5b2ba9d3SPiotr Jasiukajtis 3874*5b2ba9d3SPiotr Jasiukajtis .align 16 3875*5b2ba9d3SPiotr Jasiukajtis.exit: 3876*5b2ba9d3SPiotr Jasiukajtis ret 3877*5b2ba9d3SPiotr Jasiukajtis restore 3878*5b2ba9d3SPiotr Jasiukajtis SET_SIZE(__vrhypot) 3879*5b2ba9d3SPiotr Jasiukajtis 3880