1*25c28e83SPiotr Jasiukajtis/* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis/* 22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23*25c28e83SPiotr Jasiukajtis */ 24*25c28e83SPiotr Jasiukajtis/* 25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 
27*25c28e83SPiotr Jasiukajtis */ 28*25c28e83SPiotr Jasiukajtis 29*25c28e83SPiotr Jasiukajtis .file "__vatanf.S" 30*25c28e83SPiotr Jasiukajtis 31*25c28e83SPiotr Jasiukajtis#include "libm.h" 32*25c28e83SPiotr Jasiukajtis 33*25c28e83SPiotr Jasiukajtis RO_DATA 34*25c28e83SPiotr Jasiukajtis .align 64 35*25c28e83SPiotr Jasiukajtis
/*
 * Read-only data for __vatanf, reached through one base register that is
 * set to .CONST_TBL at entry.  Byte offsets from .CONST_TBL, as used by the
 * loads and address arithmetic in the routine:
 *
 *   0   .. 63    eight doubles: K0, K1, K2, DC1, DC2, DC3, DONE, DTWO
 *                (loaded with ldd from base+0, +8, ... +56)
 *   64  .. 575   parr0: 128 32-bit words (base+64), indexed by a byte
 *                offset masked with 508
 *   576 .. 591   the doubles 1.0 and -1.0 (base+64+512); the pseudo-code
 *                calls this pair sign_arr and indexes it with ux >> 28 & -8
 *   592 ..       parr1: 156 doubles (base+64+512+16)
 *
 * The register used to address parr1 is set to base+64+512+16-0x1cc*8, so
 * it is biased by 0x1cc (460) entries; this matches the "(i + 460)" term in
 * the parr1 description further down.
 */
36*25c28e83SPiotr Jasiukajtis.CONST_TBL: 37*25c28e83SPiotr Jasiukajtis .word 0x3fefffff, 0xfffccbbc ! K0 = 9.99999999976686608841e-01 38*25c28e83SPiotr Jasiukajtis .word 0xbfd55554, 0x51c6b90f ! K1 = -3.33333091601972730504e-01 39*25c28e83SPiotr Jasiukajtis .word 0x3fc98d6d, 0x926596cc ! K2 = 1.99628540499523379702e-01 40*25c28e83SPiotr Jasiukajtis .word 0x00020000, 0x00000000 ! DC1 41*25c28e83SPiotr Jasiukajtis .word 0xfffc0000, 0x00000000 ! DC2 42*25c28e83SPiotr Jasiukajtis .word 0x7ff00000, 0x00000000 ! DC3 43*25c28e83SPiotr Jasiukajtis .word 0x3ff00000, 0x00000000 ! DONE = 1.0 44*25c28e83SPiotr Jasiukajtis .word 0x40000000, 0x00000000 ! DTWO = 2.0 45*25c28e83SPiotr Jasiukajtis 46*25c28e83SPiotr Jasiukajtis! 
parr0 = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127] 47*25c28e83SPiotr Jasiukajtis 48*25c28e83SPiotr Jasiukajtis .word 0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6 49*25c28e83SPiotr Jasiukajtis .word 0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91 50*25c28e83SPiotr Jasiukajtis .word 0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac 51*25c28e83SPiotr Jasiukajtis .word 0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26 52*25c28e83SPiotr Jasiukajtis .word 0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd 53*25c28e83SPiotr Jasiukajtis .word 0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b 54*25c28e83SPiotr Jasiukajtis .word 0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741 55*25c28e83SPiotr Jasiukajtis .word 0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24 56*25c28e83SPiotr Jasiukajtis .word 0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f 57*25c28e83SPiotr Jasiukajtis .word 0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427 58*25c28e83SPiotr Jasiukajtis .word 0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225 59*25c28e83SPiotr Jasiukajtis .word 0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca 60*25c28e83SPiotr Jasiukajtis .word 0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6 61*25c28e83SPiotr Jasiukajtis .word 0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f 62*25c28e83SPiotr Jasiukajtis .word 0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867 63*25c28e83SPiotr Jasiukajtis .word 0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397 64*25c28e83SPiotr Jasiukajtis .word 0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f 65*25c28e83SPiotr Jasiukajtis .word 0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805 66*25c28e83SPiotr Jasiukajtis .word 0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5 67*25c28e83SPiotr Jasiukajtis .word 0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60 68*25c28e83SPiotr Jasiukajtis .word 0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce 69*25c28e83SPiotr Jasiukajtis .word 0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8 70*25c28e83SPiotr Jasiukajtis 
.word 0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c 71*25c28e83SPiotr Jasiukajtis .word 0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d 72*25c28e83SPiotr Jasiukajtis .word 0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 0x7fd20120 73*25c28e83SPiotr Jasiukajtis .word 0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c 74*25c28e83SPiotr Jasiukajtis .word 0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d 75*25c28e83SPiotr Jasiukajtis .word 0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30 76*25c28e83SPiotr Jasiukajtis .word 0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244 77*25c28e83SPiotr Jasiukajtis .word 0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab 78*25c28e83SPiotr Jasiukajtis .word 0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949 79*25c28e83SPiotr Jasiukajtis .word 0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804 80*25c28e83SPiotr Jasiukajtis 81*25c28e83SPiotr Jasiukajtis .word 0x3ff00000, 0x00000000 ! 1.0 82*25c28e83SPiotr Jasiukajtis .word 0xbff00000, 0x00000000 ! -1.0 83*25c28e83SPiotr Jasiukajtis 84*25c28e83SPiotr Jasiukajtis! 
parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155] 85*25c28e83SPiotr Jasiukajtis 86*25c28e83SPiotr Jasiukajtis .word 0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f 87*25c28e83SPiotr Jasiukajtis .word 0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf 88*25c28e83SPiotr Jasiukajtis .word 0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2 89*25c28e83SPiotr Jasiukajtis .word 0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3 90*25c28e83SPiotr Jasiukajtis .word 0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19 91*25c28e83SPiotr Jasiukajtis .word 0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30 92*25c28e83SPiotr Jasiukajtis .word 0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195 93*25c28e83SPiotr Jasiukajtis .word 0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302 94*25c28e83SPiotr Jasiukajtis .word 0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a 95*25c28e83SPiotr Jasiukajtis .word 0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1 96*25c28e83SPiotr Jasiukajtis .word 0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c 97*25c28e83SPiotr Jasiukajtis .word 0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c 98*25c28e83SPiotr Jasiukajtis .word 0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700 99*25c28e83SPiotr Jasiukajtis .word 0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712 100*25c28e83SPiotr Jasiukajtis .word 0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9 101*25c28e83SPiotr Jasiukajtis .word 0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444 102*25c28e83SPiotr Jasiukajtis .word 0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d 103*25c28e83SPiotr Jasiukajtis .word 0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4 104*25c28e83SPiotr Jasiukajtis .word 0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c 105*25c28e83SPiotr Jasiukajtis .word 0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2 106*25c28e83SPiotr Jasiukajtis .word 0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc 107*25c28e83SPiotr Jasiukajtis .word 0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd 108*25c28e83SPiotr Jasiukajtis .word 0x3fddac67, 0x0561bb4f, 
0x3fe1e00b, 0xabdefeb4 109*25c28e83SPiotr Jasiukajtis .word 0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634 110*25c28e83SPiotr Jasiukajtis .word 0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e 111*25c28e83SPiotr Jasiukajtis .word 0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f 112*25c28e83SPiotr Jasiukajtis .word 0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8 113*25c28e83SPiotr Jasiukajtis .word 0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5 114*25c28e83SPiotr Jasiukajtis .word 0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857 115*25c28e83SPiotr Jasiukajtis .word 0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd 116*25c28e83SPiotr Jasiukajtis .word 0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054 117*25c28e83SPiotr Jasiukajtis .word 0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0 118*25c28e83SPiotr Jasiukajtis .word 0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f 119*25c28e83SPiotr Jasiukajtis .word 0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc 120*25c28e83SPiotr Jasiukajtis .word 0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45 121*25c28e83SPiotr Jasiukajtis .word 0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f 122*25c28e83SPiotr Jasiukajtis .word 0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665 123*25c28e83SPiotr Jasiukajtis .word 0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0 124*25c28e83SPiotr Jasiukajtis .word 0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5 125*25c28e83SPiotr Jasiukajtis .word 0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27 126*25c28e83SPiotr Jasiukajtis .word 0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38 127*25c28e83SPiotr Jasiukajtis .word 0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2 128*25c28e83SPiotr Jasiukajtis .word 0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849 129*25c28e83SPiotr Jasiukajtis .word 0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff 130*25c28e83SPiotr Jasiukajtis .word 0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619 131*25c28e83SPiotr Jasiukajtis .word 0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa 132*25c28e83SPiotr Jasiukajtis .word 
0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105 133*25c28e83SPiotr Jasiukajtis .word 0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7 134*25c28e83SPiotr Jasiukajtis .word 0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc 135*25c28e83SPiotr Jasiukajtis .word 0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb 136*25c28e83SPiotr Jasiukajtis .word 0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28 137*25c28e83SPiotr Jasiukajtis .word 0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1 138*25c28e83SPiotr Jasiukajtis .word 0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94 139*25c28e83SPiotr Jasiukajtis .word 0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6 140*25c28e83SPiotr Jasiukajtis .word 0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395 141*25c28e83SPiotr Jasiukajtis .word 0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7 142*25c28e83SPiotr Jasiukajtis .word 0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e 143*25c28e83SPiotr Jasiukajtis .word 0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5 144*25c28e83SPiotr Jasiukajtis .word 0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2 145*25c28e83SPiotr Jasiukajtis .word 0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886 146*25c28e83SPiotr Jasiukajtis .word 0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5 147*25c28e83SPiotr Jasiukajtis .word 0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf 148*25c28e83SPiotr Jasiukajtis .word 0x3ff921f9, 0x54442d18, 0x3ff921f9, 0xbaaa937f 149*25c28e83SPiotr Jasiukajtis .word 0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4 150*25c28e83SPiotr Jasiukajtis .word 0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b 151*25c28e83SPiotr Jasiukajtis .word 0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886 152*25c28e83SPiotr Jasiukajtis .word 0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2 153*25c28e83SPiotr Jasiukajtis .word 0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf 154*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5 155*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4 156*25c28e83SPiotr
Jasiukajtis .word 0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f 157*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886 158*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b 159*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf 160*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2 161*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4 162*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5 163*25c28e83SPiotr Jasiukajtis .word 0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886 164*25c28e83SPiotr Jasiukajtis
/*
 * Symbolic names used by __vatanf.
 *
 * DC2, DTWO, DONE, K0, K1, K2, DC1, DC3
 *	Floating-point registers that receive the eight doubles at the
 *	start of .CONST_TBL (one ldd each at routine entry).
 * stridex, stridey
 *	Set at entry from %i2 and %i4, each shifted left by 2.
 * MASK_0x7fffffff, MASK_0x100000
 *	Integer registers built with sethi/add at entry to hold the values
 *	their names spell out.  MASK_0x7fffffff lives in %i1, so the incoming
 *	%i1 is first stored to the tmp_px slot.
 * tmp_px, tmp_counter, tmp0, tmp1
 *	Four scratch slots addressed as %fp+STACK_BIAS-32 .. %fp+STACK_BIAS-8,
 *	8 bytes apart; tmps (0x20) is the extra frame space requested by the
 *	save instruction for them.
 * counter
 *	Register reloaded from the tmp_counter slot at .begin and compared
 *	against zero at .begin1 to decide whether to exit.
 */
165*25c28e83SPiotr Jasiukajtis#define DC2 %f2 166*25c28e83SPiotr Jasiukajtis#define DTWO %f6 167*25c28e83SPiotr Jasiukajtis#define DONE %f52 168*25c28e83SPiotr Jasiukajtis#define K0 %f54 169*25c28e83SPiotr Jasiukajtis#define K1 %f56 170*25c28e83SPiotr Jasiukajtis#define K2 %f58 171*25c28e83SPiotr Jasiukajtis#define DC1 %f60 172*25c28e83SPiotr Jasiukajtis#define DC3 %f62 173*25c28e83SPiotr Jasiukajtis 174*25c28e83SPiotr Jasiukajtis#define stridex %o2 175*25c28e83SPiotr Jasiukajtis#define stridey %o3 176*25c28e83SPiotr Jasiukajtis#define MASK_0x7fffffff %i1 177*25c28e83SPiotr Jasiukajtis#define MASK_0x100000 %i5 178*25c28e83SPiotr Jasiukajtis 179*25c28e83SPiotr Jasiukajtis#define tmp_px STACK_BIAS-32 180*25c28e83SPiotr Jasiukajtis#define tmp_counter STACK_BIAS-24 181*25c28e83SPiotr Jasiukajtis#define tmp0 STACK_BIAS-16 182*25c28e83SPiotr Jasiukajtis#define tmp1 STACK_BIAS-8 183*25c28e83SPiotr Jasiukajtis 184*25c28e83SPiotr Jasiukajtis#define counter %l1 185*25c28e83SPiotr Jasiukajtis 186*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9 187*25c28e83SPiotr Jasiukajtis#define tmps 0x20 188*25c28e83SPiotr Jasiukajtis 189*25c28e83SPiotr Jasiukajtis!-------------------------------------------------------------------- 190*25c28e83SPiotr Jasiukajtis! !!!!! 
vatanf algorithm !!!!! 191*25c28e83SPiotr Jasiukajtis! ux = ((int*)px)[0]; 192*25c28e83SPiotr Jasiukajtis! ax = ux & 0x7fffffff; 193*25c28e83SPiotr Jasiukajtis! 194*25c28e83SPiotr Jasiukajtis! if ( ax < 0x39b89c55 ) 195*25c28e83SPiotr Jasiukajtis! { 196*25c28e83SPiotr Jasiukajtis! *(int*)py = ux; 197*25c28e83SPiotr Jasiukajtis! goto next; 198*25c28e83SPiotr Jasiukajtis! } 199*25c28e83SPiotr Jasiukajtis! 200*25c28e83SPiotr Jasiukajtis! if ( ax > 0x4c700518 ) 201*25c28e83SPiotr Jasiukajtis! { 202*25c28e83SPiotr Jasiukajtis! if ( ax > 0x7f800000 ) 203*25c28e83SPiotr Jasiukajtis! { 204*25c28e83SPiotr Jasiukajtis! float fpx = fabsf(*px); 205*25c28e83SPiotr Jasiukajtis! fpx *= fpx; 206*25c28e83SPiotr Jasiukajtis! *py = fpx; 207*25c28e83SPiotr Jasiukajtis! goto next; 208*25c28e83SPiotr Jasiukajtis! } 209*25c28e83SPiotr Jasiukajtis! 210*25c28e83SPiotr Jasiukajtis! sign = ux & 0x80000000; 211*25c28e83SPiotr Jasiukajtis! sign |= pi_2; 212*25c28e83SPiotr Jasiukajtis! *(int*)py = sign; 213*25c28e83SPiotr Jasiukajtis! goto next; 214*25c28e83SPiotr Jasiukajtis! } 215*25c28e83SPiotr Jasiukajtis! 216*25c28e83SPiotr Jasiukajtis! ftmp0 = *px; 217*25c28e83SPiotr Jasiukajtis! x = (double)ftmp0; 218*25c28e83SPiotr Jasiukajtis! px += stridex; 219*25c28e83SPiotr Jasiukajtis! y = vis_fpadd32(x,DC1); 220*25c28e83SPiotr Jasiukajtis! y = vis_fand(y,DC2); 221*25c28e83SPiotr Jasiukajtis! div = x * y; 222*25c28e83SPiotr Jasiukajtis! xx = x - y; 223*25c28e83SPiotr Jasiukajtis! div += DONE; 224*25c28e83SPiotr Jasiukajtis! i = ((unsigned long long*)&div)[0]; 225*25c28e83SPiotr Jasiukajtis! y0 = vis_fand(div,DC3); 226*25c28e83SPiotr Jasiukajtis! i >>= 43; 227*25c28e83SPiotr Jasiukajtis! i &= 508; 228*25c28e83SPiotr Jasiukajtis! *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 229*25c28e83SPiotr Jasiukajtis! y0 = vis_fpsub32(dtmp0, y0); 230*25c28e83SPiotr Jasiukajtis! dtmp0 = div * y0; 231*25c28e83SPiotr Jasiukajtis! dtmp0 = DTWO - dtmp0; 232*25c28e83SPiotr Jasiukajtis! 
y0 *= dtmp0; 233*25c28e83SPiotr Jasiukajtis! dtmp1 = div * y0; 234*25c28e83SPiotr Jasiukajtis! dtmp1 = DTWO - dtmp1; 235*25c28e83SPiotr Jasiukajtis! y0 *= dtmp1; 236*25c28e83SPiotr Jasiukajtis! ax = ux & 0x7fffffff; 237*25c28e83SPiotr Jasiukajtis! ax += 0x00100000; 238*25c28e83SPiotr Jasiukajtis! ax >>= 18; 239*25c28e83SPiotr Jasiukajtis! ax &= -8; 240*25c28e83SPiotr Jasiukajtis! res = *(double*)((char*)parr1 + ax); 241*25c28e83SPiotr Jasiukajtis! ux >>= 28; 242*25c28e83SPiotr Jasiukajtis! ux &= -8; 243*25c28e83SPiotr Jasiukajtis! dtmp0 = *(double*)((char*)sign_arr + ux); 244*25c28e83SPiotr Jasiukajtis! res *= dtmp0; 245*25c28e83SPiotr Jasiukajtis! xx *= y0; 246*25c28e83SPiotr Jasiukajtis! x2 = xx * xx; 247*25c28e83SPiotr Jasiukajtis! dtmp0 = K2 * x2; 248*25c28e83SPiotr Jasiukajtis! dtmp0 += K1; 249*25c28e83SPiotr Jasiukajtis! dtmp0 *= x2; 250*25c28e83SPiotr Jasiukajtis! dtmp0 += K0; 251*25c28e83SPiotr Jasiukajtis! dtmp0 *= xx; 252*25c28e83SPiotr Jasiukajtis! res += dtmp0; 253*25c28e83SPiotr Jasiukajtis! ftmp0 = (float)res; 254*25c28e83SPiotr Jasiukajtis! py[0] = ftmp0; 255*25c28e83SPiotr Jasiukajtis! 
py += stridey; 256*25c28e83SPiotr Jasiukajtis!-------------------------------------------------------------------- 257*25c28e83SPiotr Jasiukajtis 258*25c28e83SPiotr Jasiukajtis ENTRY(__vatanf) 259*25c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-tmps,%sp 260*25c28e83SPiotr Jasiukajtis PIC_SETUP(l7) 261*25c28e83SPiotr Jasiukajtis PIC_SET(l7,.CONST_TBL,l2) 262*25c28e83SPiotr Jasiukajtis 263*25c28e83SPiotr Jasiukajtis st %i0,[%fp+tmp_counter] 264*25c28e83SPiotr Jasiukajtis 265*25c28e83SPiotr Jasiukajtis sllx %i2,2,stridex 266*25c28e83SPiotr Jasiukajtis sllx %i4,2,stridey 267*25c28e83SPiotr Jasiukajtis 268*25c28e83SPiotr Jasiukajtis or %g0,%i3,%o1 269*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 270*25c28e83SPiotr Jasiukajtis 271*25c28e83SPiotr Jasiukajtis ldd [%l2],K0 272*25c28e83SPiotr Jasiukajtis ldd [%l2+8],K1 273*25c28e83SPiotr Jasiukajtis ldd [%l2+16],K2 274*25c28e83SPiotr Jasiukajtis ldd [%l2+24],DC1 275*25c28e83SPiotr Jasiukajtis ldd [%l2+32],DC2 276*25c28e83SPiotr Jasiukajtis ldd [%l2+40],DC3 277*25c28e83SPiotr Jasiukajtis ldd [%l2+48],DONE 278*25c28e83SPiotr Jasiukajtis ldd [%l2+56],DTWO 279*25c28e83SPiotr Jasiukajtis 280*25c28e83SPiotr Jasiukajtis add %l2,64,%i4 281*25c28e83SPiotr Jasiukajtis add %l2,64+512,%l0 282*25c28e83SPiotr Jasiukajtis add %l2,64+512+16-0x1cc*8,%l7 283*25c28e83SPiotr Jasiukajtis 284*25c28e83SPiotr Jasiukajtis sethi %hi(0x100000),MASK_0x100000 285*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),MASK_0x7fffffff 286*25c28e83SPiotr Jasiukajtis add MASK_0x7fffffff,1023,MASK_0x7fffffff 287*25c28e83SPiotr Jasiukajtis 288*25c28e83SPiotr Jasiukajtis sethi %hi(0x39b89c00),%o4 289*25c28e83SPiotr Jasiukajtis add %o4,0x55,%o4 290*25c28e83SPiotr Jasiukajtis sethi %hi(0x4c700400),%o5 291*25c28e83SPiotr Jasiukajtis add %o5,0x118,%o5 292*25c28e83SPiotr Jasiukajtis 293*25c28e83SPiotr Jasiukajtis.begin: 294*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_counter],counter 295*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_px],%i3 296*25c28e83SPiotr Jasiukajtis st 
%g0,[%fp+tmp_counter] 297*25c28e83SPiotr Jasiukajtis.begin1: 298*25c28e83SPiotr Jasiukajtis cmp counter,0 299*25c28e83SPiotr Jasiukajtis ble,pn %icc,.exit 300*25c28e83SPiotr Jasiukajtis nop 301*25c28e83SPiotr Jasiukajtis 302*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; 303*25c28e83SPiotr Jasiukajtis 304*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%l5 ! (0_0) ax = ux & 0x7fffffff; 305*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; 306*25c28e83SPiotr Jasiukajtis 307*25c28e83SPiotr Jasiukajtis cmp %l5,%o4 ! (0_0) ax ? 0x39b89c55 308*25c28e83SPiotr Jasiukajtis bl,pn %icc,.spec0 ! (0_0) if ( ax < 0x39b89c55 ) 309*25c28e83SPiotr Jasiukajtis nop 310*25c28e83SPiotr Jasiukajtis 311*25c28e83SPiotr Jasiukajtis cmp %l5,%o5 ! (0_0) ax ? 0x4c700518 312*25c28e83SPiotr Jasiukajtis bg,pn %icc,.spec1 ! (0_0) if ( ax > 0x4c700518 ) 313*25c28e83SPiotr Jasiukajtis nop 314*25c28e83SPiotr Jasiukajtis 315*25c28e83SPiotr Jasiukajtis add %i3,stridex,%l5 ! px += stridex; 316*25c28e83SPiotr Jasiukajtis fstod %f0,%f22 ! (0_0) ftmp0 = *px; 317*25c28e83SPiotr Jasiukajtis mov %l6,%i3 318*25c28e83SPiotr Jasiukajtis 319*25c28e83SPiotr Jasiukajtis lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0]; 320*25c28e83SPiotr Jasiukajtis 321*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; 322*25c28e83SPiotr Jasiukajtis lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; 323*25c28e83SPiotr Jasiukajtis add %l5,stridex,%l4 ! px += stridex; 324*25c28e83SPiotr Jasiukajtis fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1); 325*25c28e83SPiotr Jasiukajtis 326*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 327*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update0 ! (1_0) if ( ax < 0x39b89c55 ) 328*25c28e83SPiotr Jasiukajtis nop 329*25c28e83SPiotr Jasiukajtis.cont0: 330*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 331*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update1 ! 
(1_0) if ( ax > 0x4c700518 ) 332*25c28e83SPiotr Jasiukajtis nop 333*25c28e83SPiotr Jasiukajtis.cont1: 334*25c28e83SPiotr Jasiukajtis fstod %f0,%f20 ! (1_0) x = (double)ftmp0; 335*25c28e83SPiotr Jasiukajtis mov %l6,%l5 336*25c28e83SPiotr Jasiukajtis 337*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); 338*25c28e83SPiotr Jasiukajtis 339*25c28e83SPiotr Jasiukajtis fmuld %f22,%f26,%f32 ! (0_0) div = x * y; 340*25c28e83SPiotr Jasiukajtis 341*25c28e83SPiotr Jasiukajtis lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; 342*25c28e83SPiotr Jasiukajtis fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; 343*25c28e83SPiotr Jasiukajtis 344*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; 345*25c28e83SPiotr Jasiukajtis lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; 346*25c28e83SPiotr Jasiukajtis add %l4,stridex,%l3 ! px += stridex; 347*25c28e83SPiotr Jasiukajtis fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); 348*25c28e83SPiotr Jasiukajtis 349*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 350*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update2 ! (2_0) if ( ax < 0x39b89c55 ) 351*25c28e83SPiotr Jasiukajtis faddd DONE,%f32,%f32 ! (0_0) div += done; 352*25c28e83SPiotr Jasiukajtis.cont2: 353*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 354*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update3 ! (2_0) if ( ax > 0x4c700518 ) 355*25c28e83SPiotr Jasiukajtis nop 356*25c28e83SPiotr Jasiukajtis.cont3: 357*25c28e83SPiotr Jasiukajtis std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; 358*25c28e83SPiotr Jasiukajtis mov %l6,%l4 359*25c28e83SPiotr Jasiukajtis fstod %f0,%f18 ! (2_0) x = (double)ftmp0; 360*25c28e83SPiotr Jasiukajtis 361*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); 362*25c28e83SPiotr Jasiukajtis 363*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f30 ! (1_0) div = x * y; 364*25c28e83SPiotr Jasiukajtis 365*25c28e83SPiotr Jasiukajtis lda [%l3]0x82,%l6 ! 
(3_0) ux = ((int*)px)[0]; 366*25c28e83SPiotr Jasiukajtis fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; 367*25c28e83SPiotr Jasiukajtis 368*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; 369*25c28e83SPiotr Jasiukajtis lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; 370*25c28e83SPiotr Jasiukajtis add %l3,stridex,%i0 ! px += stridex; 371*25c28e83SPiotr Jasiukajtis fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); 372*25c28e83SPiotr Jasiukajtis 373*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 374*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update4 ! (3_0) if ( ax < 0x39b89c55 ) 375*25c28e83SPiotr Jasiukajtis faddd DONE,%f30,%f30 ! (1_0) div += done; 376*25c28e83SPiotr Jasiukajtis.cont4: 377*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 378*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update5 ! (3_0) if ( ax > 0x4c700518 ) 379*25c28e83SPiotr Jasiukajtis nop 380*25c28e83SPiotr Jasiukajtis.cont5: 381*25c28e83SPiotr Jasiukajtis std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0]; 382*25c28e83SPiotr Jasiukajtis mov %l6,%l3 383*25c28e83SPiotr Jasiukajtis fstod %f0,%f16 ! (3_0) x = (double)ftmp0; 384*25c28e83SPiotr Jasiukajtis 385*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; 386*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); 387*25c28e83SPiotr Jasiukajtis 388*25c28e83SPiotr Jasiukajtis fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); 389*25c28e83SPiotr Jasiukajtis 390*25c28e83SPiotr Jasiukajtis srlx %o0,43,%o0 ! (0_0) i >>= 43; 391*25c28e83SPiotr Jasiukajtis 392*25c28e83SPiotr Jasiukajtis and %o0,508,%l6 ! (0_0) i &= 508; 393*25c28e83SPiotr Jasiukajtis 394*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 395*25c28e83SPiotr Jasiukajtis 396*25c28e83SPiotr Jasiukajtis fmuld %f18,%f26,%f28 ! 
(2_0) div = x * y; 397*25c28e83SPiotr Jasiukajtis 398*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; 399*25c28e83SPiotr Jasiukajtis fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; 400*25c28e83SPiotr Jasiukajtis 401*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); 402*25c28e83SPiotr Jasiukajtis 403*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; 404*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; 405*25c28e83SPiotr Jasiukajtis add %i0,stridex,%i2 ! px += stridex; 406*25c28e83SPiotr Jasiukajtis fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); 407*25c28e83SPiotr Jasiukajtis 408*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 409*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update6 ! (4_0) if ( ax < 0x39b89c55 ) 410*25c28e83SPiotr Jasiukajtis faddd DONE,%f28,%f28 ! (2_0) div += done; 411*25c28e83SPiotr Jasiukajtis.cont6: 412*25c28e83SPiotr Jasiukajtis fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; 413*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 414*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update7 ! (4_0) if ( ax > 0x4c700518 ) 415*25c28e83SPiotr Jasiukajtis nop 416*25c28e83SPiotr Jasiukajtis.cont7: 417*25c28e83SPiotr Jasiukajtis std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; 418*25c28e83SPiotr Jasiukajtis mov %l6,%i0 419*25c28e83SPiotr Jasiukajtis fstod %f0,%f14 ! (4_0) x = (double)ftmp0; 420*25c28e83SPiotr Jasiukajtis 421*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; 422*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2); 423*25c28e83SPiotr Jasiukajtis 424*25c28e83SPiotr Jasiukajtis fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); 425*25c28e83SPiotr Jasiukajtis 426*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; 427*25c28e83SPiotr Jasiukajtis srlx %g1,43,%g1 ! 
(1_0) i >>= 43; 428*25c28e83SPiotr Jasiukajtis 429*25c28e83SPiotr Jasiukajtis and %g1,508,%l6 ! (1_0) i &= 508; 430*25c28e83SPiotr Jasiukajtis 431*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 432*25c28e83SPiotr Jasiukajtis 433*25c28e83SPiotr Jasiukajtis fmuld %f16,%f26,%f34 ! (3_0) div = x * y; 434*25c28e83SPiotr Jasiukajtis 435*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; 436*25c28e83SPiotr Jasiukajtis fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; 437*25c28e83SPiotr Jasiukajtis 438*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); 439*25c28e83SPiotr Jasiukajtis add %i2,stridex,%l2 ! px += stridex; 440*25c28e83SPiotr Jasiukajtis 441*25c28e83SPiotr Jasiukajtis fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; 442*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff; 443*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; 444*25c28e83SPiotr Jasiukajtis fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); 445*25c28e83SPiotr Jasiukajtis 446*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 447*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update8 ! (5_0) if ( ax < 0x39b89c55 ) 448*25c28e83SPiotr Jasiukajtis faddd DONE,%f34,%f34 ! (3_0) div += done; 449*25c28e83SPiotr Jasiukajtis.cont8: 450*25c28e83SPiotr Jasiukajtis fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; 451*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 452*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update9 ! (5_0) if ( ax > 0x4c700518 ) 453*25c28e83SPiotr Jasiukajtis nop 454*25c28e83SPiotr Jasiukajtis.cont9: 455*25c28e83SPiotr Jasiukajtis std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; 456*25c28e83SPiotr Jasiukajtis mov %l6,%i2 457*25c28e83SPiotr Jasiukajtis fstod %f0,%f36 ! (5_0) x = (double)ftmp0; 458*25c28e83SPiotr Jasiukajtis 459*25c28e83SPiotr Jasiukajtis fmuld %f32,%f40,%f32 ! 
(0_0) dtmp1 = div0 * y0; 460*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; 461*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); 462*25c28e83SPiotr Jasiukajtis 463*25c28e83SPiotr Jasiukajtis fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); 464*25c28e83SPiotr Jasiukajtis 465*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; 466*25c28e83SPiotr Jasiukajtis srlx %o0,43,%o0 ! (2_0) i >>= 43; 467*25c28e83SPiotr Jasiukajtis 468*25c28e83SPiotr Jasiukajtis and %o0,508,%l6 ! (2_0) i &= 508; 469*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; 470*25c28e83SPiotr Jasiukajtis 471*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 472*25c28e83SPiotr Jasiukajtis 473*25c28e83SPiotr Jasiukajtis fmuld %f14,%f26,%f32 ! (4_0) div = x * y; 474*25c28e83SPiotr Jasiukajtis 475*25c28e83SPiotr Jasiukajtis lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; 476*25c28e83SPiotr Jasiukajtis fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; 477*25c28e83SPiotr Jasiukajtis 478*25c28e83SPiotr Jasiukajtis fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; 479*25c28e83SPiotr Jasiukajtis add %l2,stridex,%g5 ! px += stridex; 480*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); 481*25c28e83SPiotr Jasiukajtis 482*25c28e83SPiotr Jasiukajtis fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; 483*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; 484*25c28e83SPiotr Jasiukajtis lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; 485*25c28e83SPiotr Jasiukajtis fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1); 486*25c28e83SPiotr Jasiukajtis 487*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 488*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update10 ! (6_0) if ( ax < 0x39b89c55 ) 489*25c28e83SPiotr Jasiukajtis faddd DONE,%f32,%f32 ! 
(4_0) div += done; 490*25c28e83SPiotr Jasiukajtis.cont10: 491*25c28e83SPiotr Jasiukajtis fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; 492*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 493*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update11 ! (6_0) if ( ax > 0x4c700518 ) 494*25c28e83SPiotr Jasiukajtis nop 495*25c28e83SPiotr Jasiukajtis.cont11: 496*25c28e83SPiotr Jasiukajtis fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; 497*25c28e83SPiotr Jasiukajtis mov %l6,%l2 498*25c28e83SPiotr Jasiukajtis std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; 499*25c28e83SPiotr Jasiukajtis fstod %f0,%f10 ! (6_0) x = (double)ftmp0; 500*25c28e83SPiotr Jasiukajtis 501*25c28e83SPiotr Jasiukajtis fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; 502*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; 503*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); 504*25c28e83SPiotr Jasiukajtis 505*25c28e83SPiotr Jasiukajtis fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); 506*25c28e83SPiotr Jasiukajtis 507*25c28e83SPiotr Jasiukajtis fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; 508*25c28e83SPiotr Jasiukajtis srlx %g1,43,%g1 ! (3_0) i >>= 43; 509*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; 510*25c28e83SPiotr Jasiukajtis 511*25c28e83SPiotr Jasiukajtis and %g1,508,%l6 ! (3_0) i &= 508; 512*25c28e83SPiotr Jasiukajtis mov %i3,%o7 513*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; 514*25c28e83SPiotr Jasiukajtis 515*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 516*25c28e83SPiotr Jasiukajtis 517*25c28e83SPiotr Jasiukajtis fmuld %f36,%f26,%f30 ! (5_0) div = x * y; 518*25c28e83SPiotr Jasiukajtis srl %o7,28,%g1 ! (0_0) ux >>= 28; 519*25c28e83SPiotr Jasiukajtis add %g5,stridex,%i3 ! px += stridex; 520*25c28e83SPiotr Jasiukajtis 521*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! 
(0_0) dtmp0 = K2 * x2; 522*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o0 ! (0_0) ax = ux & 0x7fffffff; 523*25c28e83SPiotr Jasiukajtis lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; 524*25c28e83SPiotr Jasiukajtis fsubd %f36,%f26,%f36 ! (5_0) xx = x - y; 525*25c28e83SPiotr Jasiukajtis 526*25c28e83SPiotr Jasiukajtis fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; 527*25c28e83SPiotr Jasiukajtis add %o0,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; 528*25c28e83SPiotr Jasiukajtis and %g1,-8,%g1 ! (0_0) ux &= -8; 529*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); 530*25c28e83SPiotr Jasiukajtis 531*25c28e83SPiotr Jasiukajtis fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; 532*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; 533*25c28e83SPiotr Jasiukajtis lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; 534*25c28e83SPiotr Jasiukajtis fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); 535*25c28e83SPiotr Jasiukajtis 536*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 537*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update12 ! (7_0) if ( ax < 0x39b89c55 ) 538*25c28e83SPiotr Jasiukajtis faddd DONE,%f30,%f30 ! (5_0) div += done; 539*25c28e83SPiotr Jasiukajtis.cont12: 540*25c28e83SPiotr Jasiukajtis fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; 541*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 542*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update13 ! (7_0) if ( ax > 0x4c700518 ) 543*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; 544*25c28e83SPiotr Jasiukajtis.cont13: 545*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; 546*25c28e83SPiotr Jasiukajtis srl %o0,18,%o7 ! (0_0) ax >>= 18; 547*25c28e83SPiotr Jasiukajtis std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; 548*25c28e83SPiotr Jasiukajtis fstod %f0,%f8 ! (7_0) x = (double)ftmp0; 549*25c28e83SPiotr Jasiukajtis 550*25c28e83SPiotr Jasiukajtis fmuld %f28,%f40,%f28 ! 
(2_0) dtmp1 = div0 * y0; 551*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (0_0) ux &= -8; 552*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; 553*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2); 554*25c28e83SPiotr Jasiukajtis 555*25c28e83SPiotr Jasiukajtis add %o7,%l7,%o7 ! (0_0) (char*)parr1 + ax; 556*25c28e83SPiotr Jasiukajtis mov %l6,%g5 557*25c28e83SPiotr Jasiukajtis ldd [%l0+%g1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); 558*25c28e83SPiotr Jasiukajtis 559*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; 560*25c28e83SPiotr Jasiukajtis srlx %o0,43,%o0 ! (4_0) i >>= 43; 561*25c28e83SPiotr Jasiukajtis ldd [%o7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); 562*25c28e83SPiotr Jasiukajtis fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); 563*25c28e83SPiotr Jasiukajtis 564*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; 565*25c28e83SPiotr Jasiukajtis and %o0,508,%l6 ! (4_0) i &= 508; 566*25c28e83SPiotr Jasiukajtis mov %l5,%o7 567*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; 568*25c28e83SPiotr Jasiukajtis 569*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; 570*25c28e83SPiotr Jasiukajtis 571*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; 572*25c28e83SPiotr Jasiukajtis srl %o7,28,%l5 ! (1_0) ux >>= 28; 573*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 574*25c28e83SPiotr Jasiukajtis 575*25c28e83SPiotr Jasiukajtis fmuld %f10,%f26,%f28 ! (6_0) div = x * y; 576*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! 
(0_0) dtmp0 += K0; 577*25c28e83SPiotr Jasiukajtis 578*25c28e83SPiotr Jasiukajtis subcc counter,8,counter 579*25c28e83SPiotr Jasiukajtis bneg,pn %icc,.tail 580*25c28e83SPiotr Jasiukajtis or %g0,%o1,%o0 581*25c28e83SPiotr Jasiukajtis 582*25c28e83SPiotr Jasiukajtis add %fp,tmp0,%g1 583*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; 584*25c28e83SPiotr Jasiukajtis 585*25c28e83SPiotr Jasiukajtis ba .main_loop 586*25c28e83SPiotr Jasiukajtis add %i3,stridex,%l5 ! px += stridex; 587*25c28e83SPiotr Jasiukajtis 588*25c28e83SPiotr Jasiukajtis .align 16 589*25c28e83SPiotr Jasiukajtis.main_loop: 590*25c28e83SPiotr Jasiukajtis fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; 591*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o1 ! (1_1) ax = ux & 0x7fffffff; 592*25c28e83SPiotr Jasiukajtis st %f12,[%g1] ! (7_1) py[0] = ftmp0; 593*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; 594*25c28e83SPiotr Jasiukajtis 595*25c28e83SPiotr Jasiukajtis fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; 596*25c28e83SPiotr Jasiukajtis srl %o7,28,%o7 ! (1_0) ux >>= 28; 597*25c28e83SPiotr Jasiukajtis add %o1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; 598*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); 599*25c28e83SPiotr Jasiukajtis 600*25c28e83SPiotr Jasiukajtis fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; 601*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o1 ! (0_0) ax = ux & 0x7fffffff; 602*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; 603*25c28e83SPiotr Jasiukajtis fpadd32 %f8,DC1,%f24 ! (7_1) y = vis_fpadd32(x,dconst1); 604*25c28e83SPiotr Jasiukajtis 605*25c28e83SPiotr Jasiukajtis fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; 606*25c28e83SPiotr Jasiukajtis cmp %o1,%o4 ! (0_0) ax ? 0x39b89c55 607*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update14 ! (0_0) if ( ax < 0x39b89c55 ) 608*25c28e83SPiotr Jasiukajtis faddd DONE,%f28,%f28 ! 
(6_1) div += done; 609*25c28e83SPiotr Jasiukajtis.cont14: 610*25c28e83SPiotr Jasiukajtis fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; 611*25c28e83SPiotr Jasiukajtis cmp %o1,%o5 ! (0_0) ax ? 0x4c700518 612*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update15 ! (0_0) if ( ax > 0x4c700518 ) 613*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1; 614*25c28e83SPiotr Jasiukajtis.cont15: 615*25c28e83SPiotr Jasiukajtis fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; 616*25c28e83SPiotr Jasiukajtis srl %g1,18,%o1 ! (1_1) ax >>= 18; 617*25c28e83SPiotr Jasiukajtis std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; 618*25c28e83SPiotr Jasiukajtis fstod %f0,%f22 ! (0_0) ftmp0 = *px; 619*25c28e83SPiotr Jasiukajtis 620*25c28e83SPiotr Jasiukajtis fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; 621*25c28e83SPiotr Jasiukajtis and %o1,-8,%o1 ! (1_1) ax &= -8; 622*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; 623*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (7_1) y = vis_fand(y,dconst2); 624*25c28e83SPiotr Jasiukajtis 625*25c28e83SPiotr Jasiukajtis ldd [%o1+%l7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); 626*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (1_1) ux &= -8; 627*25c28e83SPiotr Jasiukajtis mov %l6,%i3 628*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; 629*25c28e83SPiotr Jasiukajtis 630*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; 631*25c28e83SPiotr Jasiukajtis nop 632*25c28e83SPiotr Jasiukajtis ldd [%l0+%o7],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); 633*25c28e83SPiotr Jasiukajtis fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); 634*25c28e83SPiotr Jasiukajtis 635*25c28e83SPiotr Jasiukajtis fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; 636*25c28e83SPiotr Jasiukajtis srlx %g1,43,%g1 ! (5_1) i >>= 43; 637*25c28e83SPiotr Jasiukajtis mov %l4,%o7 638*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! 
(4_1) dtmp0 = dtwo - dtmp0; 639*25c28e83SPiotr Jasiukajtis 640*25c28e83SPiotr Jasiukajtis and %g1,508,%l6 ! (5_1) i &= 508; 641*25c28e83SPiotr Jasiukajtis nop 642*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 643*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; 644*25c28e83SPiotr Jasiukajtis 645*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; 646*25c28e83SPiotr Jasiukajtis add %o0,stridey,%g1 ! py += stridey; 647*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 648*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res; 649*25c28e83SPiotr Jasiukajtis 650*25c28e83SPiotr Jasiukajtis fmuld %f8,%f26,%f34 ! (7_1) div = x * y; 651*25c28e83SPiotr Jasiukajtis srl %o7,28,%o1 ! (2_1) ux >>= 28; 652*25c28e83SPiotr Jasiukajtis lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0]; 653*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; 654*25c28e83SPiotr Jasiukajtis 655*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; 656*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o7 ! (2_1) ax = ux & 0x7fffffff; 657*25c28e83SPiotr Jasiukajtis st %f12,[%o0] ! (0_1) py[0] = ftmp0; 658*25c28e83SPiotr Jasiukajtis fsubd %f8,%f26,%f8 ! (7_1) xx = x - y; 659*25c28e83SPiotr Jasiukajtis 660*25c28e83SPiotr Jasiukajtis fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; 661*25c28e83SPiotr Jasiukajtis add %l5,stridex,%l4 ! px += stridex; 662*25c28e83SPiotr Jasiukajtis add %o7,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; 663*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); 664*25c28e83SPiotr Jasiukajtis 665*25c28e83SPiotr Jasiukajtis fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; 666*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; 667*25c28e83SPiotr Jasiukajtis lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; 668*25c28e83SPiotr Jasiukajtis fpadd32 %f22,DC1,%f24 ! 
(0_0) y = vis_fpadd32(x,dconst1); 669*25c28e83SPiotr Jasiukajtis 670*25c28e83SPiotr Jasiukajtis fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; 671*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 672*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update16 ! (1_0) if ( ax < 0x39b89c55 ) 673*25c28e83SPiotr Jasiukajtis faddd DONE,%f34,%f34 ! (7_1) div += done; 674*25c28e83SPiotr Jasiukajtis.cont16: 675*25c28e83SPiotr Jasiukajtis fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; 676*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 677*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update17 ! (1_0) if ( ax > 0x4c700518 ) 678*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; 679*25c28e83SPiotr Jasiukajtis.cont17: 680*25c28e83SPiotr Jasiukajtis fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; 681*25c28e83SPiotr Jasiukajtis srl %o0,18,%o7 ! (2_1) ax >>= 18; 682*25c28e83SPiotr Jasiukajtis std %f34,[%fp+tmp1] ! (7_1) i = ((unsigned long long*)&div)[0]; 683*25c28e83SPiotr Jasiukajtis fstod %f0,%f20 ! (1_0) x = (double)ftmp0; 684*25c28e83SPiotr Jasiukajtis 685*25c28e83SPiotr Jasiukajtis fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; 686*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; 687*25c28e83SPiotr Jasiukajtis and %o1,-8,%o1 ! (2_1) ux &= -8; 688*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); 689*25c28e83SPiotr Jasiukajtis 690*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; 691*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (2_1) ax &= -8; 692*25c28e83SPiotr Jasiukajtis ldd [%l0+%o1],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); 693*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 694*25c28e83SPiotr Jasiukajtis 695*25c28e83SPiotr Jasiukajtis ldd [%o7+%l7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); 696*25c28e83SPiotr Jasiukajtis mov %l6,%l5 697*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; 698*25c28e83SPiotr Jasiukajtis fand %f28,DC3,%f24 ! 
(6_1) y0 = vis_fand(div,dconst3); 699*25c28e83SPiotr Jasiukajtis 700*25c28e83SPiotr Jasiukajtis fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; 701*25c28e83SPiotr Jasiukajtis srlx %o0,43,%o0 ! (6_1) i >>= 43; 702*25c28e83SPiotr Jasiukajtis mov %l3,%o7 703*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; 704*25c28e83SPiotr Jasiukajtis 705*25c28e83SPiotr Jasiukajtis and %o0,508,%l6 ! (6_1) i &= 508; 706*25c28e83SPiotr Jasiukajtis add %l4,stridex,%l3 ! px += stridex; 707*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 708*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; 709*25c28e83SPiotr Jasiukajtis 710*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; 711*25c28e83SPiotr Jasiukajtis add %g1,stridey,%o0 ! py += stridey; 712*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 713*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; 714*25c28e83SPiotr Jasiukajtis 715*25c28e83SPiotr Jasiukajtis fmuld %f22,%f26,%f32 ! (0_0) div = x * y; 716*25c28e83SPiotr Jasiukajtis srl %o7,28,%o1 ! (3_1) ux >>= 28; 717*25c28e83SPiotr Jasiukajtis lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; 718*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; 719*25c28e83SPiotr Jasiukajtis 720*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; 721*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o7 ! (3_1) ax = ux & 0x7fffffff; 722*25c28e83SPiotr Jasiukajtis st %f12,[%g1] ! (1_1) py[0] = ftmp0; 723*25c28e83SPiotr Jasiukajtis fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; 724*25c28e83SPiotr Jasiukajtis 725*25c28e83SPiotr Jasiukajtis fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; 726*25c28e83SPiotr Jasiukajtis add %o7,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; 727*25c28e83SPiotr Jasiukajtis and %o1,-8,%o1 ! (3_1) ux &= -8; 728*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f40 ! 
(6_1) y0 = vis_fpsub32(dtmp0, y0); 729*25c28e83SPiotr Jasiukajtis 730*25c28e83SPiotr Jasiukajtis fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; 731*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; 732*25c28e83SPiotr Jasiukajtis lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; 733*25c28e83SPiotr Jasiukajtis fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); 734*25c28e83SPiotr Jasiukajtis 735*25c28e83SPiotr Jasiukajtis fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; 736*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 737*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update18 ! (2_0) if ( ax < 0x39b89c55 ) 738*25c28e83SPiotr Jasiukajtis faddd DONE,%f32,%f32 ! (0_0) div += done; 739*25c28e83SPiotr Jasiukajtis.cont18: 740*25c28e83SPiotr Jasiukajtis fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; 741*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 742*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update19 ! (2_0) if ( ax > 0x4c700518 ) 743*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; 744*25c28e83SPiotr Jasiukajtis.cont19: 745*25c28e83SPiotr Jasiukajtis fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; 746*25c28e83SPiotr Jasiukajtis srl %g1,18,%o7 ! (3_1) ax >>= 18; 747*25c28e83SPiotr Jasiukajtis std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; 748*25c28e83SPiotr Jasiukajtis fstod %f0,%f18 ! (2_0) x = (double)ftmp0; 749*25c28e83SPiotr Jasiukajtis 750*25c28e83SPiotr Jasiukajtis fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; 751*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (3_1) ax &= -8; 752*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp1],%g1 ! (7_1) i = ((unsigned long long*)&div)[0]; 753*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); 754*25c28e83SPiotr Jasiukajtis 755*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; 756*25c28e83SPiotr Jasiukajtis mov %l6,%l4 757*25c28e83SPiotr Jasiukajtis ldd [%l0+%o1],%f48 ! 
(3_1) dtmp0 = *(double*)((char*)sign_arr + ux); 758*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 759*25c28e83SPiotr Jasiukajtis 760*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; 761*25c28e83SPiotr Jasiukajtis ldd [%o7+%l7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) 762*25c28e83SPiotr Jasiukajtis nop 763*25c28e83SPiotr Jasiukajtis fand %f34,DC3,%f24 ! (7_1) y0 = vis_fand(div,dconst3); 764*25c28e83SPiotr Jasiukajtis 765*25c28e83SPiotr Jasiukajtis fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; 766*25c28e83SPiotr Jasiukajtis srlx %g1,43,%g1 ! (7_1) i >>= 43; 767*25c28e83SPiotr Jasiukajtis mov %i0,%o7 768*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; 769*25c28e83SPiotr Jasiukajtis 770*25c28e83SPiotr Jasiukajtis and %g1,508,%l6 ! (7_1) i &= 508; 771*25c28e83SPiotr Jasiukajtis add %l3,stridex,%i0 ! px += stridex; 772*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 773*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; 774*25c28e83SPiotr Jasiukajtis 775*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; 776*25c28e83SPiotr Jasiukajtis add %o0,stridey,%g1 ! py += stridey; 777*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 778*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; 779*25c28e83SPiotr Jasiukajtis 780*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f30 ! (1_0) div = x * y; 781*25c28e83SPiotr Jasiukajtis srl %o7,28,%o1 ! (4_1) ux >>= 28; 782*25c28e83SPiotr Jasiukajtis lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; 783*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; 784*25c28e83SPiotr Jasiukajtis 785*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; 786*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o7 ! (4_1) ax = ux & 0x7fffffff; 787*25c28e83SPiotr Jasiukajtis st %f12,[%o0] ! (2_1) py[0] = ftmp0; 788*25c28e83SPiotr Jasiukajtis fsubd %f20,%f26,%f20 ! 
(1_0) xx = x - y; 789*25c28e83SPiotr Jasiukajtis 790*25c28e83SPiotr Jasiukajtis fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; 791*25c28e83SPiotr Jasiukajtis add %o7,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; 792*25c28e83SPiotr Jasiukajtis and %o1,-8,%o1 ! (4_1) ux &= -8; 793*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f38 ! (7_1) y0 = vis_fpsub32(dtmp0, y0); 794*25c28e83SPiotr Jasiukajtis 795*25c28e83SPiotr Jasiukajtis fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; 796*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; 797*25c28e83SPiotr Jasiukajtis lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; 798*25c28e83SPiotr Jasiukajtis fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); 799*25c28e83SPiotr Jasiukajtis 800*25c28e83SPiotr Jasiukajtis fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; 801*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 802*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update20 ! (3_0) if ( ax < 0x39b89c55 ) 803*25c28e83SPiotr Jasiukajtis faddd DONE,%f30,%f30 ! (1_0) div += done; 804*25c28e83SPiotr Jasiukajtis.cont20: 805*25c28e83SPiotr Jasiukajtis fmuld %f34,%f38,%f42 ! (7_1) dtmp0 = div0 * y0; 806*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 807*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update21 ! (3_0) if ( ax > 0x4c700518 ) 808*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; 809*25c28e83SPiotr Jasiukajtis.cont21: 810*25c28e83SPiotr Jasiukajtis fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; 811*25c28e83SPiotr Jasiukajtis srl %o0,18,%o7 ! (4_1) ax >>= 18; 812*25c28e83SPiotr Jasiukajtis std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0]; 813*25c28e83SPiotr Jasiukajtis fstod %f0,%f16 ! (3_0) x = (double)ftmp0; 814*25c28e83SPiotr Jasiukajtis 815*25c28e83SPiotr Jasiukajtis fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; 816*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (4_1) ax &= -8; 817*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp0],%o0 ! 
(0_0) i = ((unsigned long long*)&div)[0]; 818*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); 819*25c28e83SPiotr Jasiukajtis 820*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; 821*25c28e83SPiotr Jasiukajtis nop 822*25c28e83SPiotr Jasiukajtis ldd [%l0+%o1],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux); 823*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 824*25c28e83SPiotr Jasiukajtis 825*25c28e83SPiotr Jasiukajtis ldd [%o7+%l7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); 826*25c28e83SPiotr Jasiukajtis mov %l6,%l3 827*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; 828*25c28e83SPiotr Jasiukajtis fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); 829*25c28e83SPiotr Jasiukajtis 830*25c28e83SPiotr Jasiukajtis fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx; 831*25c28e83SPiotr Jasiukajtis srlx %o0,43,%o0 ! (0_0) i >>= 43; 832*25c28e83SPiotr Jasiukajtis mov %i2,%o7 833*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (7_1) dtmp0 = dtwo - dtmp0; 834*25c28e83SPiotr Jasiukajtis 835*25c28e83SPiotr Jasiukajtis and %o0,508,%l6 ! (0_0) i &= 508; 836*25c28e83SPiotr Jasiukajtis add %i0,stridex,%i2 ! px += stridex; 837*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 838*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; 839*25c28e83SPiotr Jasiukajtis 840*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; 841*25c28e83SPiotr Jasiukajtis add %g1,stridey,%o0 ! py += stridey; 842*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 843*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; 844*25c28e83SPiotr Jasiukajtis 845*25c28e83SPiotr Jasiukajtis fmuld %f18,%f26,%f28 ! (2_0) div = x * y; 846*25c28e83SPiotr Jasiukajtis srl %o7,28,%o1 ! (5_1) ux >>= 28; 847*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; 848*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! 
(4_1) dtmp0 += K0; 849*25c28e83SPiotr Jasiukajtis 850*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; 851*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o7 ! (5_1) ax = ux & 0x7fffffff; 852*25c28e83SPiotr Jasiukajtis st %f12,[%g1] ! (3_1) py[0] = ftmp0; 853*25c28e83SPiotr Jasiukajtis fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; 854*25c28e83SPiotr Jasiukajtis 855*25c28e83SPiotr Jasiukajtis fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; 856*25c28e83SPiotr Jasiukajtis add %o7,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000; 857*25c28e83SPiotr Jasiukajtis and %o1,-8,%o1 ! (5_1) ux &= -8; 858*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); 859*25c28e83SPiotr Jasiukajtis 860*25c28e83SPiotr Jasiukajtis fmuld %f38,%f44,%f38 ! (7_1) y0 *= dtmp0; 861*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; 862*25c28e83SPiotr Jasiukajtis lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; 863*25c28e83SPiotr Jasiukajtis fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); 864*25c28e83SPiotr Jasiukajtis 865*25c28e83SPiotr Jasiukajtis fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; 866*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 867*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update22 ! (4_0) if ( ax < 0x39b89c55 ) 868*25c28e83SPiotr Jasiukajtis faddd DONE,%f28,%f28 ! (2_0) div += done; 869*25c28e83SPiotr Jasiukajtis.cont22: 870*25c28e83SPiotr Jasiukajtis fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; 871*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 872*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update23 ! (4_0) if ( ax > 0x4c700518 ) 873*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; 874*25c28e83SPiotr Jasiukajtis.cont23: 875*25c28e83SPiotr Jasiukajtis fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; 876*25c28e83SPiotr Jasiukajtis srl %g1,18,%o7 ! (5_1) ax >>= 18; 877*25c28e83SPiotr Jasiukajtis std %f28,[%fp+tmp0] ! 
(2_0) i = ((unsigned long long*)&div)[0]; 878*25c28e83SPiotr Jasiukajtis fstod %f0,%f14 ! (4_0) x = (double)ftmp0; 879*25c28e83SPiotr Jasiukajtis 880*25c28e83SPiotr Jasiukajtis fmuld %f34,%f38,%f34 ! (7_1) dtmp1 = div0 * y0; 881*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (5_1) ax &= -8; 882*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; 883*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2); 884*25c28e83SPiotr Jasiukajtis 885*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; 886*25c28e83SPiotr Jasiukajtis mov %l6,%i0 887*25c28e83SPiotr Jasiukajtis ldd [%l0+%o1],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); 888*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 889*25c28e83SPiotr Jasiukajtis 890*25c28e83SPiotr Jasiukajtis ldd [%o7+%l7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); 891*25c28e83SPiotr Jasiukajtis nop 892*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; 893*25c28e83SPiotr Jasiukajtis fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); 894*25c28e83SPiotr Jasiukajtis 895*25c28e83SPiotr Jasiukajtis fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; 896*25c28e83SPiotr Jasiukajtis srlx %g1,43,%g1 ! (1_0) i >>= 43; 897*25c28e83SPiotr Jasiukajtis mov %l2,%o7 898*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; 899*25c28e83SPiotr Jasiukajtis 900*25c28e83SPiotr Jasiukajtis and %g1,508,%l6 ! (1_0) i &= 508; 901*25c28e83SPiotr Jasiukajtis add %i2,stridex,%l2 ! px += stridex; 902*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 903*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f34,%f46 ! (7_1) dtmp1 = dtwo - dtmp1; 904*25c28e83SPiotr Jasiukajtis 905*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; 906*25c28e83SPiotr Jasiukajtis add %o0,stridey,%g1 ! py += stridey; 907*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 908*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! 
(4_1) ftmp0 = (float)res; 909*25c28e83SPiotr Jasiukajtis 910*25c28e83SPiotr Jasiukajtis fmuld %f16,%f26,%f34 ! (3_0) div = x * y; 911*25c28e83SPiotr Jasiukajtis srl %o7,28,%o1 ! (6_1) ux >>= 28; 912*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; 913*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; 914*25c28e83SPiotr Jasiukajtis 915*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; 916*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o7 ! (6_1) ax = ux & 0x7fffffff; 917*25c28e83SPiotr Jasiukajtis st %f12,[%o0] ! (4_1) py[0] = ftmp0; 918*25c28e83SPiotr Jasiukajtis fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; 919*25c28e83SPiotr Jasiukajtis 920*25c28e83SPiotr Jasiukajtis fmuld %f38,%f46,%f26 ! (7_1) y0 *= dtmp1; 921*25c28e83SPiotr Jasiukajtis add %o7,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; 922*25c28e83SPiotr Jasiukajtis and %o1,-8,%o1 ! (6_1) ux &= -8; 923*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); 924*25c28e83SPiotr Jasiukajtis 925*25c28e83SPiotr Jasiukajtis fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; 926*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff; 927*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; 928*25c28e83SPiotr Jasiukajtis fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); 929*25c28e83SPiotr Jasiukajtis 930*25c28e83SPiotr Jasiukajtis fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; 931*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 932*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update24 ! (5_0) if ( ax < 0x39b89c55 ) 933*25c28e83SPiotr Jasiukajtis faddd DONE,%f34,%f34 ! (3_0) div += done; 934*25c28e83SPiotr Jasiukajtis.cont24: 935*25c28e83SPiotr Jasiukajtis fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; 936*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 937*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update25 ! 
(5_0) if ( ax > 0x4c700518 ) 938*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1; 939*25c28e83SPiotr Jasiukajtis.cont25: 940*25c28e83SPiotr Jasiukajtis fmuld %f8,%f26,%f8 ! (7_1) xx *= y0; 941*25c28e83SPiotr Jasiukajtis srl %o0,18,%o7 ! (6_1) ax >>= 18; 942*25c28e83SPiotr Jasiukajtis std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; 943*25c28e83SPiotr Jasiukajtis fstod %f0,%f36 ! (5_0) x = (double)ftmp0; 944*25c28e83SPiotr Jasiukajtis 945*25c28e83SPiotr Jasiukajtis fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; 946*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (6_1) ax &= -8; 947*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; 948*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); 949*25c28e83SPiotr Jasiukajtis 950*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; 951*25c28e83SPiotr Jasiukajtis mov %l6,%i2 952*25c28e83SPiotr Jasiukajtis ldd [%l0+%o1],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); 953*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 954*25c28e83SPiotr Jasiukajtis 955*25c28e83SPiotr Jasiukajtis ldd [%o7+%l7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); 956*25c28e83SPiotr Jasiukajtis nop 957*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2; 958*25c28e83SPiotr Jasiukajtis fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); 959*25c28e83SPiotr Jasiukajtis 960*25c28e83SPiotr Jasiukajtis fmuld %f8,%f8,%f50 ! (7_1) x2 = xx * xx; 961*25c28e83SPiotr Jasiukajtis srlx %o0,43,%o0 ! (2_0) i >>= 43; 962*25c28e83SPiotr Jasiukajtis mov %g5,%o7 963*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; 964*25c28e83SPiotr Jasiukajtis 965*25c28e83SPiotr Jasiukajtis and %o0,508,%l6 ! (2_0) i &= 508; 966*25c28e83SPiotr Jasiukajtis add %l2,stridex,%g5 ! px += stridex; 967*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 968*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f32,%f46 ! 
(0_0) dtmp1 = dtwo - dtmp1; 969*25c28e83SPiotr Jasiukajtis 970*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; 971*25c28e83SPiotr Jasiukajtis add %g1,stridey,%o0 ! py += stridey; 972*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 973*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; 974*25c28e83SPiotr Jasiukajtis 975*25c28e83SPiotr Jasiukajtis fmuld %f14,%f26,%f32 ! (4_0) div = x * y; 976*25c28e83SPiotr Jasiukajtis srl %o7,28,%o1 ! (7_1) ux >>= 28; 977*25c28e83SPiotr Jasiukajtis lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; 978*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; 979*25c28e83SPiotr Jasiukajtis 980*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (7_1) dtmp0 = K2 * x2; 981*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o7 ! (7_1) ax = ux & 0x7fffffff; 982*25c28e83SPiotr Jasiukajtis st %f12,[%g1] ! (5_1) py[0] = ftmp0; 983*25c28e83SPiotr Jasiukajtis fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; 984*25c28e83SPiotr Jasiukajtis 985*25c28e83SPiotr Jasiukajtis fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; 986*25c28e83SPiotr Jasiukajtis add %o7,MASK_0x100000,%g1 ! (7_1) ax += 0x00100000; 987*25c28e83SPiotr Jasiukajtis and %o1,-8,%o1 ! (7_1) ux &= -8; 988*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); 989*25c28e83SPiotr Jasiukajtis 990*25c28e83SPiotr Jasiukajtis fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; 991*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; 992*25c28e83SPiotr Jasiukajtis lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; 993*25c28e83SPiotr Jasiukajtis fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1); 994*25c28e83SPiotr Jasiukajtis 995*25c28e83SPiotr Jasiukajtis fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; 996*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 997*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update26 ! 
(6_0) if ( ax < 0x39b89c55 ) 998*25c28e83SPiotr Jasiukajtis faddd DONE,%f32,%f32 ! (4_0) div += done; 999*25c28e83SPiotr Jasiukajtis.cont26: 1000*25c28e83SPiotr Jasiukajtis fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; 1001*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 1002*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update27 ! (6_0) if ( ax > 0x4c700518 ) 1003*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (7_1) dtmp0 += K1; 1004*25c28e83SPiotr Jasiukajtis.cont27: 1005*25c28e83SPiotr Jasiukajtis fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; 1006*25c28e83SPiotr Jasiukajtis srl %g1,18,%o7 ! (7_1) ax >>= 18; 1007*25c28e83SPiotr Jasiukajtis std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; 1008*25c28e83SPiotr Jasiukajtis fstod %f0,%f10 ! (6_0) x = (double)ftmp0; 1009*25c28e83SPiotr Jasiukajtis 1010*25c28e83SPiotr Jasiukajtis fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; 1011*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (7_1) ax &= -8; 1012*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; 1013*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); 1014*25c28e83SPiotr Jasiukajtis 1015*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; 1016*25c28e83SPiotr Jasiukajtis mov %l6,%l2 1017*25c28e83SPiotr Jasiukajtis ldd [%l0+%o1],%f48 ! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1018*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1019*25c28e83SPiotr Jasiukajtis 1020*25c28e83SPiotr Jasiukajtis ldd [%o7+%l7],%f0 ! (7_1) res = *(double*)((char*)parr1 + ax); 1021*25c28e83SPiotr Jasiukajtis nop 1022*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (7_1) dtmp0 *= x2; 1023*25c28e83SPiotr Jasiukajtis fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); 1024*25c28e83SPiotr Jasiukajtis 1025*25c28e83SPiotr Jasiukajtis fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; 1026*25c28e83SPiotr Jasiukajtis srlx %g1,43,%g1 ! 
(3_0) i >>= 43; 1027*25c28e83SPiotr Jasiukajtis mov %i3,%o7 1028*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; 1029*25c28e83SPiotr Jasiukajtis 1030*25c28e83SPiotr Jasiukajtis and %g1,508,%l6 ! (3_0) i &= 508; 1031*25c28e83SPiotr Jasiukajtis add %g5,stridex,%i3 ! px += stridex; 1032*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1033*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; 1034*25c28e83SPiotr Jasiukajtis 1035*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (7_1) res *= dtmp0; 1036*25c28e83SPiotr Jasiukajtis add %o0,stridey,%g1 ! py += stridey; 1037*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 1038*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; 1039*25c28e83SPiotr Jasiukajtis 1040*25c28e83SPiotr Jasiukajtis fmuld %f36,%f26,%f30 ! (5_0) div = x * y; 1041*25c28e83SPiotr Jasiukajtis srl %o7,28,%o1 ! (0_0) ux >>= 28; 1042*25c28e83SPiotr Jasiukajtis lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; 1043*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (7_1) dtmp0 += K0; 1044*25c28e83SPiotr Jasiukajtis 1045*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; 1046*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o7 ! (0_0) ax = ux & 0x7fffffff; 1047*25c28e83SPiotr Jasiukajtis st %f12,[%o0] ! (6_1) py[0] = ftmp0; 1048*25c28e83SPiotr Jasiukajtis fsubd %f36,%f26,%f36 ! (5_0) xx = x - y; 1049*25c28e83SPiotr Jasiukajtis 1050*25c28e83SPiotr Jasiukajtis fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; 1051*25c28e83SPiotr Jasiukajtis add %o7,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; 1052*25c28e83SPiotr Jasiukajtis and %o1,-8,%o1 ! (0_0) ux &= -8; 1053*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); 1054*25c28e83SPiotr Jasiukajtis 1055*25c28e83SPiotr Jasiukajtis fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; 1056*25c28e83SPiotr Jasiukajtis and %l6,MASK_0x7fffffff,%o7 ! 
(7_0) ax = ux & 0x7fffffff; 1057*25c28e83SPiotr Jasiukajtis lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; 1058*25c28e83SPiotr Jasiukajtis fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); 1059*25c28e83SPiotr Jasiukajtis 1060*25c28e83SPiotr Jasiukajtis fmuld %f42,%f8,%f44 ! (7_1) dtmp0 *= xx; 1061*25c28e83SPiotr Jasiukajtis cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 1062*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update28 ! (7_0) if ( ax < 0x39b89c55 ) 1063*25c28e83SPiotr Jasiukajtis faddd DONE,%f30,%f30 ! (5_0) div += done; 1064*25c28e83SPiotr Jasiukajtis.cont28: 1065*25c28e83SPiotr Jasiukajtis fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; 1066*25c28e83SPiotr Jasiukajtis cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 1067*25c28e83SPiotr Jasiukajtis bg,pn %icc,.update29 ! (7_0) if ( ax > 0x4c700518 ) 1068*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; 1069*25c28e83SPiotr Jasiukajtis.cont29: 1070*25c28e83SPiotr Jasiukajtis fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; 1071*25c28e83SPiotr Jasiukajtis srl %o0,18,%o7 ! (0_0) ax >>= 18; 1072*25c28e83SPiotr Jasiukajtis std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; 1073*25c28e83SPiotr Jasiukajtis fstod %f0,%f8 ! (7_0) x = (double)ftmp0; 1074*25c28e83SPiotr Jasiukajtis 1075*25c28e83SPiotr Jasiukajtis fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; 1076*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (0_0) ux &= -8; 1077*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; 1078*25c28e83SPiotr Jasiukajtis fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2); 1079*25c28e83SPiotr Jasiukajtis 1080*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (7_1) res += dtmp0; 1081*25c28e83SPiotr Jasiukajtis subcc counter,8,counter 1082*25c28e83SPiotr Jasiukajtis ldd [%l0+%o1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); 1083*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1084*25c28e83SPiotr Jasiukajtis 1085*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! 
(0_0) dtmp0 *= x2; 1086*25c28e83SPiotr Jasiukajtis mov %l6,%g5 1087*25c28e83SPiotr Jasiukajtis ldd [%o7+%l7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); 1088*25c28e83SPiotr Jasiukajtis fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); 1089*25c28e83SPiotr Jasiukajtis 1090*25c28e83SPiotr Jasiukajtis fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; 1091*25c28e83SPiotr Jasiukajtis srlx %o0,43,%l6 ! (4_0) i >>= 43; 1092*25c28e83SPiotr Jasiukajtis mov %l5,%o7 1093*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; 1094*25c28e83SPiotr Jasiukajtis 1095*25c28e83SPiotr Jasiukajtis add %g1,stridey,%o0 ! py += stridey; 1096*25c28e83SPiotr Jasiukajtis and %l6,508,%l6 ! (4_0) i &= 508; 1097*25c28e83SPiotr Jasiukajtis bn,pn %icc,.exit 1098*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; 1099*25c28e83SPiotr Jasiukajtis 1100*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; 1101*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 1102*25c28e83SPiotr Jasiukajtis add %i3,stridex,%l5 ! px += stridex; 1103*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (7_1) ftmp0 = (float)res; 1104*25c28e83SPiotr Jasiukajtis 1105*25c28e83SPiotr Jasiukajtis lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; 1106*25c28e83SPiotr Jasiukajtis fmuld %f10,%f26,%f28 ! (6_0) div = x * y; 1107*25c28e83SPiotr Jasiukajtis bpos,pt %icc,.main_loop 1108*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; 1109*25c28e83SPiotr Jasiukajtis 1110*25c28e83SPiotr Jasiukajtis srl %o7,28,%l5 ! (1_0) ux >>= 28; 1111*25c28e83SPiotr Jasiukajtis st %f12,[%g1] ! (7_1) py[0] = ftmp0; 1112*25c28e83SPiotr Jasiukajtis 1113*25c28e83SPiotr Jasiukajtis.tail: 1114*25c28e83SPiotr Jasiukajtis addcc counter,7,counter 1115*25c28e83SPiotr Jasiukajtis bneg,pn %icc,.begin 1116*25c28e83SPiotr Jasiukajtis or %g0,%o0,%o1 1117*25c28e83SPiotr Jasiukajtis 1118*25c28e83SPiotr Jasiukajtis fsubd %f10,%f26,%f10 ! 
(6_1) xx = x - y; 1119*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%g1 ! (1_1) ax = ux & 0x7fffffff; 1120*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; 1121*25c28e83SPiotr Jasiukajtis 1122*25c28e83SPiotr Jasiukajtis fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; 1123*25c28e83SPiotr Jasiukajtis add %g1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; 1124*25c28e83SPiotr Jasiukajtis and %l5,-8,%l5 ! (1_1) ux &= -8; 1125*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); 1126*25c28e83SPiotr Jasiukajtis 1127*25c28e83SPiotr Jasiukajtis fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; 1128*25c28e83SPiotr Jasiukajtis 1129*25c28e83SPiotr Jasiukajtis fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; 1130*25c28e83SPiotr Jasiukajtis faddd DONE,%f28,%f28 ! (6_1) div += done; 1131*25c28e83SPiotr Jasiukajtis 1132*25c28e83SPiotr Jasiukajtis fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; 1133*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1; 1134*25c28e83SPiotr Jasiukajtis 1135*25c28e83SPiotr Jasiukajtis fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; 1136*25c28e83SPiotr Jasiukajtis srl %g1,18,%o7 ! (1_1) ax >>= 18; 1137*25c28e83SPiotr Jasiukajtis std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; 1138*25c28e83SPiotr Jasiukajtis 1139*25c28e83SPiotr Jasiukajtis fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; 1140*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (1_1) ax &= -8; 1141*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; 1142*25c28e83SPiotr Jasiukajtis 1143*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; 1144*25c28e83SPiotr Jasiukajtis add %o7,%l7,%o7 ! (1_1) (char*)parr1 + ax; 1145*25c28e83SPiotr Jasiukajtis ldd [%l0+%l5],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1146*25c28e83SPiotr Jasiukajtis 1147*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; 1148*25c28e83SPiotr Jasiukajtis fand %f30,DC3,%f24 ! 
(5_1) y0 = vis_fand(div,dconst3); 1149*25c28e83SPiotr Jasiukajtis ldd [%o7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); 1150*25c28e83SPiotr Jasiukajtis 1151*25c28e83SPiotr Jasiukajtis fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; 1152*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; 1153*25c28e83SPiotr Jasiukajtis srlx %g1,43,%g1 ! (5_1) i >>= 43; 1154*25c28e83SPiotr Jasiukajtis 1155*25c28e83SPiotr Jasiukajtis and %g1,508,%l6 ! (5_1) i &= 508; 1156*25c28e83SPiotr Jasiukajtis mov %l4,%o7 1157*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; 1158*25c28e83SPiotr Jasiukajtis 1159*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; 1160*25c28e83SPiotr Jasiukajtis add %o0,stridey,%g1 ! py += stridey; 1161*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 1162*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res; 1163*25c28e83SPiotr Jasiukajtis 1164*25c28e83SPiotr Jasiukajtis srl %o7,28,%l4 ! (2_1) ux >>= 28; 1165*25c28e83SPiotr Jasiukajtis st %f12,[%o0] ! (0_1) py[0] = ftmp0; 1166*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; 1167*25c28e83SPiotr Jasiukajtis 1168*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1169*25c28e83SPiotr Jasiukajtis bneg,pn %icc,.begin 1170*25c28e83SPiotr Jasiukajtis or %g0,%g1,%o1 1171*25c28e83SPiotr Jasiukajtis 1172*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; 1173*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o0 ! (2_1) ax = ux & 0x7fffffff; 1174*25c28e83SPiotr Jasiukajtis 1175*25c28e83SPiotr Jasiukajtis fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; 1176*25c28e83SPiotr Jasiukajtis add %o0,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; 1177*25c28e83SPiotr Jasiukajtis and %l4,-8,%l4 ! (2_1) ux &= -8; 1178*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f38 ! 
(5_1) y0 = vis_fpsub32(dtmp0, y0); 1179*25c28e83SPiotr Jasiukajtis 1180*25c28e83SPiotr Jasiukajtis fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; 1181*25c28e83SPiotr Jasiukajtis 1182*25c28e83SPiotr Jasiukajtis fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; 1183*25c28e83SPiotr Jasiukajtis 1184*25c28e83SPiotr Jasiukajtis fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; 1185*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; 1186*25c28e83SPiotr Jasiukajtis 1187*25c28e83SPiotr Jasiukajtis fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; 1188*25c28e83SPiotr Jasiukajtis srl %o0,18,%o7 ! (2_1) ax >>= 18; 1189*25c28e83SPiotr Jasiukajtis 1190*25c28e83SPiotr Jasiukajtis fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; 1191*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (2_1) ax &= -8; 1192*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; 1193*25c28e83SPiotr Jasiukajtis 1194*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; 1195*25c28e83SPiotr Jasiukajtis add %o7,%l7,%o7 ! (2_1) (char*)parr1 + ax; 1196*25c28e83SPiotr Jasiukajtis ldd [%l0+%l4],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1197*25c28e83SPiotr Jasiukajtis 1198*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; 1199*25c28e83SPiotr Jasiukajtis fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); 1200*25c28e83SPiotr Jasiukajtis ldd [%o7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); 1201*25c28e83SPiotr Jasiukajtis 1202*25c28e83SPiotr Jasiukajtis fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; 1203*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; 1204*25c28e83SPiotr Jasiukajtis srlx %o0,43,%o0 ! (6_1) i >>= 43; 1205*25c28e83SPiotr Jasiukajtis 1206*25c28e83SPiotr Jasiukajtis and %o0,508,%l6 ! (6_1) i &= 508; 1207*25c28e83SPiotr Jasiukajtis mov %l3,%o7 1208*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f32,%f46 ! 
(4_1) dtmp1 = dtwo - dtmp1; 1209*25c28e83SPiotr Jasiukajtis 1210*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; 1211*25c28e83SPiotr Jasiukajtis add %g1,stridey,%o0 ! py += stridey; 1212*25c28e83SPiotr Jasiukajtis ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); 1213*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; 1214*25c28e83SPiotr Jasiukajtis 1215*25c28e83SPiotr Jasiukajtis srl %o7,28,%l3 ! (3_1) ux >>= 28; 1216*25c28e83SPiotr Jasiukajtis st %f12,[%g1] ! (1_1) py[0] = ftmp0; 1217*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; 1218*25c28e83SPiotr Jasiukajtis 1219*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1220*25c28e83SPiotr Jasiukajtis bneg,pn %icc,.begin 1221*25c28e83SPiotr Jasiukajtis or %g0,%o0,%o1 1222*25c28e83SPiotr Jasiukajtis 1223*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; 1224*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%g1 ! (3_1) ax = ux & 0x7fffffff; 1225*25c28e83SPiotr Jasiukajtis 1226*25c28e83SPiotr Jasiukajtis fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; 1227*25c28e83SPiotr Jasiukajtis add %g1,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; 1228*25c28e83SPiotr Jasiukajtis and %l3,-8,%l3 ! (3_1) ux &= -8; 1229*25c28e83SPiotr Jasiukajtis fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); 1230*25c28e83SPiotr Jasiukajtis 1231*25c28e83SPiotr Jasiukajtis fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; 1232*25c28e83SPiotr Jasiukajtis 1233*25c28e83SPiotr Jasiukajtis fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; 1234*25c28e83SPiotr Jasiukajtis 1235*25c28e83SPiotr Jasiukajtis fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; 1236*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; 1237*25c28e83SPiotr Jasiukajtis 1238*25c28e83SPiotr Jasiukajtis fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; 1239*25c28e83SPiotr Jasiukajtis srl %g1,18,%o7 ! 
(3_1) ax >>= 18; 1240*25c28e83SPiotr Jasiukajtis 1241*25c28e83SPiotr Jasiukajtis fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; 1242*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (3_1) ax &= -8; 1243*25c28e83SPiotr Jasiukajtis 1244*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; 1245*25c28e83SPiotr Jasiukajtis add %o7,%l7,%o7 ! (3_1) (char*)parr1 + ax; 1246*25c28e83SPiotr Jasiukajtis ldd [%l0+%l3],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1247*25c28e83SPiotr Jasiukajtis 1248*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; 1249*25c28e83SPiotr Jasiukajtis ldd [%o7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) 1250*25c28e83SPiotr Jasiukajtis 1251*25c28e83SPiotr Jasiukajtis fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; 1252*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; 1253*25c28e83SPiotr Jasiukajtis 1254*25c28e83SPiotr Jasiukajtis mov %i0,%o7 1255*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; 1256*25c28e83SPiotr Jasiukajtis 1257*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; 1258*25c28e83SPiotr Jasiukajtis add %o0,stridey,%g1 ! py += stridey; 1259*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; 1260*25c28e83SPiotr Jasiukajtis 1261*25c28e83SPiotr Jasiukajtis srl %o7,28,%i0 ! (4_1) ux >>= 28; 1262*25c28e83SPiotr Jasiukajtis st %f12,[%o0] ! (2_1) py[0] = ftmp0; 1263*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; 1264*25c28e83SPiotr Jasiukajtis 1265*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1266*25c28e83SPiotr Jasiukajtis bneg,pn %icc,.begin 1267*25c28e83SPiotr Jasiukajtis or %g0,%g1,%o1 1268*25c28e83SPiotr Jasiukajtis 1269*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; 1270*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o0 ! (4_1) ax = ux & 0x7fffffff; 1271*25c28e83SPiotr Jasiukajtis 1272*25c28e83SPiotr Jasiukajtis fmuld %f38,%f46,%f26 ! 
(5_1) y0 *= dtmp1; 1273*25c28e83SPiotr Jasiukajtis add %o0,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; 1274*25c28e83SPiotr Jasiukajtis and %i0,-8,%i0 ! (4_1) ux &= -8; 1275*25c28e83SPiotr Jasiukajtis 1276*25c28e83SPiotr Jasiukajtis fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; 1277*25c28e83SPiotr Jasiukajtis 1278*25c28e83SPiotr Jasiukajtis fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; 1279*25c28e83SPiotr Jasiukajtis 1280*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; 1281*25c28e83SPiotr Jasiukajtis 1282*25c28e83SPiotr Jasiukajtis fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; 1283*25c28e83SPiotr Jasiukajtis srl %o0,18,%o7 ! (4_1) ax >>= 18; 1284*25c28e83SPiotr Jasiukajtis 1285*25c28e83SPiotr Jasiukajtis fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; 1286*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (4_1) ax &= -8; 1287*25c28e83SPiotr Jasiukajtis 1288*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; 1289*25c28e83SPiotr Jasiukajtis add %o7,%l7,%o7 ! (4_1) (char*)parr1 + ax; 1290*25c28e83SPiotr Jasiukajtis ldd [%l0+%i0],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1291*25c28e83SPiotr Jasiukajtis 1292*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; 1293*25c28e83SPiotr Jasiukajtis ldd [%o7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); 1294*25c28e83SPiotr Jasiukajtis 1295*25c28e83SPiotr Jasiukajtis fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx; 1296*25c28e83SPiotr Jasiukajtis 1297*25c28e83SPiotr Jasiukajtis mov %i2,%o7 1298*25c28e83SPiotr Jasiukajtis fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; 1299*25c28e83SPiotr Jasiukajtis 1300*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; 1301*25c28e83SPiotr Jasiukajtis add %g1,stridey,%o0 ! py += stridey; 1302*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; 1303*25c28e83SPiotr Jasiukajtis 1304*25c28e83SPiotr Jasiukajtis srl %o7,28,%i2 ! (5_1) ux >>= 28; 1305*25c28e83SPiotr Jasiukajtis st %f12,[%g1] ! 
(3_1) py[0] = ftmp0; 1306*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; 1307*25c28e83SPiotr Jasiukajtis 1308*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1309*25c28e83SPiotr Jasiukajtis bneg,pn %icc,.begin 1310*25c28e83SPiotr Jasiukajtis or %g0,%o0,%o1 1311*25c28e83SPiotr Jasiukajtis 1312*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; 1313*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%g1 ! (5_1) ax = ux & 0x7fffffff; 1314*25c28e83SPiotr Jasiukajtis 1315*25c28e83SPiotr Jasiukajtis fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; 1316*25c28e83SPiotr Jasiukajtis add %g1,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000; 1317*25c28e83SPiotr Jasiukajtis and %i2,-8,%i2 ! (5_1) ux &= -8; 1318*25c28e83SPiotr Jasiukajtis 1319*25c28e83SPiotr Jasiukajtis fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; 1320*25c28e83SPiotr Jasiukajtis 1321*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; 1322*25c28e83SPiotr Jasiukajtis 1323*25c28e83SPiotr Jasiukajtis fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; 1324*25c28e83SPiotr Jasiukajtis srl %g1,18,%o7 ! (5_1) ax >>= 18; 1325*25c28e83SPiotr Jasiukajtis 1326*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (5_1) ax &= -8; 1327*25c28e83SPiotr Jasiukajtis 1328*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; 1329*25c28e83SPiotr Jasiukajtis add %o7,%l7,%o7 ! (5_1) (char*)parr1 + ax; 1330*25c28e83SPiotr Jasiukajtis ldd [%l0+%i2],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1331*25c28e83SPiotr Jasiukajtis 1332*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; 1333*25c28e83SPiotr Jasiukajtis ldd [%o7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); 1334*25c28e83SPiotr Jasiukajtis 1335*25c28e83SPiotr Jasiukajtis fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; 1336*25c28e83SPiotr Jasiukajtis 1337*25c28e83SPiotr Jasiukajtis mov %l2,%o7 1338*25c28e83SPiotr Jasiukajtis 1339*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! 
(5_1) res *= dtmp0; 1340*25c28e83SPiotr Jasiukajtis add %o0,stridey,%g1 ! py += stridey; 1341*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res; 1342*25c28e83SPiotr Jasiukajtis 1343*25c28e83SPiotr Jasiukajtis srl %o7,28,%l2 ! (6_1) ux >>= 28; 1344*25c28e83SPiotr Jasiukajtis st %f12,[%o0] ! (4_1) py[0] = ftmp0; 1345*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; 1346*25c28e83SPiotr Jasiukajtis 1347*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 1348*25c28e83SPiotr Jasiukajtis bneg,pn %icc,.begin 1349*25c28e83SPiotr Jasiukajtis or %g0,%g1,%o1 1350*25c28e83SPiotr Jasiukajtis 1351*25c28e83SPiotr Jasiukajtis fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; 1352*25c28e83SPiotr Jasiukajtis and %o7,MASK_0x7fffffff,%o0 ! (6_1) ax = ux & 0x7fffffff; 1353*25c28e83SPiotr Jasiukajtis 1354*25c28e83SPiotr Jasiukajtis add %o0,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; 1355*25c28e83SPiotr Jasiukajtis and %l2,-8,%l2 ! (6_1) ux &= -8; 1356*25c28e83SPiotr Jasiukajtis 1357*25c28e83SPiotr Jasiukajtis fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; 1358*25c28e83SPiotr Jasiukajtis 1359*25c28e83SPiotr Jasiukajtis faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1; 1360*25c28e83SPiotr Jasiukajtis 1361*25c28e83SPiotr Jasiukajtis srl %o0,18,%o7 ! (6_1) ax >>= 18; 1362*25c28e83SPiotr Jasiukajtis 1363*25c28e83SPiotr Jasiukajtis and %o7,-8,%o7 ! (6_1) ax &= -8; 1364*25c28e83SPiotr Jasiukajtis 1365*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; 1366*25c28e83SPiotr Jasiukajtis add %o7,%l7,%o7 ! (6_1) (char*)parr1 + ax; 1367*25c28e83SPiotr Jasiukajtis ldd [%l0+%l2],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); 1368*25c28e83SPiotr Jasiukajtis 1369*25c28e83SPiotr Jasiukajtis fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2; 1370*25c28e83SPiotr Jasiukajtis ldd [%o7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); 1371*25c28e83SPiotr Jasiukajtis 1372*25c28e83SPiotr Jasiukajtis fmuld %f0,%f48,%f48 ! 
(6_1) res *= dtmp0;
1373*25c28e83SPiotr Jasiukajtis add %g1,stridey,%o0 ! py += stridey;
1374*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res;
1375*25c28e83SPiotr Jasiukajtis
1376*25c28e83SPiotr Jasiukajtis st %f12,[%g1] ! (5_1) py[0] = ftmp0;
1377*25c28e83SPiotr Jasiukajtis faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0;
1378*25c28e83SPiotr Jasiukajtis
1379*25c28e83SPiotr Jasiukajtis subcc counter,1,counter
1380*25c28e83SPiotr Jasiukajtis bneg,pn %icc,.begin
1381*25c28e83SPiotr Jasiukajtis or %g0,%o0,%o1
1382*25c28e83SPiotr Jasiukajtis
1383*25c28e83SPiotr Jasiukajtis fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx;
1384*25c28e83SPiotr Jasiukajtis
1385*25c28e83SPiotr Jasiukajtis faddd %f48,%f44,%f12 ! (6_1) res += dtmp0;
1386*25c28e83SPiotr Jasiukajtis
1387*25c28e83SPiotr Jasiukajtis add %o0,stridey,%g1 ! py += stridey;
1388*25c28e83SPiotr Jasiukajtis fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res;
1389*25c28e83SPiotr Jasiukajtis
1390*25c28e83SPiotr Jasiukajtis st %f12,[%o0] ! (6_1) py[0] = ftmp0;
1391*25c28e83SPiotr Jasiukajtis
1392*25c28e83SPiotr Jasiukajtis ba .begin
1393*25c28e83SPiotr Jasiukajtis or %g0,%g1,%o1 ! py += stridey;
1394*25c28e83SPiotr Jasiukajtis
/*
 * .exit: return to the caller.  The restore sits in the delay slot of
 * ret, so the register window is popped as part of the return; with
 * %g0 as every operand the restore produces no value.
 */
1395*25c28e83SPiotr Jasiukajtis.exit:
1396*25c28e83SPiotr Jasiukajtis ret
1397*25c28e83SPiotr Jasiukajtis restore %g0,%g0,%g0
1398*25c28e83SPiotr Jasiukajtis
/*
 * .spec0: special case that returns the input unchanged.
 * Stores the raw input word ux (%l6) to *py, advances px (%i3) by
 * stridex and py (%o1) by stridey, decrements counter and resumes at
 * .begin1.  The py increment executes in the delay slot of the branch.
 * NOTE(review): the branch that dispatches here is not in this part of
 * the file; presumably it is taken for |x| so small that atanf(x)
 * rounds to x -- confirm against the dispatch code.
 */
1399*25c28e83SPiotr Jasiukajtis .align 16
1400*25c28e83SPiotr Jasiukajtis.spec0:
1401*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex;
1402*25c28e83SPiotr Jasiukajtis sub counter,1,counter
1403*25c28e83SPiotr Jasiukajtis st %l6,[%o1] ! *(int*)py = ux;
1404*25c28e83SPiotr Jasiukajtis
1405*25c28e83SPiotr Jasiukajtis ba .begin1
1406*25c28e83SPiotr Jasiukajtis add %o1,stridey,%o1 ! py += stridey;
1407*25c28e83SPiotr Jasiukajtis
/*
 * .spec1: special case producing either +-pi/2 or |x| * |x|.
 *
 *   %l3 = 0x7f800000   single-precision +Inf bit pattern
 *   %l4 = 0x3fc90c00 + 0x3db = 0x3fc90fdb   single-precision pi/2
 *   %o0 = 0x80000000   sign-bit mask
 *
 * If ax (%l5) > 0x7f800000 the code jumps to local label 1 and stores
 * fabsf(x) * fabsf(x); an ax above the Inf pattern is a NaN encoding,
 * so the multiply presumably just propagates the NaN -- TODO confirm
 * that %l5 holds ux & 0x7fffffff on entry (it is set outside this
 * excerpt).  Otherwise the result is built with integer operations as
 * (ux & 0x80000000) | pi_2, i.e. pi/2 carrying the sign of x.
 * Both paths advance px and py, decrement counter and resume at
 * .begin1.
 */
1408*25c28e83SPiotr Jasiukajtis .align 16
1409*25c28e83SPiotr Jasiukajtis.spec1:
1410*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%l3
1411*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fc90c00),%l4 ! pi_2
1412*25c28e83SPiotr Jasiukajtis
1413*25c28e83SPiotr Jasiukajtis sethi %hi(0x80000000),%o0
1414*25c28e83SPiotr Jasiukajtis add %l4,0x3db,%l4 ! pi_2
1415*25c28e83SPiotr Jasiukajtis
1416*25c28e83SPiotr Jasiukajtis cmp %l5,%l3 ! if ( ax > 0x7f800000 )
/*
 * Annulled branch: the fabss in the delay slot executes only when the
 * branch to 1: is taken, so %f0 is left untouched on the pi/2 path.
 */
1417*25c28e83SPiotr Jasiukajtis bg,a,pn %icc,1f
1418*25c28e83SPiotr Jasiukajtis fabss %f0,%f0 ! fpx = fabsf(*px);
1419*25c28e83SPiotr Jasiukajtis
/* Fall-through path: result = sign bit of ux combined with pi/2. */
1420*25c28e83SPiotr Jasiukajtis and %l6,%o0,%l6 ! sign = ux & 0x80000000;
1421*25c28e83SPiotr Jasiukajtis
1422*25c28e83SPiotr Jasiukajtis or %l6,%l4,%l6 ! sign |= pi_2;
1423*25c28e83SPiotr Jasiukajtis
1424*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex;
1425*25c28e83SPiotr Jasiukajtis sub counter,1,counter
1426*25c28e83SPiotr Jasiukajtis st %l6,[%o1] ! *(int*)py = sign;
1427*25c28e83SPiotr Jasiukajtis
1428*25c28e83SPiotr Jasiukajtis ba .begin1
1429*25c28e83SPiotr Jasiukajtis add %o1,stridey,%o1 ! py += stridey;
1430*25c28e83SPiotr Jasiukajtis
/* Taken path (ax > 0x7f800000): result = fabsf(x) * fabsf(x). */
1431*25c28e83SPiotr Jasiukajtis1:
1432*25c28e83SPiotr Jasiukajtis fmuls %f0,%f0,%f0 ! fpx *= fpx;
1433*25c28e83SPiotr Jasiukajtis
1434*25c28e83SPiotr Jasiukajtis add %i3,stridex,%i3 ! px += stridex
1435*25c28e83SPiotr Jasiukajtis sub counter,1,counter
1436*25c28e83SPiotr Jasiukajtis st %f0,[%o1] ! *py = fpx;
1437*25c28e83SPiotr Jasiukajtis
1438*25c28e83SPiotr Jasiukajtis ba .begin1
1439*25c28e83SPiotr Jasiukajtis add %o1,stridey,%o1 ! 
py += stridey;
1440*25c28e83SPiotr Jasiukajtis
/*
 * .update0 - .update16: out-of-line fix-up stubs, one per .contN
 * resume label.  Every stub is the same instruction sequence and
 * differs only in a constant k, the pointer register that is saved,
 * and the .contN label it returns to:
 *
 *	%f0 = 0				(fzeros, both paths)
 *	if (counter <= k)
 *		%l6 = %hi(0x3fffffff)	(annulled delay slot)
 *		goto .contN
 *	tmp_counter = counter - k	(32-bit store)
 *	tmp_px = <pointer register>	(64-bit store)
 *	%l6 = %hi(0x3fffffff)
 *	counter = k			(delay slot of the ba)
 *	goto .contN
 *
 * sethi only fills the upper 22 bits, so %l6 ends up as 0x3ffffc00.
 *
 *	stub		k	saved pointer
 *	.update0/1	1	%l5
 *	.update2/3	2	%l4
 *	.update4/5	3	%l3
 *	.update6/7	4	%i0
 *	.update8/9	5	%i2
 *	.update10/11	6	%l2
 *	.update12/13	7	%g5
 *	.update14/15	0	%i3
 *	.update16	1	%l5
 *
 * NOTE(review): the branches into these stubs are outside this part of
 * the file.  By analogy with the .update26 - .update29 branch sites in
 * the main loop (taken when ax < 0x39b89c55 or ax > 0x4c700518), they
 * presumably fire when an input loaded ahead by the pipeline is outside
 * the fast-path range: the current pass is cut down to k elements, the
 * remaining count and the pointer are parked in tmp_counter / tmp_px
 * for a later restart, and %f0 / %l6 are overwritten with harmless
 * values so the pipeline can drain -- confirm against the code at
 * .begin.
 */
1441*25c28e83SPiotr Jasiukajtis .align 16
1442*25c28e83SPiotr Jasiukajtis.update0:
1443*25c28e83SPiotr Jasiukajtis cmp counter,1
1444*25c28e83SPiotr Jasiukajtis fzeros %f0
/* Annulled branch: the sethi below runs only when the branch is taken. */
1445*25c28e83SPiotr Jasiukajtis ble,a .cont0
1446*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1447*25c28e83SPiotr Jasiukajtis
/* counter > k: park the remaining count and the pointer in the frame. */
1448*25c28e83SPiotr Jasiukajtis sub counter,1,counter
1449*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1450*25c28e83SPiotr Jasiukajtis
1451*25c28e83SPiotr Jasiukajtis stx %l5,[%fp+tmp_px]
1452*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
/* The delay slot of the ba sets counter = k before resuming. */
1453*25c28e83SPiotr Jasiukajtis ba .cont0
1454*25c28e83SPiotr Jasiukajtis or %g0,1,counter
1455*25c28e83SPiotr Jasiukajtis
/* .update1: k = 1, saves %l5, resumes at .cont1. */
1456*25c28e83SPiotr Jasiukajtis .align 16
1457*25c28e83SPiotr Jasiukajtis.update1:
1458*25c28e83SPiotr Jasiukajtis cmp counter,1
1459*25c28e83SPiotr Jasiukajtis fzeros %f0
1460*25c28e83SPiotr Jasiukajtis ble,a .cont1
1461*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1462*25c28e83SPiotr Jasiukajtis
1463*25c28e83SPiotr Jasiukajtis sub counter,1,counter
1464*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1465*25c28e83SPiotr Jasiukajtis
1466*25c28e83SPiotr Jasiukajtis stx %l5,[%fp+tmp_px]
1467*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1468*25c28e83SPiotr Jasiukajtis ba .cont1
1469*25c28e83SPiotr Jasiukajtis or %g0,1,counter
1470*25c28e83SPiotr Jasiukajtis
/* .update2: k = 2, saves %l4, resumes at .cont2. */
1471*25c28e83SPiotr Jasiukajtis .align 16
1472*25c28e83SPiotr Jasiukajtis.update2:
1473*25c28e83SPiotr Jasiukajtis cmp counter,2
1474*25c28e83SPiotr Jasiukajtis fzeros %f0
1475*25c28e83SPiotr Jasiukajtis ble,a .cont2
1476*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1477*25c28e83SPiotr Jasiukajtis
1478*25c28e83SPiotr Jasiukajtis sub counter,2,counter
1479*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1480*25c28e83SPiotr Jasiukajtis
1481*25c28e83SPiotr Jasiukajtis stx %l4,[%fp+tmp_px]
1482*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1483*25c28e83SPiotr Jasiukajtis ba .cont2
1484*25c28e83SPiotr Jasiukajtis or %g0,2,counter
1485*25c28e83SPiotr Jasiukajtis
/* .update3: k = 2, saves %l4, resumes at .cont3. */
1486*25c28e83SPiotr Jasiukajtis .align 16
1487*25c28e83SPiotr Jasiukajtis.update3:
1488*25c28e83SPiotr Jasiukajtis cmp counter,2
1489*25c28e83SPiotr Jasiukajtis fzeros %f0
1490*25c28e83SPiotr Jasiukajtis ble,a .cont3
1491*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1492*25c28e83SPiotr Jasiukajtis
1493*25c28e83SPiotr Jasiukajtis sub counter,2,counter
1494*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1495*25c28e83SPiotr Jasiukajtis
1496*25c28e83SPiotr Jasiukajtis stx %l4,[%fp+tmp_px]
1497*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1498*25c28e83SPiotr Jasiukajtis ba .cont3
1499*25c28e83SPiotr Jasiukajtis or %g0,2,counter
1500*25c28e83SPiotr Jasiukajtis
/* .update4: k = 3, saves %l3, resumes at .cont4. */
1501*25c28e83SPiotr Jasiukajtis .align 16
1502*25c28e83SPiotr Jasiukajtis.update4:
1503*25c28e83SPiotr Jasiukajtis cmp counter,3
1504*25c28e83SPiotr Jasiukajtis fzeros %f0
1505*25c28e83SPiotr Jasiukajtis ble,a .cont4
1506*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1507*25c28e83SPiotr Jasiukajtis
1508*25c28e83SPiotr Jasiukajtis sub counter,3,counter
1509*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1510*25c28e83SPiotr Jasiukajtis
1511*25c28e83SPiotr Jasiukajtis stx %l3,[%fp+tmp_px]
1512*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1513*25c28e83SPiotr Jasiukajtis ba .cont4
1514*25c28e83SPiotr Jasiukajtis or %g0,3,counter
1515*25c28e83SPiotr Jasiukajtis
/* .update5: k = 3, saves %l3, resumes at .cont5. */
1516*25c28e83SPiotr Jasiukajtis .align 16
1517*25c28e83SPiotr Jasiukajtis.update5:
1518*25c28e83SPiotr Jasiukajtis cmp counter,3
1519*25c28e83SPiotr Jasiukajtis fzeros %f0
1520*25c28e83SPiotr Jasiukajtis ble,a .cont5
1521*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1522*25c28e83SPiotr Jasiukajtis
1523*25c28e83SPiotr Jasiukajtis sub counter,3,counter
1524*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1525*25c28e83SPiotr Jasiukajtis
1526*25c28e83SPiotr Jasiukajtis stx %l3,[%fp+tmp_px]
1527*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1528*25c28e83SPiotr Jasiukajtis ba .cont5
1529*25c28e83SPiotr Jasiukajtis or %g0,3,counter
1530*25c28e83SPiotr Jasiukajtis
/* .update6: k = 4, saves %i0, resumes at .cont6. */
1531*25c28e83SPiotr Jasiukajtis .align 16
1532*25c28e83SPiotr Jasiukajtis.update6:
1533*25c28e83SPiotr Jasiukajtis cmp counter,4
1534*25c28e83SPiotr Jasiukajtis fzeros %f0
1535*25c28e83SPiotr Jasiukajtis ble,a .cont6
1536*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1537*25c28e83SPiotr Jasiukajtis
1538*25c28e83SPiotr Jasiukajtis sub counter,4,counter
1539*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1540*25c28e83SPiotr Jasiukajtis
1541*25c28e83SPiotr Jasiukajtis stx %i0,[%fp+tmp_px]
1542*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1543*25c28e83SPiotr Jasiukajtis ba .cont6
1544*25c28e83SPiotr Jasiukajtis or %g0,4,counter
1545*25c28e83SPiotr Jasiukajtis
/* .update7: k = 4, saves %i0, resumes at .cont7. */
1546*25c28e83SPiotr Jasiukajtis .align 16
1547*25c28e83SPiotr Jasiukajtis.update7:
1548*25c28e83SPiotr Jasiukajtis cmp counter,4
1549*25c28e83SPiotr Jasiukajtis fzeros %f0
1550*25c28e83SPiotr Jasiukajtis ble,a .cont7
1551*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1552*25c28e83SPiotr Jasiukajtis
1553*25c28e83SPiotr Jasiukajtis sub counter,4,counter
1554*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1555*25c28e83SPiotr Jasiukajtis
1556*25c28e83SPiotr Jasiukajtis stx %i0,[%fp+tmp_px]
1557*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1558*25c28e83SPiotr Jasiukajtis ba .cont7
1559*25c28e83SPiotr Jasiukajtis or %g0,4,counter
1560*25c28e83SPiotr Jasiukajtis
/* .update8: k = 5, saves %i2, resumes at .cont8. */
1561*25c28e83SPiotr Jasiukajtis .align 16
1562*25c28e83SPiotr Jasiukajtis.update8:
1563*25c28e83SPiotr Jasiukajtis cmp counter,5
1564*25c28e83SPiotr Jasiukajtis fzeros %f0
1565*25c28e83SPiotr Jasiukajtis ble,a .cont8
1566*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1567*25c28e83SPiotr Jasiukajtis
1568*25c28e83SPiotr Jasiukajtis sub counter,5,counter
1569*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1570*25c28e83SPiotr Jasiukajtis
1571*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px]
1572*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1573*25c28e83SPiotr Jasiukajtis ba .cont8
1574*25c28e83SPiotr Jasiukajtis or %g0,5,counter
1575*25c28e83SPiotr Jasiukajtis
/* .update9: k = 5, saves %i2, resumes at .cont9. */
1576*25c28e83SPiotr Jasiukajtis .align 16
1577*25c28e83SPiotr Jasiukajtis.update9:
1578*25c28e83SPiotr Jasiukajtis cmp counter,5
1579*25c28e83SPiotr Jasiukajtis fzeros %f0
1580*25c28e83SPiotr Jasiukajtis ble,a .cont9
1581*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1582*25c28e83SPiotr Jasiukajtis
1583*25c28e83SPiotr Jasiukajtis sub counter,5,counter
1584*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1585*25c28e83SPiotr Jasiukajtis
1586*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px]
1587*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1588*25c28e83SPiotr Jasiukajtis ba .cont9
1589*25c28e83SPiotr Jasiukajtis or %g0,5,counter
1590*25c28e83SPiotr Jasiukajtis
/* .update10: k = 6, saves %l2, resumes at .cont10. */
1591*25c28e83SPiotr Jasiukajtis .align 16
1592*25c28e83SPiotr Jasiukajtis.update10:
1593*25c28e83SPiotr Jasiukajtis cmp counter,6
1594*25c28e83SPiotr Jasiukajtis fzeros %f0
1595*25c28e83SPiotr Jasiukajtis ble,a .cont10
1596*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1597*25c28e83SPiotr Jasiukajtis
1598*25c28e83SPiotr Jasiukajtis sub counter,6,counter
1599*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1600*25c28e83SPiotr Jasiukajtis
1601*25c28e83SPiotr Jasiukajtis stx %l2,[%fp+tmp_px]
1602*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1603*25c28e83SPiotr Jasiukajtis ba .cont10
1604*25c28e83SPiotr Jasiukajtis or %g0,6,counter
1605*25c28e83SPiotr Jasiukajtis
/* .update11: k = 6, saves %l2, resumes at .cont11. */
1606*25c28e83SPiotr Jasiukajtis .align 16
1607*25c28e83SPiotr Jasiukajtis.update11:
1608*25c28e83SPiotr Jasiukajtis cmp counter,6
1609*25c28e83SPiotr Jasiukajtis fzeros %f0
1610*25c28e83SPiotr Jasiukajtis ble,a .cont11
1611*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1612*25c28e83SPiotr Jasiukajtis
1613*25c28e83SPiotr Jasiukajtis sub counter,6,counter
1614*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1615*25c28e83SPiotr Jasiukajtis
1616*25c28e83SPiotr Jasiukajtis stx %l2,[%fp+tmp_px]
1617*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1618*25c28e83SPiotr Jasiukajtis ba .cont11
1619*25c28e83SPiotr Jasiukajtis or %g0,6,counter
1620*25c28e83SPiotr Jasiukajtis
/* .update12: k = 7, saves %g5, resumes at .cont12. */
1621*25c28e83SPiotr Jasiukajtis .align 16
1622*25c28e83SPiotr Jasiukajtis.update12:
1623*25c28e83SPiotr Jasiukajtis cmp counter,7
1624*25c28e83SPiotr Jasiukajtis fzeros %f0
1625*25c28e83SPiotr Jasiukajtis ble,a .cont12
1626*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1627*25c28e83SPiotr Jasiukajtis
1628*25c28e83SPiotr Jasiukajtis sub counter,7,counter
1629*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1630*25c28e83SPiotr Jasiukajtis
1631*25c28e83SPiotr Jasiukajtis stx %g5,[%fp+tmp_px]
1632*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1633*25c28e83SPiotr Jasiukajtis ba .cont12
1634*25c28e83SPiotr Jasiukajtis or %g0,7,counter
1635*25c28e83SPiotr Jasiukajtis
/* .update13: k = 7, saves %g5, resumes at .cont13. */
1636*25c28e83SPiotr Jasiukajtis .align 16
1637*25c28e83SPiotr Jasiukajtis.update13:
1638*25c28e83SPiotr Jasiukajtis cmp counter,7
1639*25c28e83SPiotr Jasiukajtis fzeros %f0
1640*25c28e83SPiotr Jasiukajtis ble,a .cont13
1641*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1642*25c28e83SPiotr Jasiukajtis
1643*25c28e83SPiotr Jasiukajtis sub counter,7,counter
1644*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1645*25c28e83SPiotr Jasiukajtis
1646*25c28e83SPiotr Jasiukajtis stx %g5,[%fp+tmp_px]
1647*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1648*25c28e83SPiotr Jasiukajtis ba .cont13
1649*25c28e83SPiotr Jasiukajtis or %g0,7,counter
1650*25c28e83SPiotr Jasiukajtis
/*
 * .update14: k = 0, saves %i3, resumes at .cont14.  With k = 0 the
 * "sub counter,0,counter" leaves counter unchanged, so the whole
 * remaining count is stored and counter is then set to 0.
 */
1651*25c28e83SPiotr Jasiukajtis .align 16
1652*25c28e83SPiotr Jasiukajtis.update14:
1653*25c28e83SPiotr Jasiukajtis cmp counter,0
1654*25c28e83SPiotr Jasiukajtis fzeros %f0
1655*25c28e83SPiotr Jasiukajtis ble,a .cont14
1656*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1657*25c28e83SPiotr Jasiukajtis
1658*25c28e83SPiotr Jasiukajtis sub counter,0,counter
1659*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1660*25c28e83SPiotr Jasiukajtis
1661*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px]
1662*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1663*25c28e83SPiotr Jasiukajtis ba .cont14
1664*25c28e83SPiotr Jasiukajtis or %g0,0,counter
1665*25c28e83SPiotr Jasiukajtis
/* .update15: k = 0, saves %i3, resumes at .cont15 (same as .update14). */
1666*25c28e83SPiotr Jasiukajtis .align 16
1667*25c28e83SPiotr Jasiukajtis.update15:
1668*25c28e83SPiotr Jasiukajtis cmp counter,0
1669*25c28e83SPiotr Jasiukajtis fzeros %f0
1670*25c28e83SPiotr Jasiukajtis ble,a .cont15
1671*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1672*25c28e83SPiotr Jasiukajtis
1673*25c28e83SPiotr Jasiukajtis sub counter,0,counter
1674*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1675*25c28e83SPiotr Jasiukajtis
1676*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px]
1677*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1678*25c28e83SPiotr Jasiukajtis ba .cont15
1679*25c28e83SPiotr Jasiukajtis or %g0,0,counter
1680*25c28e83SPiotr Jasiukajtis
/* .update16: k = 1, saves %l5, resumes at .cont16. */
1681*25c28e83SPiotr Jasiukajtis .align 16
1682*25c28e83SPiotr Jasiukajtis.update16:
1683*25c28e83SPiotr Jasiukajtis cmp counter,1
1684*25c28e83SPiotr Jasiukajtis fzeros %f0
1685*25c28e83SPiotr Jasiukajtis ble,a .cont16
1686*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1687*25c28e83SPiotr Jasiukajtis
1688*25c28e83SPiotr Jasiukajtis sub counter,1,counter
1689*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter]
1690*25c28e83SPiotr Jasiukajtis
1691*25c28e83SPiotr Jasiukajtis stx %l5,[%fp+tmp_px]
1692*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6
1693*25c28e83SPiotr Jasiukajtis ba .cont16
1694*25c28e83SPiotr Jasiukajtis or %g0,1,counter
1695*25c28e83SPiotr Jasiukajtis
1696*25c28e83SPiotr Jasiukajtis .align 16
1697*25c28e83SPiotr Jasiukajtis.update17:
1698*25c28e83SPiotr Jasiukajtis cmp counter,1
1699*25c28e83SPiotr Jasiukajtis fzeros %f0
1700*25c28e83SPiotr Jasiukajtis ble,a .cont17 1701*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1702*25c28e83SPiotr Jasiukajtis 1703*25c28e83SPiotr Jasiukajtis sub counter,1,counter 1704*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1705*25c28e83SPiotr Jasiukajtis 1706*25c28e83SPiotr Jasiukajtis stx %l5,[%fp+tmp_px] 1707*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1708*25c28e83SPiotr Jasiukajtis ba .cont17 1709*25c28e83SPiotr Jasiukajtis or %g0,1,counter 1710*25c28e83SPiotr Jasiukajtis 1711*25c28e83SPiotr Jasiukajtis .align 16 1712*25c28e83SPiotr Jasiukajtis.update18: 1713*25c28e83SPiotr Jasiukajtis cmp counter,2 1714*25c28e83SPiotr Jasiukajtis fzeros %f0 1715*25c28e83SPiotr Jasiukajtis ble,a .cont18 1716*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1717*25c28e83SPiotr Jasiukajtis 1718*25c28e83SPiotr Jasiukajtis sub counter,2,counter 1719*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1720*25c28e83SPiotr Jasiukajtis 1721*25c28e83SPiotr Jasiukajtis stx %l4,[%fp+tmp_px] 1722*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1723*25c28e83SPiotr Jasiukajtis ba .cont18 1724*25c28e83SPiotr Jasiukajtis or %g0,2,counter 1725*25c28e83SPiotr Jasiukajtis 1726*25c28e83SPiotr Jasiukajtis .align 16 1727*25c28e83SPiotr Jasiukajtis.update19: 1728*25c28e83SPiotr Jasiukajtis cmp counter,2 1729*25c28e83SPiotr Jasiukajtis fzeros %f0 1730*25c28e83SPiotr Jasiukajtis ble,a .cont19 1731*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1732*25c28e83SPiotr Jasiukajtis 1733*25c28e83SPiotr Jasiukajtis sub counter,2,counter 1734*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1735*25c28e83SPiotr Jasiukajtis 1736*25c28e83SPiotr Jasiukajtis stx %l4,[%fp+tmp_px] 1737*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1738*25c28e83SPiotr Jasiukajtis ba .cont19 1739*25c28e83SPiotr Jasiukajtis or %g0,2,counter 1740*25c28e83SPiotr Jasiukajtis 1741*25c28e83SPiotr Jasiukajtis .align 16 1742*25c28e83SPiotr Jasiukajtis.update20: 
1743*25c28e83SPiotr Jasiukajtis cmp counter,3 1744*25c28e83SPiotr Jasiukajtis fzeros %f0 1745*25c28e83SPiotr Jasiukajtis ble,a .cont20 1746*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1747*25c28e83SPiotr Jasiukajtis 1748*25c28e83SPiotr Jasiukajtis sub counter,3,counter 1749*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1750*25c28e83SPiotr Jasiukajtis 1751*25c28e83SPiotr Jasiukajtis stx %l3,[%fp+tmp_px] 1752*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1753*25c28e83SPiotr Jasiukajtis ba .cont20 1754*25c28e83SPiotr Jasiukajtis or %g0,3,counter 1755*25c28e83SPiotr Jasiukajtis 1756*25c28e83SPiotr Jasiukajtis .align 16 1757*25c28e83SPiotr Jasiukajtis.update21: 1758*25c28e83SPiotr Jasiukajtis cmp counter,3 1759*25c28e83SPiotr Jasiukajtis fzeros %f0 1760*25c28e83SPiotr Jasiukajtis ble,a .cont21 1761*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1762*25c28e83SPiotr Jasiukajtis 1763*25c28e83SPiotr Jasiukajtis sub counter,3,counter 1764*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1765*25c28e83SPiotr Jasiukajtis 1766*25c28e83SPiotr Jasiukajtis stx %l3,[%fp+tmp_px] 1767*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1768*25c28e83SPiotr Jasiukajtis ba .cont21 1769*25c28e83SPiotr Jasiukajtis or %g0,3,counter 1770*25c28e83SPiotr Jasiukajtis 1771*25c28e83SPiotr Jasiukajtis .align 16 1772*25c28e83SPiotr Jasiukajtis.update22: 1773*25c28e83SPiotr Jasiukajtis cmp counter,4 1774*25c28e83SPiotr Jasiukajtis fzeros %f0 1775*25c28e83SPiotr Jasiukajtis ble,a .cont22 1776*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1777*25c28e83SPiotr Jasiukajtis 1778*25c28e83SPiotr Jasiukajtis sub counter,4,counter 1779*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1780*25c28e83SPiotr Jasiukajtis 1781*25c28e83SPiotr Jasiukajtis stx %i0,[%fp+tmp_px] 1782*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1783*25c28e83SPiotr Jasiukajtis ba .cont22 1784*25c28e83SPiotr Jasiukajtis or %g0,4,counter 1785*25c28e83SPiotr Jasiukajtis 
1786*25c28e83SPiotr Jasiukajtis .align 16 1787*25c28e83SPiotr Jasiukajtis.update23: 1788*25c28e83SPiotr Jasiukajtis cmp counter,4 1789*25c28e83SPiotr Jasiukajtis fzeros %f0 1790*25c28e83SPiotr Jasiukajtis ble,a .cont23 1791*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1792*25c28e83SPiotr Jasiukajtis 1793*25c28e83SPiotr Jasiukajtis sub counter,4,counter 1794*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1795*25c28e83SPiotr Jasiukajtis 1796*25c28e83SPiotr Jasiukajtis stx %i0,[%fp+tmp_px] 1797*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1798*25c28e83SPiotr Jasiukajtis ba .cont23 1799*25c28e83SPiotr Jasiukajtis or %g0,4,counter 1800*25c28e83SPiotr Jasiukajtis 1801*25c28e83SPiotr Jasiukajtis .align 16 1802*25c28e83SPiotr Jasiukajtis.update24: 1803*25c28e83SPiotr Jasiukajtis cmp counter,5 1804*25c28e83SPiotr Jasiukajtis fzeros %f0 1805*25c28e83SPiotr Jasiukajtis ble,a .cont24 1806*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1807*25c28e83SPiotr Jasiukajtis 1808*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1809*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1810*25c28e83SPiotr Jasiukajtis 1811*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 1812*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1813*25c28e83SPiotr Jasiukajtis ba .cont24 1814*25c28e83SPiotr Jasiukajtis or %g0,5,counter 1815*25c28e83SPiotr Jasiukajtis 1816*25c28e83SPiotr Jasiukajtis .align 16 1817*25c28e83SPiotr Jasiukajtis.update25: 1818*25c28e83SPiotr Jasiukajtis cmp counter,5 1819*25c28e83SPiotr Jasiukajtis fzeros %f0 1820*25c28e83SPiotr Jasiukajtis ble,a .cont25 1821*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1822*25c28e83SPiotr Jasiukajtis 1823*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1824*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1825*25c28e83SPiotr Jasiukajtis 1826*25c28e83SPiotr Jasiukajtis stx %i2,[%fp+tmp_px] 1827*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1828*25c28e83SPiotr Jasiukajtis ba .cont25 
1829*25c28e83SPiotr Jasiukajtis or %g0,5,counter 1830*25c28e83SPiotr Jasiukajtis 1831*25c28e83SPiotr Jasiukajtis .align 16 1832*25c28e83SPiotr Jasiukajtis.update26: 1833*25c28e83SPiotr Jasiukajtis cmp counter,6 1834*25c28e83SPiotr Jasiukajtis fzeros %f0 1835*25c28e83SPiotr Jasiukajtis ble,a .cont26 1836*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1837*25c28e83SPiotr Jasiukajtis 1838*25c28e83SPiotr Jasiukajtis sub counter,6,counter 1839*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1840*25c28e83SPiotr Jasiukajtis 1841*25c28e83SPiotr Jasiukajtis stx %l2,[%fp+tmp_px] 1842*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1843*25c28e83SPiotr Jasiukajtis ba .cont26 1844*25c28e83SPiotr Jasiukajtis or %g0,6,counter 1845*25c28e83SPiotr Jasiukajtis 1846*25c28e83SPiotr Jasiukajtis .align 16 1847*25c28e83SPiotr Jasiukajtis.update27: 1848*25c28e83SPiotr Jasiukajtis cmp counter,6 1849*25c28e83SPiotr Jasiukajtis fzeros %f0 1850*25c28e83SPiotr Jasiukajtis ble,a .cont27 1851*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1852*25c28e83SPiotr Jasiukajtis 1853*25c28e83SPiotr Jasiukajtis sub counter,6,counter 1854*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1855*25c28e83SPiotr Jasiukajtis 1856*25c28e83SPiotr Jasiukajtis stx %l2,[%fp+tmp_px] 1857*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1858*25c28e83SPiotr Jasiukajtis ba .cont27 1859*25c28e83SPiotr Jasiukajtis or %g0,6,counter 1860*25c28e83SPiotr Jasiukajtis 1861*25c28e83SPiotr Jasiukajtis .align 16 1862*25c28e83SPiotr Jasiukajtis.update28: 1863*25c28e83SPiotr Jasiukajtis cmp counter,7 1864*25c28e83SPiotr Jasiukajtis fzeros %f0 1865*25c28e83SPiotr Jasiukajtis ble,a .cont28 1866*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1867*25c28e83SPiotr Jasiukajtis 1868*25c28e83SPiotr Jasiukajtis sub counter,7,counter 1869*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1870*25c28e83SPiotr Jasiukajtis 1871*25c28e83SPiotr Jasiukajtis stx %g5,[%fp+tmp_px] 1872*25c28e83SPiotr 
Jasiukajtis sethi %hi(0x3fffffff),%l6 1873*25c28e83SPiotr Jasiukajtis ba .cont28 1874*25c28e83SPiotr Jasiukajtis or %g0,7,counter 1875*25c28e83SPiotr Jasiukajtis 1876*25c28e83SPiotr Jasiukajtis .align 16 1877*25c28e83SPiotr Jasiukajtis.update29: 1878*25c28e83SPiotr Jasiukajtis cmp counter,7 1879*25c28e83SPiotr Jasiukajtis fzeros %f0 1880*25c28e83SPiotr Jasiukajtis ble,a .cont29 1881*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1882*25c28e83SPiotr Jasiukajtis 1883*25c28e83SPiotr Jasiukajtis sub counter,7,counter 1884*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1885*25c28e83SPiotr Jasiukajtis 1886*25c28e83SPiotr Jasiukajtis stx %g5,[%fp+tmp_px] 1887*25c28e83SPiotr Jasiukajtis sethi %hi(0x3fffffff),%l6 1888*25c28e83SPiotr Jasiukajtis ba .cont29 1889*25c28e83SPiotr Jasiukajtis or %g0,7,counter 1890*25c28e83SPiotr Jasiukajtis 1891*25c28e83SPiotr Jasiukajtis SET_SIZE(__vatanf) 1892*25c28e83SPiotr Jasiukajtis 1893