1*25c28e83SPiotr Jasiukajtis/* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis/* 22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23*25c28e83SPiotr Jasiukajtis */ 24*25c28e83SPiotr Jasiukajtis/* 25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 
27*25c28e83SPiotr Jasiukajtis */ 28*25c28e83SPiotr Jasiukajtis 29*25c28e83SPiotr Jasiukajtis .file "__vrhypotf.S" 30*25c28e83SPiotr Jasiukajtis 31*25c28e83SPiotr Jasiukajtis#include "libm.h" 32*25c28e83SPiotr Jasiukajtis 33*25c28e83SPiotr Jasiukajtis RO_DATA 34*25c28e83SPiotr Jasiukajtis .align 64 35*25c28e83SPiotr Jasiukajtis.CONST_TBL: 36*25c28e83SPiotr Jasiukajtis! i = [0,63] 37*25c28e83SPiotr Jasiukajtis! TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); 38*25c28e83SPiotr Jasiukajtis! TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); 39*25c28e83SPiotr Jasiukajtis! TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); 40*25c28e83SPiotr Jasiukajtis! TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); 41*25c28e83SPiotr Jasiukajtis 42*25c28e83SPiotr Jasiukajtis .word 0x3ff00000, 0x00000000, 0x3fd6a09e, 0x667f3bcd, 43*25c28e83SPiotr Jasiukajtis .word 0x3fef81f8, 0x1f81f820, 0x3fd673e3, 0x2ef63a03, 44*25c28e83SPiotr Jasiukajtis .word 0x3fef07c1, 0xf07c1f08, 0x3fd6482d, 0x37a5a3d2, 45*25c28e83SPiotr Jasiukajtis .word 0x3fee9131, 0xabf0b767, 0x3fd61d72, 0xb7978671, 46*25c28e83SPiotr Jasiukajtis .word 0x3fee1e1e, 0x1e1e1e1e, 0x3fd5f3aa, 0x673fa911, 47*25c28e83SPiotr Jasiukajtis .word 0x3fedae60, 0x76b981db, 0x3fd5cacb, 0x7802f342, 48*25c28e83SPiotr Jasiukajtis .word 0x3fed41d4, 0x1d41d41d, 0x3fd5a2cd, 0x8c69d61a, 49*25c28e83SPiotr Jasiukajtis .word 0x3fecd856, 0x89039b0b, 0x3fd57ba8, 0xb0ee01b9, 50*25c28e83SPiotr Jasiukajtis .word 0x3fec71c7, 0x1c71c71c, 0x3fd55555, 0x55555555, 51*25c28e83SPiotr Jasiukajtis .word 0x3fec0e07, 0x0381c0e0, 0x3fd52fcc, 0x468d6b54, 52*25c28e83SPiotr Jasiukajtis .word 0x3febacf9, 0x14c1bad0, 0x3fd50b06, 0xa8fc6b70, 53*25c28e83SPiotr Jasiukajtis .word 0x3feb4e81, 0xb4e81b4f, 0x3fd4e6fd, 0xf33cf032, 54*25c28e83SPiotr Jasiukajtis .word 0x3feaf286, 0xbca1af28, 0x3fd4c3ab, 0xe93bcf74, 55*25c28e83SPiotr Jasiukajtis .word 0x3fea98ef, 
0x606a63be, 0x3fd4a10a, 0x97af7b92, 56*25c28e83SPiotr Jasiukajtis .word 0x3fea41a4, 0x1a41a41a, 0x3fd47f14, 0x4fe17f9f, 57*25c28e83SPiotr Jasiukajtis .word 0x3fe9ec8e, 0x951033d9, 0x3fd45dc3, 0xa3c34fa3, 58*25c28e83SPiotr Jasiukajtis .word 0x3fe99999, 0x9999999a, 0x3fd43d13, 0x6248490f, 59*25c28e83SPiotr Jasiukajtis .word 0x3fe948b0, 0xfcd6e9e0, 0x3fd41cfe, 0x93ff5199, 60*25c28e83SPiotr Jasiukajtis .word 0x3fe8f9c1, 0x8f9c18fa, 0x3fd3fd80, 0x77e70577, 61*25c28e83SPiotr Jasiukajtis .word 0x3fe8acb9, 0x0f6bf3aa, 0x3fd3de94, 0x8077db58, 62*25c28e83SPiotr Jasiukajtis .word 0x3fe86186, 0x18618618, 0x3fd3c036, 0x50e00e03, 63*25c28e83SPiotr Jasiukajtis .word 0x3fe81818, 0x18181818, 0x3fd3a261, 0xba6d7a37, 64*25c28e83SPiotr Jasiukajtis .word 0x3fe7d05f, 0x417d05f4, 0x3fd38512, 0xba21f51e, 65*25c28e83SPiotr Jasiukajtis .word 0x3fe78a4c, 0x8178a4c8, 0x3fd36845, 0x766eec92, 66*25c28e83SPiotr Jasiukajtis .word 0x3fe745d1, 0x745d1746, 0x3fd34bf6, 0x3d156826, 67*25c28e83SPiotr Jasiukajtis .word 0x3fe702e0, 0x5c0b8170, 0x3fd33021, 0x8127c0e0, 68*25c28e83SPiotr Jasiukajtis .word 0x3fe6c16c, 0x16c16c17, 0x3fd314c3, 0xd92a9e91, 69*25c28e83SPiotr Jasiukajtis .word 0x3fe68168, 0x16816817, 0x3fd2f9d9, 0xfd52fd50, 70*25c28e83SPiotr Jasiukajtis .word 0x3fe642c8, 0x590b2164, 0x3fd2df60, 0xc5df2c9e, 71*25c28e83SPiotr Jasiukajtis .word 0x3fe60581, 0x60581606, 0x3fd2c555, 0x2988e428, 72*25c28e83SPiotr Jasiukajtis .word 0x3fe5c988, 0x2b931057, 0x3fd2abb4, 0x3c0eb0f4, 73*25c28e83SPiotr Jasiukajtis .word 0x3fe58ed2, 0x308158ed, 0x3fd2927b, 0x2cd320f5, 74*25c28e83SPiotr Jasiukajtis .word 0x3fe55555, 0x55555555, 0x3fd279a7, 0x4590331c, 75*25c28e83SPiotr Jasiukajtis .word 0x3fe51d07, 0xeae2f815, 0x3fd26135, 0xe91daf55, 76*25c28e83SPiotr Jasiukajtis .word 0x3fe4e5e0, 0xa72f0539, 0x3fd24924, 0x92492492, 77*25c28e83SPiotr Jasiukajtis .word 0x3fe4afd6, 0xa052bf5b, 0x3fd23170, 0xd2be638a, 78*25c28e83SPiotr Jasiukajtis .word 0x3fe47ae1, 0x47ae147b, 0x3fd21a18, 0x51ff630a, 79*25c28e83SPiotr Jasiukajtis 
.word 0x3fe446f8, 0x6562d9fb, 0x3fd20318, 0xcc6a8f5d, 80*25c28e83SPiotr Jasiukajtis .word 0x3fe41414, 0x14141414, 0x3fd1ec70, 0x124e98f9, 81*25c28e83SPiotr Jasiukajtis .word 0x3fe3e22c, 0xbce4a902, 0x3fd1d61c, 0x070ae7d3, 82*25c28e83SPiotr Jasiukajtis .word 0x3fe3b13b, 0x13b13b14, 0x3fd1c01a, 0xa03be896, 83*25c28e83SPiotr Jasiukajtis .word 0x3fe38138, 0x13813814, 0x3fd1aa69, 0xe4f2777f, 84*25c28e83SPiotr Jasiukajtis .word 0x3fe3521c, 0xfb2b78c1, 0x3fd19507, 0xecf5b9e9, 85*25c28e83SPiotr Jasiukajtis .word 0x3fe323e3, 0x4a2b10bf, 0x3fd17ff2, 0xe00ec3ee, 86*25c28e83SPiotr Jasiukajtis .word 0x3fe2f684, 0xbda12f68, 0x3fd16b28, 0xf55d72d4, 87*25c28e83SPiotr Jasiukajtis .word 0x3fe2c9fb, 0x4d812ca0, 0x3fd156a8, 0x72b5ef62, 88*25c28e83SPiotr Jasiukajtis .word 0x3fe29e41, 0x29e4129e, 0x3fd1426f, 0xac0654db, 89*25c28e83SPiotr Jasiukajtis .word 0x3fe27350, 0xb8812735, 0x3fd12e7d, 0x02c40253, 90*25c28e83SPiotr Jasiukajtis .word 0x3fe24924, 0x92492492, 0x3fd11ace, 0xe560242a, 91*25c28e83SPiotr Jasiukajtis .word 0x3fe21fb7, 0x8121fb78, 0x3fd10763, 0xcec30b26, 92*25c28e83SPiotr Jasiukajtis .word 0x3fe1f704, 0x7dc11f70, 0x3fd0f43a, 0x45cdedad, 93*25c28e83SPiotr Jasiukajtis .word 0x3fe1cf06, 0xada2811d, 0x3fd0e150, 0xdce2b60c, 94*25c28e83SPiotr Jasiukajtis .word 0x3fe1a7b9, 0x611a7b96, 0x3fd0cea6, 0x317186dc, 95*25c28e83SPiotr Jasiukajtis .word 0x3fe18118, 0x11811812, 0x3fd0bc38, 0xeb8ba412, 96*25c28e83SPiotr Jasiukajtis .word 0x3fe15b1e, 0x5f75270d, 0x3fd0aa07, 0xbd7b7488, 97*25c28e83SPiotr Jasiukajtis .word 0x3fe135c8, 0x1135c811, 0x3fd09811, 0x63615499, 98*25c28e83SPiotr Jasiukajtis .word 0x3fe11111, 0x11111111, 0x3fd08654, 0xa2d4f6db, 99*25c28e83SPiotr Jasiukajtis .word 0x3fe0ecf5, 0x6be69c90, 0x3fd074d0, 0x4a8b1438, 100*25c28e83SPiotr Jasiukajtis .word 0x3fe0c971, 0x4fbcda3b, 0x3fd06383, 0x31ff307a, 101*25c28e83SPiotr Jasiukajtis .word 0x3fe0a681, 0x0a6810a7, 0x3fd0526c, 0x39213bfa, 102*25c28e83SPiotr Jasiukajtis .word 0x3fe08421, 0x08421084, 0x3fd0418a, 0x4806de7d, 
103*25c28e83SPiotr Jasiukajtis .word 0x3fe0624d, 0xd2f1a9fc, 0x3fd030dc, 0x4ea03a72, 104*25c28e83SPiotr Jasiukajtis .word 0x3fe04104, 0x10410410, 0x3fd02061, 0x446ffa9a, 105*25c28e83SPiotr Jasiukajtis .word 0x3fe02040, 0x81020408, 0x3fd01018, 0x28467ee9, 106*25c28e83SPiotr Jasiukajtis .word 0x3ff00000, 0x00000000, 0x3fd00000, 0x00000000, 107*25c28e83SPiotr Jasiukajtis .word 0x3fef81f8, 0x1f81f820, 0x3fcfc0bd, 0x88a0f1d9, 108*25c28e83SPiotr Jasiukajtis .word 0x3fef07c1, 0xf07c1f08, 0x3fcf82ec, 0x882c0f9b, 109*25c28e83SPiotr Jasiukajtis .word 0x3fee9131, 0xabf0b767, 0x3fcf467f, 0x2814b0cc, 110*25c28e83SPiotr Jasiukajtis .word 0x3fee1e1e, 0x1e1e1e1e, 0x3fcf0b68, 0x48d2af1c, 111*25c28e83SPiotr Jasiukajtis .word 0x3fedae60, 0x76b981db, 0x3fced19b, 0x75e78957, 112*25c28e83SPiotr Jasiukajtis .word 0x3fed41d4, 0x1d41d41d, 0x3fce990c, 0xdad55ed2, 113*25c28e83SPiotr Jasiukajtis .word 0x3fecd856, 0x89039b0b, 0x3fce61b1, 0x38f18adc, 114*25c28e83SPiotr Jasiukajtis .word 0x3fec71c7, 0x1c71c71c, 0x3fce2b7d, 0xddfefa66, 115*25c28e83SPiotr Jasiukajtis .word 0x3fec0e07, 0x0381c0e0, 0x3fcdf668, 0x9b7e6350, 116*25c28e83SPiotr Jasiukajtis .word 0x3febacf9, 0x14c1bad0, 0x3fcdc267, 0xbea45549, 117*25c28e83SPiotr Jasiukajtis .word 0x3feb4e81, 0xb4e81b4f, 0x3fcd8f72, 0x08e6b82d, 118*25c28e83SPiotr Jasiukajtis .word 0x3feaf286, 0xbca1af28, 0x3fcd5d7e, 0xa914b937, 119*25c28e83SPiotr Jasiukajtis .word 0x3fea98ef, 0x606a63be, 0x3fcd2c85, 0x34ed6d86, 120*25c28e83SPiotr Jasiukajtis .word 0x3fea41a4, 0x1a41a41a, 0x3fccfc7d, 0xa32a9213, 121*25c28e83SPiotr Jasiukajtis .word 0x3fe9ec8e, 0x951033d9, 0x3fcccd60, 0x45f5d358, 122*25c28e83SPiotr Jasiukajtis .word 0x3fe99999, 0x9999999a, 0x3fcc9f25, 0xc5bfedd9, 123*25c28e83SPiotr Jasiukajtis .word 0x3fe948b0, 0xfcd6e9e0, 0x3fcc71c7, 0x1c71c71c, 124*25c28e83SPiotr Jasiukajtis .word 0x3fe8f9c1, 0x8f9c18fa, 0x3fcc453d, 0x90f057a2, 125*25c28e83SPiotr Jasiukajtis .word 0x3fe8acb9, 0x0f6bf3aa, 0x3fcc1982, 0xb2ece47b, 126*25c28e83SPiotr Jasiukajtis .word 
0x3fe86186, 0x18618618, 0x3fcbee90, 0x56fb9c39, 127*25c28e83SPiotr Jasiukajtis .word 0x3fe81818, 0x18181818, 0x3fcbc460, 0x92eb3118, 128*25c28e83SPiotr Jasiukajtis .word 0x3fe7d05f, 0x417d05f4, 0x3fcb9aed, 0xba588347, 129*25c28e83SPiotr Jasiukajtis .word 0x3fe78a4c, 0x8178a4c8, 0x3fcb7232, 0x5b79db11, 130*25c28e83SPiotr Jasiukajtis .word 0x3fe745d1, 0x745d1746, 0x3fcb4a29, 0x3c1d9550, 131*25c28e83SPiotr Jasiukajtis .word 0x3fe702e0, 0x5c0b8170, 0x3fcb22cd, 0x56d87d7e, 132*25c28e83SPiotr Jasiukajtis .word 0x3fe6c16c, 0x16c16c17, 0x3fcafc19, 0xd8606169, 133*25c28e83SPiotr Jasiukajtis .word 0x3fe68168, 0x16816817, 0x3fcad60a, 0x1d0fb394, 134*25c28e83SPiotr Jasiukajtis .word 0x3fe642c8, 0x590b2164, 0x3fcab099, 0xae8f539a, 135*25c28e83SPiotr Jasiukajtis .word 0x3fe60581, 0x60581606, 0x3fca8bc4, 0x41a3d02c, 136*25c28e83SPiotr Jasiukajtis .word 0x3fe5c988, 0x2b931057, 0x3fca6785, 0xb41bacf7, 137*25c28e83SPiotr Jasiukajtis .word 0x3fe58ed2, 0x308158ed, 0x3fca43da, 0x0adc6899, 138*25c28e83SPiotr Jasiukajtis .word 0x3fe55555, 0x55555555, 0x3fca20bd, 0x700c2c3e, 139*25c28e83SPiotr Jasiukajtis .word 0x3fe51d07, 0xeae2f815, 0x3fc9fe2c, 0x315637ee, 140*25c28e83SPiotr Jasiukajtis .word 0x3fe4e5e0, 0xa72f0539, 0x3fc9dc22, 0xbe484458, 141*25c28e83SPiotr Jasiukajtis .word 0x3fe4afd6, 0xa052bf5b, 0x3fc9ba9d, 0xa6c73588, 142*25c28e83SPiotr Jasiukajtis .word 0x3fe47ae1, 0x47ae147b, 0x3fc99999, 0x9999999a, 143*25c28e83SPiotr Jasiukajtis .word 0x3fe446f8, 0x6562d9fb, 0x3fc97913, 0x63068b54, 144*25c28e83SPiotr Jasiukajtis .word 0x3fe41414, 0x14141414, 0x3fc95907, 0xeb87ab44, 145*25c28e83SPiotr Jasiukajtis .word 0x3fe3e22c, 0xbce4a902, 0x3fc93974, 0x368cfa31, 146*25c28e83SPiotr Jasiukajtis .word 0x3fe3b13b, 0x13b13b14, 0x3fc91a55, 0x6151761c, 147*25c28e83SPiotr Jasiukajtis .word 0x3fe38138, 0x13813814, 0x3fc8fba8, 0xa1bf6f96, 148*25c28e83SPiotr Jasiukajtis .word 0x3fe3521c, 0xfb2b78c1, 0x3fc8dd6b, 0x4563a009, 149*25c28e83SPiotr Jasiukajtis .word 0x3fe323e3, 0x4a2b10bf, 0x3fc8bf9a, 
0xb06e1af3, 150*25c28e83SPiotr Jasiukajtis .word 0x3fe2f684, 0xbda12f68, 0x3fc8a234, 0x5cc04426, 151*25c28e83SPiotr Jasiukajtis .word 0x3fe2c9fb, 0x4d812ca0, 0x3fc88535, 0xd90703c6, 152*25c28e83SPiotr Jasiukajtis .word 0x3fe29e41, 0x29e4129e, 0x3fc8689c, 0xc7e07e7d, 153*25c28e83SPiotr Jasiukajtis .word 0x3fe27350, 0xb8812735, 0x3fc84c66, 0xdf0ca4c2, 154*25c28e83SPiotr Jasiukajtis .word 0x3fe24924, 0x92492492, 0x3fc83091, 0xe6a7f7e7, 155*25c28e83SPiotr Jasiukajtis .word 0x3fe21fb7, 0x8121fb78, 0x3fc8151b, 0xb86fee1d, 156*25c28e83SPiotr Jasiukajtis .word 0x3fe1f704, 0x7dc11f70, 0x3fc7fa02, 0x3f1068d1, 157*25c28e83SPiotr Jasiukajtis .word 0x3fe1cf06, 0xada2811d, 0x3fc7df43, 0x7579b9b5, 158*25c28e83SPiotr Jasiukajtis .word 0x3fe1a7b9, 0x611a7b96, 0x3fc7c4dd, 0x663ebb88, 159*25c28e83SPiotr Jasiukajtis .word 0x3fe18118, 0x11811812, 0x3fc7aace, 0x2afa8b72, 160*25c28e83SPiotr Jasiukajtis .word 0x3fe15b1e, 0x5f75270d, 0x3fc79113, 0xebbd7729, 161*25c28e83SPiotr Jasiukajtis .word 0x3fe135c8, 0x1135c811, 0x3fc777ac, 0xde80baea, 162*25c28e83SPiotr Jasiukajtis .word 0x3fe11111, 0x11111111, 0x3fc75e97, 0x46a0b098, 163*25c28e83SPiotr Jasiukajtis .word 0x3fe0ecf5, 0x6be69c90, 0x3fc745d1, 0x745d1746, 164*25c28e83SPiotr Jasiukajtis .word 0x3fe0c971, 0x4fbcda3b, 0x3fc72d59, 0xc45f1fc5, 165*25c28e83SPiotr Jasiukajtis .word 0x3fe0a681, 0x0a6810a7, 0x3fc7152e, 0x9f44f01f, 166*25c28e83SPiotr Jasiukajtis .word 0x3fe08421, 0x08421084, 0x3fc6fd4e, 0x79325467, 167*25c28e83SPiotr Jasiukajtis .word 0x3fe0624d, 0xd2f1a9fc, 0x3fc6e5b7, 0xd16657e1, 168*25c28e83SPiotr Jasiukajtis .word 0x3fe04104, 0x10410410, 0x3fc6ce69, 0x31d5858d, 169*25c28e83SPiotr Jasiukajtis .word 0x3fe02040, 0x81020408, 0x3fc6b761, 0x2ec892f6, 170*25c28e83SPiotr Jasiukajtis 171*25c28e83SPiotr Jasiukajtis .word 0x000fffff, 0xffffffff ! DC0 172*25c28e83SPiotr Jasiukajtis .word 0x3ff00000, 0 ! DC1 173*25c28e83SPiotr Jasiukajtis .word 0x7fffc000, 0 ! DC2 174*25c28e83SPiotr Jasiukajtis .word 0x7fe00000, 0 ! 
DA0 175*25c28e83SPiotr Jasiukajtis .word 0x60000000, 0 ! DA1 176*25c28e83SPiotr Jasiukajtis .word 0x80808080, 0x3f800000 ! SCALE , FONE = 1.0f 177*25c28e83SPiotr Jasiukajtis .word 0x3fefffff, 0xfee7f18f ! KA0 = 9.99999997962321453275e-01 178*25c28e83SPiotr Jasiukajtis .word 0xbfdfffff, 0xfe07e52f ! KA1 = -4.99999998166077580600e-01 179*25c28e83SPiotr Jasiukajtis .word 0x3fd80118, 0x0ca296d9 ! KA2 = 3.75066768969515586277e-01 180*25c28e83SPiotr Jasiukajtis .word 0xbfd400fc, 0x0bbb8e78 ! KA3 = -3.12560092408808548438e-01 181*25c28e83SPiotr Jasiukajtis 182*25c28e83SPiotr Jasiukajtis#define _0x7f800000 %o0 183*25c28e83SPiotr Jasiukajtis#define _0x7fffffff %o7 184*25c28e83SPiotr Jasiukajtis#define TBL %l2 185*25c28e83SPiotr Jasiukajtis 186*25c28e83SPiotr Jasiukajtis#define TBL_SHIFT 2048 187*25c28e83SPiotr Jasiukajtis 188*25c28e83SPiotr Jasiukajtis#define stridex %l3 189*25c28e83SPiotr Jasiukajtis#define stridey %l4 190*25c28e83SPiotr Jasiukajtis#define stridez %l5 191*25c28e83SPiotr Jasiukajtis#define counter %i0 192*25c28e83SPiotr Jasiukajtis 193*25c28e83SPiotr Jasiukajtis#define DA0 %f52 194*25c28e83SPiotr Jasiukajtis#define DA1 %f44 195*25c28e83SPiotr Jasiukajtis#define SCALE %f6 196*25c28e83SPiotr Jasiukajtis 197*25c28e83SPiotr Jasiukajtis#define DC0 %f46 198*25c28e83SPiotr Jasiukajtis#define DC1 %f8 199*25c28e83SPiotr Jasiukajtis#define FZERO %f9 200*25c28e83SPiotr Jasiukajtis#define DC2 %f50 201*25c28e83SPiotr Jasiukajtis 202*25c28e83SPiotr Jasiukajtis#define KA3 %f56 203*25c28e83SPiotr Jasiukajtis#define KA2 %f58 204*25c28e83SPiotr Jasiukajtis#define KA1 %f60 205*25c28e83SPiotr Jasiukajtis#define KA0 %f54 206*25c28e83SPiotr Jasiukajtis 207*25c28e83SPiotr Jasiukajtis#define tmp_counter STACK_BIAS-0x04 208*25c28e83SPiotr Jasiukajtis#define tmp_px STACK_BIAS-0x20 209*25c28e83SPiotr Jasiukajtis#define tmp_py STACK_BIAS-0x18 210*25c28e83SPiotr Jasiukajtis 211*25c28e83SPiotr Jasiukajtis#define ftmp0 STACK_BIAS-0x10 212*25c28e83SPiotr Jasiukajtis#define ftmp1 
STACK_BIAS-0x0c 213*25c28e83SPiotr Jasiukajtis#define ftmp2 STACK_BIAS-0x10 214*25c28e83SPiotr Jasiukajtis#define ftmp3 STACK_BIAS-0x0c 215*25c28e83SPiotr Jasiukajtis#define ftmp4 STACK_BIAS-0x08 216*25c28e83SPiotr Jasiukajtis 217*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9 218*25c28e83SPiotr Jasiukajtis#define tmps 0x20 219*25c28e83SPiotr Jasiukajtis 220*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 221*25c28e83SPiotr Jasiukajtis! !!!!! algorithm !!!!! 222*25c28e83SPiotr Jasiukajtis! x0 = *px; 223*25c28e83SPiotr Jasiukajtis! ax = *(int*)px; 224*25c28e83SPiotr Jasiukajtis! 225*25c28e83SPiotr Jasiukajtis! y0 = *py; 226*25c28e83SPiotr Jasiukajtis! ay = *(int*)py; 227*25c28e83SPiotr Jasiukajtis! 228*25c28e83SPiotr Jasiukajtis! ax &= 0x7fffffff; 229*25c28e83SPiotr Jasiukajtis! ay &= 0x7fffffff; 230*25c28e83SPiotr Jasiukajtis! 231*25c28e83SPiotr Jasiukajtis! px += stridex; 232*25c28e83SPiotr Jasiukajtis! py += stridey; 233*25c28e83SPiotr Jasiukajtis! 234*25c28e83SPiotr Jasiukajtis! if ( ax >= 0x7f800000 || ay >= 0x7f800000 ) 235*25c28e83SPiotr Jasiukajtis! { 236*25c28e83SPiotr Jasiukajtis! *pz = fabsf(x0) * fabsf(y0); 237*25c28e83SPiotr Jasiukajtis! if( ax == 0x7f800000 ) *pz = 0.0f; 238*25c28e83SPiotr Jasiukajtis! else if( ay == 0x7f800000 ) *pz = 0.0f; 239*25c28e83SPiotr Jasiukajtis! pz += stridez; 240*25c28e83SPiotr Jasiukajtis! continue; 241*25c28e83SPiotr Jasiukajtis! } 242*25c28e83SPiotr Jasiukajtis! 243*25c28e83SPiotr Jasiukajtis! if ( ay == 0 ) 244*25c28e83SPiotr Jasiukajtis! { 245*25c28e83SPiotr Jasiukajtis! if ( ax == 0 ) 246*25c28e83SPiotr Jasiukajtis! { 247*25c28e83SPiotr Jasiukajtis! *pz = 1.0f / 0.0f; 248*25c28e83SPiotr Jasiukajtis! pz += stridez; 249*25c28e83SPiotr Jasiukajtis! continue; 250*25c28e83SPiotr Jasiukajtis! } 251*25c28e83SPiotr Jasiukajtis! } 252*25c28e83SPiotr Jasiukajtis! 253*25c28e83SPiotr Jasiukajtis! 
hyp0 = x0 * (double)x0; 254*25c28e83SPiotr Jasiukajtis! dtmp0 = y0 * (double)y0; 255*25c28e83SPiotr Jasiukajtis! hyp0 += dtmp0; 256*25c28e83SPiotr Jasiukajtis! 257*25c28e83SPiotr Jasiukajtis! ibase0 = ((int*)&hyp0)[0]; 258*25c28e83SPiotr Jasiukajtis! 259*25c28e83SPiotr Jasiukajtis! dbase0 = vis_fand(hyp0,DA0); 260*25c28e83SPiotr Jasiukajtis! dbase0 = vis_fmul8x16(SCALE, dbase0); 261*25c28e83SPiotr Jasiukajtis! dbase0 = vis_fpsub32(DA1,dbase0); 262*25c28e83SPiotr Jasiukajtis! 263*25c28e83SPiotr Jasiukajtis! hyp0 = vis_fand(hyp0,DC0); 264*25c28e83SPiotr Jasiukajtis! hyp0 = vis_for(hyp0,DC1); 265*25c28e83SPiotr Jasiukajtis! h_hi0 = vis_fand(hyp0,DC2); 266*25c28e83SPiotr Jasiukajtis! 267*25c28e83SPiotr Jasiukajtis! ibase0 >>= 10; 268*25c28e83SPiotr Jasiukajtis! si0 = ibase0 & 0x7f0; 269*25c28e83SPiotr Jasiukajtis! xx0 = ((double*)((char*)TBL + si0))[0]; 270*25c28e83SPiotr Jasiukajtis! 271*25c28e83SPiotr Jasiukajtis! dtmp1 = hyp0 - h_hi0; 272*25c28e83SPiotr Jasiukajtis! xx0 = dtmp1 * xx0; 273*25c28e83SPiotr Jasiukajtis! res0 = ((double*)((char*)TBL + si0))[1]; 274*25c28e83SPiotr Jasiukajtis! dtmp2 = KA3 * xx0; 275*25c28e83SPiotr Jasiukajtis! dtmp2 += KA2; 276*25c28e83SPiotr Jasiukajtis! dtmp2 *= xx0; 277*25c28e83SPiotr Jasiukajtis! dtmp2 += KA1; 278*25c28e83SPiotr Jasiukajtis! dtmp2 *= xx0; 279*25c28e83SPiotr Jasiukajtis! dtmp2 += KA0; 280*25c28e83SPiotr Jasiukajtis! res0 *= dtmp2; 281*25c28e83SPiotr Jasiukajtis! res0 *= dbase0; 282*25c28e83SPiotr Jasiukajtis! ftmp0 = (float)res0; 283*25c28e83SPiotr Jasiukajtis! *pz = ftmp0; 284*25c28e83SPiotr Jasiukajtis! pz += stridez; 285*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
286*25c28e83SPiotr Jasiukajtis 287*25c28e83SPiotr Jasiukajtis ENTRY(__vrhypotf) 288*25c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-tmps,%sp 289*25c28e83SPiotr Jasiukajtis PIC_SETUP(l7) 290*25c28e83SPiotr Jasiukajtis PIC_SET(l7,.CONST_TBL,l2) 291*25c28e83SPiotr Jasiukajtis wr %g0,0x82,%asi 292*25c28e83SPiotr Jasiukajtis 293*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9 294*25c28e83SPiotr Jasiukajtis ldx [%fp+STACK_BIAS+176],stridez 295*25c28e83SPiotr Jasiukajtis#else 296*25c28e83SPiotr Jasiukajtis ld [%fp+STACK_BIAS+92],stridez 297*25c28e83SPiotr Jasiukajtis#endif 298*25c28e83SPiotr Jasiukajtis 299*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 300*25c28e83SPiotr Jasiukajtis sll %i2,2,stridex 301*25c28e83SPiotr Jasiukajtis 302*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_py] 303*25c28e83SPiotr Jasiukajtis sll %i4,2,stridey 304*25c28e83SPiotr Jasiukajtis 305*25c28e83SPiotr Jasiukajtis st %i0,[%fp+tmp_counter] 306*25c28e83SPiotr Jasiukajtis sll stridez,2,stridez 307*25c28e83SPiotr Jasiukajtis mov %i5,%o1 308*25c28e83SPiotr Jasiukajtis 309*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT],DC0 310*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+8],DC1 311*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+16],DC2 312*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+24],DA0 313*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+32],DA1 314*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+40],SCALE 315*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+48],KA0 316*25c28e83SPiotr Jasiukajtis 317*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+56],KA1 318*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),%o0 319*25c28e83SPiotr Jasiukajtis 320*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+64],KA2 321*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%o7 322*25c28e83SPiotr Jasiukajtis 323*25c28e83SPiotr Jasiukajtis ldd [TBL+TBL_SHIFT+72],KA3 324*25c28e83SPiotr Jasiukajtis add %o7,1023,%o7 325*25c28e83SPiotr Jasiukajtis 326*25c28e83SPiotr Jasiukajtis.begin: 327*25c28e83SPiotr Jasiukajtis ld 
[%fp+tmp_counter],counter 328*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_px],%o4 329*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_py],%i2 330*25c28e83SPiotr Jasiukajtis st %g0,[%fp+tmp_counter] 331*25c28e83SPiotr Jasiukajtis.begin1: 332*25c28e83SPiotr Jasiukajtis cmp counter,0 333*25c28e83SPiotr Jasiukajtis ble,pn %icc,.exit 334*25c28e83SPiotr Jasiukajtis nop 335*25c28e83SPiotr Jasiukajtis 336*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%l6 ! (3_0) ay = *(int*)py; 337*25c28e83SPiotr Jasiukajtis 338*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%i5 ! (3_0) ax = *(int*)px; 339*25c28e83SPiotr Jasiukajtis 340*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%f2 ! (3_0) y0 = *py; 341*25c28e83SPiotr Jasiukajtis and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; 342*25c28e83SPiotr Jasiukajtis 343*25c28e83SPiotr Jasiukajtis and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; 344*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 345*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec0 ! (3_0) if ( ay >= 0x7f800000 ) 346*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f4 ! (3_0) x0 = *px; 347*25c28e83SPiotr Jasiukajtis 348*25c28e83SPiotr Jasiukajtis cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000 349*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec0 ! (3_0) if ( ax >= 0x7f800000 ) 350*25c28e83SPiotr Jasiukajtis nop 351*25c28e83SPiotr Jasiukajtis 352*25c28e83SPiotr Jasiukajtis cmp %l6,0 ! (3_0) 353*25c28e83SPiotr Jasiukajtis be,pn %icc,.spec1 ! (3_0) if ( ay == 0 ) 354*25c28e83SPiotr Jasiukajtis fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; 355*25c28e83SPiotr Jasiukajtis.cont_spec1: 356*25c28e83SPiotr Jasiukajtis lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; 357*25c28e83SPiotr Jasiukajtis 358*25c28e83SPiotr Jasiukajtis fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; 359*25c28e83SPiotr Jasiukajtis lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px; 360*25c28e83SPiotr Jasiukajtis 361*25c28e83SPiotr Jasiukajtis add %o4,stridex,%l0 ! 
px += stridex 362*25c28e83SPiotr Jasiukajtis 363*25c28e83SPiotr Jasiukajtis add %i2,stridey,%i2 ! py += stridey 364*25c28e83SPiotr Jasiukajtis and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff; 365*25c28e83SPiotr Jasiukajtis 366*25c28e83SPiotr Jasiukajtis and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff; 367*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%f2 ! (4_0) y0 = *py; 368*25c28e83SPiotr Jasiukajtis 369*25c28e83SPiotr Jasiukajtis faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; 370*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 371*25c28e83SPiotr Jasiukajtis 372*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update0 ! (4_0) if ( ay >= 0x7f800000 ) 373*25c28e83SPiotr Jasiukajtis lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; 374*25c28e83SPiotr Jasiukajtis.cont0: 375*25c28e83SPiotr Jasiukajtis cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 376*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update1 ! (4_0) if ( ax >= 0x7f800000 ) 377*25c28e83SPiotr Jasiukajtis st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; 378*25c28e83SPiotr Jasiukajtis.cont1: 379*25c28e83SPiotr Jasiukajtis cmp %l6,0 ! (4_1) ay ? 0 380*25c28e83SPiotr Jasiukajtis be,pn %icc,.update2 ! (4_1) if ( ay == 0 ) 381*25c28e83SPiotr Jasiukajtis fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0; 382*25c28e83SPiotr Jasiukajtis.cont2: 383*25c28e83SPiotr Jasiukajtis lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py; 384*25c28e83SPiotr Jasiukajtis 385*25c28e83SPiotr Jasiukajtis fsmuld %f2,%f2,%f62 ! (4_1) dtmp0 = y0 * (double)y0; 386*25c28e83SPiotr Jasiukajtis lda [%l0+stridex]0x82,%i5 ! (0_0) ax = *(int*)px; 387*25c28e83SPiotr Jasiukajtis 388*25c28e83SPiotr Jasiukajtis add %l0,stridex,%i1 ! px += stridex 389*25c28e83SPiotr Jasiukajtis 390*25c28e83SPiotr Jasiukajtis add %i2,stridey,%i2 ! py += stridey 391*25c28e83SPiotr Jasiukajtis and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff; 392*25c28e83SPiotr Jasiukajtis 393*25c28e83SPiotr Jasiukajtis and %i5,_0x7fffffff,%i5 ! 
(0_0) ax &= 0x7fffffff; 394*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%f2 ! (0_0) y0 = *py; 395*25c28e83SPiotr Jasiukajtis 396*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000 397*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update3 ! (0_0) if ( ay >= 0x7f800000 ) 398*25c28e83SPiotr Jasiukajtis faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0; 399*25c28e83SPiotr Jasiukajtis.cont3: 400*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f4 ! (0_0) x0 = *px; 401*25c28e83SPiotr Jasiukajtis 402*25c28e83SPiotr Jasiukajtis cmp %i5,_0x7f800000 ! (0_0) ax ? 0x7f800000 403*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update4 ! (0_0) if ( ax >= 0x7f800000 ) 404*25c28e83SPiotr Jasiukajtis st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0]; 405*25c28e83SPiotr Jasiukajtis.cont4: 406*25c28e83SPiotr Jasiukajtis cmp %l6,0 ! (0_0) ay ? 0 407*25c28e83SPiotr Jasiukajtis be,pn %icc,.update5 ! (0_0) if ( ay == 0 ) 408*25c28e83SPiotr Jasiukajtis fsmuld %f4,%f4,%f38 ! (0_0) hyp0 = x0 * (double)x0; 409*25c28e83SPiotr Jasiukajtis.cont5: 410*25c28e83SPiotr Jasiukajtis lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py; 411*25c28e83SPiotr Jasiukajtis 412*25c28e83SPiotr Jasiukajtis fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0; 413*25c28e83SPiotr Jasiukajtis lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px; 414*25c28e83SPiotr Jasiukajtis 415*25c28e83SPiotr Jasiukajtis add %i1,stridex,%g5 ! px += stridex 416*25c28e83SPiotr Jasiukajtis 417*25c28e83SPiotr Jasiukajtis add %i2,stridey,%o3 ! py += stridey 418*25c28e83SPiotr Jasiukajtis and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff; 419*25c28e83SPiotr Jasiukajtis fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0); 420*25c28e83SPiotr Jasiukajtis 421*25c28e83SPiotr Jasiukajtis and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff; 422*25c28e83SPiotr Jasiukajtis lda [%o3]0x82,%f2 ! (1_0) y0 = *py; 423*25c28e83SPiotr Jasiukajtis 424*25c28e83SPiotr Jasiukajtis faddd %f38,%f62,%f14 ! 
(0_0) hyp0 += dtmp0; 425*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 ! (1_0) ay ? 0x7f800000 426*25c28e83SPiotr Jasiukajtis 427*25c28e83SPiotr Jasiukajtis lda [%g5]0x82,%f4 ! (1_0) x0 = *px; 428*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update6 ! (1_0) if ( ay >= 0x7f800000 ) 429*25c28e83SPiotr Jasiukajtis for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1); 430*25c28e83SPiotr Jasiukajtis.cont6: 431*25c28e83SPiotr Jasiukajtis cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000 432*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update7 ! (1_0) if ( ax >= 0x7f800000 ) 433*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0]; 434*25c28e83SPiotr Jasiukajtis.cont7: 435*25c28e83SPiotr Jasiukajtis st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0]; 436*25c28e83SPiotr Jasiukajtis 437*25c28e83SPiotr Jasiukajtis cmp %l6,0 ! (1_0) ay ? 0 438*25c28e83SPiotr Jasiukajtis be,pn %icc,.update8 ! (1_0) if ( ay == 0 ) 439*25c28e83SPiotr Jasiukajtis fand %f28,DC2,%f30 ! (3_1) h_hi0 = vis_fand(hyp0,DC2); 440*25c28e83SPiotr Jasiukajtis.cont8: 441*25c28e83SPiotr Jasiukajtis fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0; 442*25c28e83SPiotr Jasiukajtis sra %l1,10,%o5 ! (3_1) ibase0 >>= 10; 443*25c28e83SPiotr Jasiukajtis 444*25c28e83SPiotr Jasiukajtis and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0; 445*25c28e83SPiotr Jasiukajtis lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py; 446*25c28e83SPiotr Jasiukajtis 447*25c28e83SPiotr Jasiukajtis fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0; 448*25c28e83SPiotr Jasiukajtis add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0 449*25c28e83SPiotr Jasiukajtis lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px; 450*25c28e83SPiotr Jasiukajtis fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0; 451*25c28e83SPiotr Jasiukajtis 452*25c28e83SPiotr Jasiukajtis add %g5,stridex,%i4 ! px += stridex 453*25c28e83SPiotr Jasiukajtis ldd [TBL+%o4],%f42 ! 
(3_1) xx0 = ((double*)((char*)TBL + si0))[0]; 454*25c28e83SPiotr Jasiukajtis 455*25c28e83SPiotr Jasiukajtis and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff; 456*25c28e83SPiotr Jasiukajtis add %o3,stridey,%i2 ! py += stridey 457*25c28e83SPiotr Jasiukajtis fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0); 458*25c28e83SPiotr Jasiukajtis 459*25c28e83SPiotr Jasiukajtis and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff; 460*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%f2 ! (2_0) y0 = *py; 461*25c28e83SPiotr Jasiukajtis 462*25c28e83SPiotr Jasiukajtis faddd %f38,%f62,%f16 ! (1_0) hyp0 += dtmp0; 463*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000 464*25c28e83SPiotr Jasiukajtis fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0; 465*25c28e83SPiotr Jasiukajtis 466*25c28e83SPiotr Jasiukajtis lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px; 467*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update9 ! (2_0) if ( ay >= 0x7f800000 468*25c28e83SPiotr Jasiukajtis for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1); 469*25c28e83SPiotr Jasiukajtis.cont9: 470*25c28e83SPiotr Jasiukajtis cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000 471*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update10 ! (2_0) if ( ax >= 0x7f800000 ) 472*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0]; 473*25c28e83SPiotr Jasiukajtis.cont10: 474*25c28e83SPiotr Jasiukajtis st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0]; 475*25c28e83SPiotr Jasiukajtis 476*25c28e83SPiotr Jasiukajtis fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0; 477*25c28e83SPiotr Jasiukajtis cmp %l6,0 ! (2_0) ay ? 0 478*25c28e83SPiotr Jasiukajtis be,pn %icc,.update11 ! (2_0) if ( ay == 0 ) 479*25c28e83SPiotr Jasiukajtis fand %f28,DC2,%f30 ! (4_1) h_hi0 = vis_fand(hyp0,DC2); 480*25c28e83SPiotr Jasiukajtis.cont11: 481*25c28e83SPiotr Jasiukajtis fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0; 482*25c28e83SPiotr Jasiukajtis sra %i3,10,%i3 ! 
(4_1) ibase0 >>= 10; 483*25c28e83SPiotr Jasiukajtis 484*25c28e83SPiotr Jasiukajtis and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0; 485*25c28e83SPiotr Jasiukajtis lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py; 486*25c28e83SPiotr Jasiukajtis 487*25c28e83SPiotr Jasiukajtis fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0; 488*25c28e83SPiotr Jasiukajtis add %i3,TBL,%i3 ! (4_1) (char*)TBL + si0 489*25c28e83SPiotr Jasiukajtis lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px; 490*25c28e83SPiotr Jasiukajtis fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0; 491*25c28e83SPiotr Jasiukajtis 492*25c28e83SPiotr Jasiukajtis add %i4,stridex,%o4 ! px += stridex 493*25c28e83SPiotr Jasiukajtis ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0]; 494*25c28e83SPiotr Jasiukajtis faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2; 495*25c28e83SPiotr Jasiukajtis 496*25c28e83SPiotr Jasiukajtis add %i2,stridey,%i2 ! py += stridey 497*25c28e83SPiotr Jasiukajtis and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; 498*25c28e83SPiotr Jasiukajtis fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0); 499*25c28e83SPiotr Jasiukajtis 500*25c28e83SPiotr Jasiukajtis and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; 501*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%f2 ! (3_0) y0 = *py; 502*25c28e83SPiotr Jasiukajtis 503*25c28e83SPiotr Jasiukajtis faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0; 504*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 505*25c28e83SPiotr Jasiukajtis fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0; 506*25c28e83SPiotr Jasiukajtis 507*25c28e83SPiotr Jasiukajtis fmuld %f10,%f26,%f10 ! (3_1) dtmp2 *= xx0; 508*25c28e83SPiotr Jasiukajtis lda [%o4]0x82,%f4 ! (3_0) x0 = *px; 509*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update12 ! (3_0) if ( ay >= 0x7f800000 ) 510*25c28e83SPiotr Jasiukajtis for %f30,DC1,%f28 ! (0_0) hyp0 = vis_for(hyp0,DC1); 511*25c28e83SPiotr Jasiukajtis.cont12: 512*25c28e83SPiotr Jasiukajtis cmp %i5,_0x7f800000 ! (3_0) ax ? 
0x7f800000 513*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update13 ! (3_0) if ( ax >= 0x7f800000 ) 514*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp1],%i1 ! (0_0) ibase0 = ((int*)&hyp0)[0]; 515*25c28e83SPiotr Jasiukajtis.cont13: 516*25c28e83SPiotr Jasiukajtis st %f18,[%fp+ftmp3] ! (2_0) ibase0 = ((int*)&hyp0)[0]; 517*25c28e83SPiotr Jasiukajtis 518*25c28e83SPiotr Jasiukajtis fmuld KA3,%f32,%f34 ! (4_1) dtmp2 = KA3 * xx0; 519*25c28e83SPiotr Jasiukajtis cmp %l6,0 ! (3_0) 520*25c28e83SPiotr Jasiukajtis be,pn %icc,.update14 ! (3_0) if ( ay == 0 ) 521*25c28e83SPiotr Jasiukajtis fand %f28,DC2,%f30 ! (0_0) h_hi0 = vis_fand(hyp0,DC2); 522*25c28e83SPiotr Jasiukajtis.cont14: 523*25c28e83SPiotr Jasiukajtis fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; 524*25c28e83SPiotr Jasiukajtis sra %i1,10,%l1 ! (0_0) ibase0 >>= 10; 525*25c28e83SPiotr Jasiukajtis faddd %f10,KA1,%f40 ! (3_1) dtmp2 += KA1; 526*25c28e83SPiotr Jasiukajtis 527*25c28e83SPiotr Jasiukajtis and %l1,2032,%o5 ! (0_0) si0 = ibase0 & 0x7f0; 528*25c28e83SPiotr Jasiukajtis lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; 529*25c28e83SPiotr Jasiukajtis 530*25c28e83SPiotr Jasiukajtis fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; 531*25c28e83SPiotr Jasiukajtis add %o5,TBL,%l1 ! (0_0) (char*)TBL + si0 532*25c28e83SPiotr Jasiukajtis lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px; 533*25c28e83SPiotr Jasiukajtis fsubd %f28,%f30,%f28 ! (0_0) dtmp1 = hyp0 - h_hi0; 534*25c28e83SPiotr Jasiukajtis 535*25c28e83SPiotr Jasiukajtis add %o4,stridex,%l0 ! px += stridex 536*25c28e83SPiotr Jasiukajtis ldd [TBL+%o5],%f42 ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0]; 537*25c28e83SPiotr Jasiukajtis faddd %f34,KA2,%f10 ! (4_1) dtmp2 += KA2; 538*25c28e83SPiotr Jasiukajtis 539*25c28e83SPiotr Jasiukajtis fmuld %f40,%f26,%f40 ! (3_1) dtmp2 *= xx0; 540*25c28e83SPiotr Jasiukajtis add %i2,stridey,%i2 ! py += stridey 541*25c28e83SPiotr Jasiukajtis and %l6,_0x7fffffff,%l6 ! 
(4_0) ay &= 0x7fffffff; 542*25c28e83SPiotr Jasiukajtis fand %f16,DC0,%f30 ! (1_0) hyp0 = vis_fand(hyp0,DC0); 543*25c28e83SPiotr Jasiukajtis 544*25c28e83SPiotr Jasiukajtis and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff; 545*25c28e83SPiotr Jasiukajtis lda [%i2]0x82,%f2 ! (4_0) y0 = *py; 546*25c28e83SPiotr Jasiukajtis fand %f20,DA0,%f24 ! (3_1) dbase0 = vis_fand(hyp0,DA0); 547*25c28e83SPiotr Jasiukajtis 548*25c28e83SPiotr Jasiukajtis faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; 549*25c28e83SPiotr Jasiukajtis cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 550*25c28e83SPiotr Jasiukajtis ldd [%l7+8],%f36 ! (3_1) res0 = ((double*)((char*)arr + si0))[1]; 551*25c28e83SPiotr Jasiukajtis fmuld %f28,%f42,%f26 ! (0_0) xx0 = dtmp1 * xx0; 552*25c28e83SPiotr Jasiukajtis 553*25c28e83SPiotr Jasiukajtis fmuld %f10,%f32,%f10 ! (4_1) dtmp2 *= xx0; 554*25c28e83SPiotr Jasiukajtis lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; 555*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update15 ! (4_0) if ( ay >= 0x7f800000 ) 556*25c28e83SPiotr Jasiukajtis for %f30,DC1,%f28 ! (1_0) hyp0 = vis_for(hyp0,DC1); 557*25c28e83SPiotr Jasiukajtis.cont15: 558*25c28e83SPiotr Jasiukajtis fmul8x16 SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0); 559*25c28e83SPiotr Jasiukajtis cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 560*25c28e83SPiotr Jasiukajtis ld [%fp+ftmp2],%i1 ! (1_0) ibase0 = ((int*)&hyp0)[0]; 561*25c28e83SPiotr Jasiukajtis faddd %f40,KA0,%f62 ! (3_1) dtmp2 += KA0; 562*25c28e83SPiotr Jasiukajtis 563*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update16 ! (4_0) if ( ax >= 0x7f800000 ) 564*25c28e83SPiotr Jasiukajtis st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; 565*25c28e83SPiotr Jasiukajtis.cont16: 566*25c28e83SPiotr Jasiukajtis fmuld KA3,%f26,%f34 ! (0_0) dtmp2 = KA3 * xx0; 567*25c28e83SPiotr Jasiukajtis fand %f28,DC2,%f30 ! 
! (1_0) h_hi0 = vis_fand(hyp0,DC2);

	mov	%o1,%i4			! %i4 is the pz used by the first store below
	cmp	counter,5
	bl,pn	%icc,.tail		! if ( counter < 5 ) skip the loop, drain only
	nop

	ba	.main_loop
	sub	counter,5,counter	! (delay slot) counter -= 5;

!
! .main_loop -- steady state of the five-way software pipeline.
!
! One pass stores five results ("st %f1,[...]  ! *pz = ftmp0") and ends
! with "subcc counter,5,counter / bpos .main_loop", i.e. it repeats while
! the decremented counter is not negative.
!
! Per element, as recorded by the inline comments:
!	hyp0   = x0 * (double)x0 + y0 * (double)y0
!	ibase0 = ((int*)&hyp0)[0]		(spilled through ftmp0..ftmp4)
!	si0    = (ibase0 >> 10) & 0x7f0		(2032 == 0x7f0, byte offset in TBL)
!	dbase0 = vis_fpsub32(DA1, vis_fmul8x16(SCALE, vis_fand(hyp0,DA0)))
!	hyp0   = vis_for(vis_fand(hyp0,DC0),DC1)
!	h_hi0  = vis_fand(hyp0,DC2)
!	xx0    = (hyp0 - h_hi0) * ((double*)((char*)TBL + si0))[0]
!	dtmp2  = ((KA3 * xx0 + KA2) * xx0 + KA1) * xx0 + KA0
!	*pz    = (float)(((double*)((char*)TBL + si0))[1] * dtmp2 * dbase0)
! (the comments write "arr" for the second table load; the register used
! there was set by the matching "add ...,TBL,..." one pass earlier).
!
! Guards: ay/ax are the operand bit patterns with the sign cleared.
! ay >= 0x7f800000, ax >= 0x7f800000 (Inf/NaN encodings) and ay == 0
! branch out to .update17 - .update31, which lie outside this excerpt and
! come back at the matching .contN label.
!
! None of the branches here is annulled, so the instruction after each
! branch is its delay slot and executes on both paths.
!
! NOTE(review): the "(k_s)" prefix on each comment appears to name the
! pipeline slot k (0-4) and the number of passes s the element has been
! in flight -- inferred from the numbering only, confirm.
! NOTE(review): "lda [...]0x82" uses ASI 0x82, ASI_PRIMARY_NOFAULT in
! SPARC V9; presumably chosen so read-ahead past the arrays cannot trap.
!
	.align	16
.main_loop:
	fsmuld	%f4,%f4,%f38	! (4_1) hyp0 = x0 * (double)x0;
	sra	%i1,10,%o2	! (1_1) ibase0 >>= 10;
	cmp	%l6,0		! (4_1) ay ? 0
	faddd	%f10,KA1,%f40	! (4_2) dtmp2 += KA1;

	fmuld	%f36,%f62,%f36	! (3_2) res0 *= dtmp2;
	and	%o2,2032,%o2	! (1_1) si0 = ibase0 & 0x7f0;
	lda	[%i2+stridey]0x82,%l6	! (0_0) ay = *(int*)py;
	fpsub32	DA1,%f24,%f24	! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);

	fsmuld	%f2,%f2,%f62	! (4_1) dtmp0 = y0 * (double)y0;
	add	%o2,TBL,%o2	! (1_1) (char*)TBL + si0
	lda	[%l0+stridex]0x82,%o1	! (0_0) ax = *(int*)px;
	fsubd	%f28,%f30,%f28	! (1_1) dtmp1 = hyp0 - h_hi0;

	add	%l0,stridex,%i1	! px += stridex
	ldd	[%o2],%f42	! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
	be,pn	%icc,.update17	! (4_1) if ( ay == 0 )
	faddd	%f34,KA2,%f10	! (0_1) dtmp2 += KA2;
.cont17:
	fmuld	%f40,%f32,%f40	! (4_2) dtmp2 *= xx0;
	add	%i2,stridey,%i2	! py += stridey
	and	%l6,_0x7fffffff,%l6	! (0_0) ay &= 0x7fffffff;
	fand	%f18,DC0,%f30	! (2_1) hyp0 = vis_fand(hyp0,DC0);

	fmuld	%f36,%f24,%f32	! (3_2) res0 *= dbase0;
	and	%o1,_0x7fffffff,%o1	! (0_0) ax &= 0x7fffffff;
	lda	[%i2]0x82,%f2	! (0_0) y0 = *py;
	fand	%f12,DA0,%f24	! (4_2) dbase0 = vis_fand(hyp0,DA0);

	faddd	%f38,%f62,%f12	! (4_1) hyp0 += dtmp0;
	cmp	%l6,_0x7f800000	! (0_0) ay ? 0x7f800000
	ldd	[%i3+8],%f62	! (4_2) res0 = ((double*)((char*)arr + si0))[1];
	fmuld	%f28,%f42,%f36	! (1_1) xx0 = dtmp1 * xx0;

	fmuld	%f10,%f26,%f10	! (0_1) dtmp2 *= xx0;
	lda	[%i1]0x82,%f4	! (0_0) x0 = *px;
	bge,pn	%icc,.update18	! (0_0) if ( ay >= 0x7f800000 )
	for	%f30,DC1,%f28	! (2_1) hyp0 = vis_for(hyp0,DC1);
.cont18:
	fmul8x16	SCALE,%f24,%f24	! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
	cmp	%o1,_0x7f800000	! (0_0) ax ? 0x7f800000
	ld	[%fp+ftmp3],%l0	! (2_1) ibase0 = ((int*)&hyp0)[0];
	faddd	%f40,KA0,%f42	! (4_2) dtmp2 += KA0;

	add	%i4,stridez,%i3	! pz += stridez
	st	%f12,[%fp+ftmp0]	! (4_1) ibase0 = ((int*)&hyp0)[0];
	bge,pn	%icc,.update19	! (0_0) if ( ax >= 0x7f800000 )
	fdtos	%f32,%f1	! (3_2) ftmp0 = (float)res0;
.cont19:
	fmuld	KA3,%f36,%f34	! (1_1) dtmp2 = KA3 * xx0;
	cmp	%l6,0		! (0_0) ay ? 0
	st	%f1,[%i4]	! (3_2) *pz = ftmp0;
	fand	%f28,DC2,%f30	! (2_1) h_hi0 = vis_fand(hyp0,DC2);

	fsmuld	%f4,%f4,%f38	! (0_0) hyp0 = x0 * (double)x0;
	sra	%l0,10,%i4	! (2_1) ibase0 >>= 10;
	be,pn	%icc,.update20	! (0_0) if ( ay == 0 )
	faddd	%f10,KA1,%f40	! (0_1) dtmp2 += KA1;
.cont20:
	fmuld	%f62,%f42,%f32	! (4_2) res0 *= dtmp2;
	and	%i4,2032,%g1	! (2_1) si0 = ibase0 & 0x7f0;
	lda	[%i2+stridey]0x82,%l6	! (1_0) ay = *(int*)py;
	fpsub32	DA1,%f24,%f24	! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);

	fsmuld	%f2,%f2,%f62	! (0_0) dtmp0 = y0 * (double)y0;
	add	%g1,TBL,%l0	! (2_1) (char*)TBL + si0
	lda	[%i1+stridex]0x82,%i5	! (1_0) ax = *(int*)px;
	fsubd	%f28,%f30,%f28	! (2_1) dtmp1 = hyp0 - h_hi0;

	nop
	add	%i1,stridex,%g5	! px += stridex
	ldd	[TBL+%g1],%f42	! (2_1) xx0 = ((double*)((char*)TBL + si0))[0];
	faddd	%f34,KA2,%f10	! (1_1) dtmp2 += KA2;

	fmuld	%f40,%f26,%f40	! (0_1) dtmp2 *= xx0;
	add	%i2,stridey,%o3	! py += stridey
	and	%l6,_0x7fffffff,%l6	! (1_0) ay &= 0x7fffffff;
	fand	%f20,DC0,%f30	! (3_1) hyp0 = vis_fand(hyp0,DC0);

	fmuld	%f32,%f24,%f26	! (4_2) res0 *= dbase0;
	and	%i5,_0x7fffffff,%i5	! (1_0) ax &= 0x7fffffff;
	lda	[%o3]0x82,%f2	! (1_0) y0 = *py;
	fand	%f14,DA0,%f24	! (0_1) dbase0 = vis_fand(hyp0,DA0);

	faddd	%f38,%f62,%f14	! (0_0) hyp0 += dtmp0;
	cmp	%l6,_0x7f800000	! (1_0) ay ? 0x7f800000
	ldd	[%l1+8],%f62	! (0_1) res0 = ((double*)((char*)arr + si0))[1];
	fmuld	%f28,%f42,%f32	! (2_1) xx0 = dtmp1 * xx0;

	fmuld	%f10,%f36,%f10	! (1_1) dtmp2 *= xx0;
	lda	[%g5]0x82,%f4	! (1_0) x0 = *px;
	bge,pn	%icc,.update21	! (1_0) if ( ay >= 0x7f800000 )
	for	%f30,DC1,%f28	! (3_1) hyp0 = vis_for(hyp0,DC1);
.cont21:
	fmul8x16	SCALE,%f24,%f24	! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	cmp	%i5,_0x7f800000	! (1_0) ax ? 0x7f800000
	ld	[%fp+ftmp4],%l1	! (3_1) ibase0 = ((int*)&hyp0)[0];
	faddd	%f40,KA0,%f42	! (0_1) dtmp2 += KA0;

	add	%i3,stridez,%o1	! pz += stridez
	st	%f14,[%fp+ftmp1]	! (0_0) ibase0 = ((int*)&hyp0)[0];
	bge,pn	%icc,.update22	! (1_0) if ( ax >= 0x7f800000 )
	fdtos	%f26,%f1	! (4_2) ftmp0 = (float)res0;
.cont22:
	fmuld	KA3,%f32,%f34	! (2_1) dtmp2 = KA3 * xx0;
	cmp	%l6,0		! (1_0) ay ? 0
	st	%f1,[%i3]	! (4_2) *pz = ftmp0;
	fand	%f28,DC2,%f30	! (3_1) h_hi0 = vis_fand(hyp0,DC2);

	fsmuld	%f4,%f4,%f38	! (1_0) hyp0 = x0 * (double)x0;
	sra	%l1,10,%o5	! (3_1) ibase0 >>= 10;
	be,pn	%icc,.update23	! (1_0) if ( ay == 0 )
	faddd	%f10,KA1,%f40	! (1_1) dtmp2 += KA1;
.cont23:
	fmuld	%f62,%f42,%f26	! (0_1) res0 *= dtmp2;
	and	%o5,2032,%o4	! (3_1) si0 = ibase0 & 0x7f0;
	lda	[%o3+stridey]0x82,%l6	! (2_0) ay = *(int*)py;
	fpsub32	DA1,%f24,%f24	! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);

	fsmuld	%f2,%f2,%f62	! (1_0) dtmp0 = y0 * (double)y0;
	add	%o4,TBL,%l7	! (3_1) (char*)TBL + si0
	lda	[stridex+%g5]0x82,%i5	! (2_0) ax = *(int*)px;
	fsubd	%f28,%f30,%f28	! (3_1) dtmp1 = hyp0 - h_hi0;

	nop
	add	%g5,stridex,%i4	! px += stridex
	ldd	[TBL+%o4],%f42	! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];
	faddd	%f34,KA2,%f10	! (2_1) dtmp2 += KA2;

	fmuld	%f40,%f36,%f40	! (1_1) dtmp2 *= xx0;
	and	%l6,_0x7fffffff,%l6	! (2_0) ay &= 0x7fffffff;
	add	%o3,stridey,%i2	! py += stridey
	fand	%f12,DC0,%f30	! (4_1) hyp0 = vis_fand(hyp0,DC0);

	fmuld	%f26,%f24,%f36	! (0_1) res0 *= dbase0;
	and	%i5,_0x7fffffff,%i5	! (2_0) ax &= 0x7fffffff;
	lda	[%i2]0x82,%f2	! (2_0) y0 = *py;
	fand	%f16,DA0,%f24	! (1_1) dbase0 = vis_fand(hyp0,DA0);

	faddd	%f38,%f62,%f16	! (1_0) hyp0 += dtmp0;
	cmp	%l6,_0x7f800000	! (2_0) ay ? 0x7f800000
	ldd	[%o2+8],%f38	! (1_1) res0 = ((double*)((char*)arr + si0))[1];
	fmuld	%f28,%f42,%f26	! (3_1) xx0 = dtmp1 * xx0;

	fmuld	%f10,%f32,%f10	! (2_1) dtmp2 *= xx0;
	lda	[stridex+%g5]0x82,%f4	! (2_0) x0 = *px;
	bge,pn	%icc,.update24	! (2_0) if ( ay >= 0x7f800000 )
	for	%f30,DC1,%f28	! (4_1) hyp0 = vis_for(hyp0,DC1);
.cont24:
	fmul8x16	SCALE,%f24,%f24	! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	cmp	%i5,_0x7f800000	! (2_0) ax ? 0x7f800000
	ld	[%fp+ftmp0],%i3	! (4_1) ibase0 = ((int*)&hyp0)[0];
	faddd	%f40,KA0,%f62	! (1_1) dtmp2 += KA0;

	add	%o1,stridez,%g1	! pz += stridez
	st	%f16,[%fp+ftmp2]	! (1_0) ibase0 = ((int*)&hyp0)[0];
	bge,pn	%icc,.update25	! (2_0) if ( ax >= 0x7f800000 )
	fdtos	%f36,%f1	! (0_1) ftmp0 = (float)res0;
.cont25:
	fmuld	KA3,%f26,%f34	! (3_1) dtmp2 = KA3 * xx0;
	cmp	%l6,0		! (2_0) ay ? 0
	st	%f1,[%o1]	! (0_1) *pz = ftmp0;
	fand	%f28,DC2,%f30	! (4_1) h_hi0 = vis_fand(hyp0,DC2);

	fsmuld	%f4,%f4,%f36	! (2_0) hyp0 = x0 * (double)x0;
	sra	%i3,10,%i3	! (4_1) ibase0 >>= 10;
	be,pn	%icc,.update26	! (2_0) if ( ay == 0 )
	faddd	%f10,KA1,%f40	! (2_1) dtmp2 += KA1;
.cont26:
	fmuld	%f38,%f62,%f38	! (1_1) res0 *= dtmp2;
	and	%i3,2032,%i3	! (4_1) si0 = ibase0 & 0x7f0;
	lda	[%i2+stridey]0x82,%l6	! (3_0) ay = *(int*)py;
	fpsub32	DA1,%f24,%f24	! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);

	fsmuld	%f2,%f2,%f62	! (2_0) dtmp0 = y0 * (double)y0;
	add	%i3,TBL,%i3	! (4_1) (char*)TBL + si0
	lda	[%i4+stridex]0x82,%i5	! (3_0) ax = *(int*)px;
	fsubd	%f28,%f30,%f28	! (4_1) dtmp1 = hyp0 - h_hi0;

	nop
	add	%i4,stridex,%o4	! px += stridex
	ldd	[%i3],%f42	! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
	faddd	%f34,KA2,%f10	! (3_1) dtmp2 += KA2;

	fmuld	%f40,%f32,%f40	! (2_1) dtmp2 *= xx0;
	add	%i2,stridey,%i2	! py += stridey
	and	%l6,_0x7fffffff,%l6	! (3_0) ay &= 0x7fffffff;
	fand	%f14,DC0,%f30	! (0_0) hyp0 = vis_fand(hyp0,DC0);

	fmuld	%f38,%f24,%f38	! (1_1) res0 *= dbase0;
	and	%i5,_0x7fffffff,%i5	! (3_0) ax &= 0x7fffffff;
	lda	[%i2]0x82,%f2	! (3_0) y0 = *py;
	fand	%f18,DA0,%f24	! (2_1) dbase0 = vis_fand(hyp0,DA0);

	faddd	%f36,%f62,%f18	! (2_0) hyp0 += dtmp0;
	cmp	%l6,_0x7f800000	! (3_0) ay ? 0x7f800000
	ldd	[%l0+8],%f62	! (2_1) res0 = ((double*)((char*)arr + si0))[1];
	fmuld	%f28,%f42,%f32	! (4_1) xx0 = dtmp1 * xx0;

	fmuld	%f10,%f26,%f10	! (3_1) dtmp2 *= xx0;
	lda	[%o4]0x82,%f4	! (3_0) x0 = *px;
	bge,pn	%icc,.update27	! (3_0) if ( ay >= 0x7f800000 )
	for	%f30,DC1,%f28	! (0_0) hyp0 = vis_for(hyp0,DC1);
.cont27:
	fmul8x16	SCALE,%f24,%f24	! (2_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	cmp	%i5,_0x7f800000	! (3_0) ax ? 0x7f800000
	ld	[%fp+ftmp1],%i1	! (0_0) ibase0 = ((int*)&hyp0)[0];
	faddd	%f40,KA0,%f42	! (2_1) dtmp2 += KA0;

	add	%g1,stridez,%o3	! pz += stridez
	st	%f18,[%fp+ftmp3]	! (2_0) ibase0 = ((int*)&hyp0)[0];
	bge,pn	%icc,.update28	! (3_0) if ( ax >= 0x7f800000 )
	fdtos	%f38,%f1	! (1_1) ftmp0 = (float)res0;
.cont28:
	fmuld	KA3,%f32,%f34	! (4_1) dtmp2 = KA3 * xx0;
	cmp	%l6,0		! (3_0) ay ? 0
	st	%f1,[%g1]	! (1_1) *pz = ftmp0;
	fand	%f28,DC2,%f30	! (0_0) h_hi0 = vis_fand(hyp0,DC2);

	fsmuld	%f4,%f4,%f36	! (3_0) hyp0 = x0 * (double)x0;
	sra	%i1,10,%l1	! (0_0) ibase0 >>= 10;
	be,pn	%icc,.update29	! (3_0) if ( ay == 0 )
	faddd	%f10,KA1,%f40	! (3_1) dtmp2 += KA1;
.cont29:
	fmuld	%f62,%f42,%f38	! (2_1) res0 *= dtmp2;
	and	%l1,2032,%o5	! (0_0) si0 = ibase0 & 0x7f0;
	lda	[%i2+stridey]0x82,%l6	! (4_0) ay = *(int*)py;
	fpsub32	DA1,%f24,%f24	! (2_1) dbase0 = vis_fpsub32(DA1,dbase0);

	fsmuld	%f2,%f2,%f62	! (3_0) dtmp0 = y0 * (double)y0;
	add	%o5,TBL,%l1	! (0_0) (char*)TBL + si0
	lda	[stridex+%o4]0x82,%i5	! (4_0) ax = *(int*)px;
	fsubd	%f28,%f30,%f28	! (0_0) dtmp1 = hyp0 - h_hi0;

	add	%o3,stridez,%i4	! pz += stridez
	add	%o4,stridex,%l0	! px += stridex
	ldd	[TBL+%o5],%f42	! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
	faddd	%f34,KA2,%f10	! (4_1) dtmp2 += KA2;

	fmuld	%f40,%f26,%f40	! (3_1) dtmp2 *= xx0;
	add	%i2,stridey,%i2	! py += stridey
	and	%l6,_0x7fffffff,%l6	! (4_0) ay &= 0x7fffffff;
	fand	%f16,DC0,%f30	! (1_0) hyp0 = vis_fand(hyp0,DC0);

	fmuld	%f38,%f24,%f38	! (2_1) res0 *= dbase0;
	and	%i5,_0x7fffffff,%i5	! (4_0) ax &= 0x7fffffff;
	lda	[%i2]0x82,%f2	! (4_0) y0 = *py;
	fand	%f20,DA0,%f24	! (3_1) dbase0 = vis_fand(hyp0,DA0);

	faddd	%f36,%f62,%f20	! (3_0) hyp0 += dtmp0;
	cmp	%l6,_0x7f800000	! (4_0) ay ? 0x7f800000
	ldd	[%l7+8],%f36	! (3_1) res0 = ((double*)((char*)arr + si0))[1];
	fmuld	%f28,%f42,%f26	! (0_0) xx0 = dtmp1 * xx0;

	fmuld	%f10,%f32,%f10	! (4_1) dtmp2 *= xx0;
	lda	[stridex+%o4]0x82,%f4	! (4_0) x0 = *px;
	bge,pn	%icc,.update30	! (4_0) if ( ay >= 0x7f800000 )
	for	%f30,DC1,%f28	! (1_0) hyp0 = vis_for(hyp0,DC1);
.cont30:
	fmul8x16	SCALE,%f24,%f24	! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	cmp	%i5,_0x7f800000	! (4_0) ax ? 0x7f800000
	ld	[%fp+ftmp2],%i1	! (1_0) ibase0 = ((int*)&hyp0)[0];
	faddd	%f40,KA0,%f62	! (3_1) dtmp2 += KA0;

	bge,pn	%icc,.update31	! (4_0) if ( ax >= 0x7f800000 )
	st	%f20,[%fp+ftmp4]	! (3_0) ibase0 = ((int*)&hyp0)[0];
.cont31:
	subcc	counter,5,counter	! counter -= 5;
	fdtos	%f38,%f1	! (2_1) ftmp0 = (float)res0;

	fmuld	KA3,%f26,%f34	! (0_0) dtmp2 = KA3 * xx0;
	st	%f1,[%o3]	! (2_1) *pz = ftmp0;
	bpos,pt	%icc,.main_loop
	fand	%f28,DC2,%f30	!
! (1_0) h_hi0 = vis_fand(hyp0,DC2);

	add	counter,5,counter	! undo the last "counter -= 5" of .main_loop

!
! .tail -- drain the software pipeline.
!
! Finishes and stores up to four results that are already in flight.
! Before each one "subcc counter,1,counter / bneg .begin" leaves for
! .begin (outside this excerpt) once the count goes negative; on every
! exit %o1 holds the next unwritten pz (set in the branch delay slot or
! by the "add ...,stridez,%o1" just before it).  No new x/y elements are
! loaded here.
!
.tail:
	subcc	counter,1,counter
	bneg	.begin
	mov	%i4,%o1		! (delay slot) next pz to write

	sra	%i1,10,%o2	! (1_1) ibase0 >>= 10;
	faddd	%f10,KA1,%f40	! (4_2) dtmp2 += KA1;

	fmuld	%f36,%f62,%f36	! (3_2) res0 *= dtmp2;
	and	%o2,2032,%o2	! (1_1) si0 = ibase0 & 0x7f0;
	fpsub32	DA1,%f24,%f24	! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);

	add	%o2,TBL,%o2	! (1_1) (char*)TBL + si0
	fsubd	%f28,%f30,%f28	! (1_1) dtmp1 = hyp0 - h_hi0;

	ldd	[%o2],%f42	! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
	faddd	%f34,KA2,%f10	! (0_1) dtmp2 += KA2;

	fmuld	%f40,%f32,%f40	! (4_2) dtmp2 *= xx0;

	fmuld	%f36,%f24,%f32	! (3_2) res0 *= dbase0;
	fand	%f12,DA0,%f24	! (4_2) dbase0 = vis_fand(hyp0,DA0);

	ldd	[%i3+8],%f62	! (4_2) res0 = ((double*)((char*)arr + si0))[1];
	fmuld	%f28,%f42,%f36	! (1_1) xx0 = dtmp1 * xx0;

	fmuld	%f10,%f26,%f10	! (0_1) dtmp2 *= xx0;

	fmul8x16	SCALE,%f24,%f24	! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
	faddd	%f40,KA0,%f42	! (4_2) dtmp2 += KA0;

	add	%i4,stridez,%i3	! pz += stridez
	fdtos	%f32,%f1	! (3_2) ftmp0 = (float)res0;

	fmuld	KA3,%f36,%f34	! (1_1) dtmp2 = KA3 * xx0;
	st	%f1,[%i4]	! (3_2) *pz = ftmp0;

	subcc	counter,1,counter
	bneg	.begin
	mov	%i3,%o1		! (delay slot) next pz to write

	faddd	%f10,KA1,%f40	! (0_1) dtmp2 += KA1;

	fmuld	%f62,%f42,%f32	! (4_2) res0 *= dtmp2;
	fpsub32	DA1,%f24,%f24	! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);


	faddd	%f34,KA2,%f10	! (1_1) dtmp2 += KA2;

	fmuld	%f40,%f26,%f40	! (0_1) dtmp2 *= xx0;

	fmuld	%f32,%f24,%f26	! (4_2) res0 *= dbase0;
	fand	%f14,DA0,%f24	! (0_1) dbase0 = vis_fand(hyp0,DA0);

	ldd	[%l1+8],%f62	! (0_1) res0 = ((double*)((char*)arr + si0))[1];

	fmuld	%f10,%f36,%f10	! (1_1) dtmp2 *= xx0;

	fmul8x16	SCALE,%f24,%f24	! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	faddd	%f40,KA0,%f42	! (0_1) dtmp2 += KA0;

	add	%i3,stridez,%o1	! pz += stridez
	fdtos	%f26,%f1	! (4_2) ftmp0 = (float)res0;

	st	%f1,[%i3]	! (4_2) *pz = ftmp0;

	subcc	counter,1,counter
	bneg	.begin
	nop			! %o1 already points at the next pz

	faddd	%f10,KA1,%f40	! (1_1) dtmp2 += KA1;

	fmuld	%f62,%f42,%f26	! (0_1) res0 *= dtmp2;
	fpsub32	DA1,%f24,%f24	! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);

	fmuld	%f40,%f36,%f40	! (1_1) dtmp2 *= xx0;

	fmuld	%f26,%f24,%f36	! (0_1) res0 *= dbase0;
	fand	%f16,DA0,%f24	! (1_1) dbase0 = vis_fand(hyp0,DA0);

	ldd	[%o2+8],%f38	! (1_1) res0 = ((double*)((char*)arr + si0))[1];

	fmul8x16	SCALE,%f24,%f24	! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	faddd	%f40,KA0,%f62	! (1_1) dtmp2 += KA0;

	add	%o1,stridez,%g1	! pz += stridez
	fdtos	%f36,%f1	! (0_1) ftmp0 = (float)res0;

	st	%f1,[%o1]	! (0_1) *pz = ftmp0;

	subcc	counter,1,counter
	bneg	.begin
	mov	%g1,%o1		! (delay slot) next pz to write

	fmuld	%f38,%f62,%f38	! (1_1) res0 *= dtmp2;
	fpsub32	DA1,%f24,%f24	! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);

	fmuld	%f38,%f24,%f38	! (1_1) res0 *= dbase0;

	fdtos	%f38,%f1	! (1_1) ftmp0 = (float)res0;
	st	%f1,[%g1]	! (1_1) *pz = ftmp0;

	ba	.begin
	add	%g1,stridez,%o1	! pz += stridez

!
! .spec0 -- one element whose ax or ay is >= 0x7f800000 (the branch site
! is outside this excerpt; that condition is presumed from the tests
! below -- confirm).  Uses %l6 = ay, %i5 = ax, %f2 = y0, %f4 = x0,
! %o4 = px, %i2 = py, %o1 = pz.
!
! If either magnitude is exactly 0x7f800000 (the Inf encoding) integer
! zero is stored, i.e. *pz = +0.0f.  "be,a" is an annulled branch: its
! delay-slot store executes only when the branch is taken.  Otherwise
! the product |y0| * |x0| is stored.
! NOTE(review): the fcmps result is not consumed in the visible code;
! presumably it is issued for its exception side effect on NaN operands.
!
	.align	16
.spec0:
	fabss	%f2,%f2		! fabsf(y0);

	fabss	%f4,%f4		! fabsf(x0);

	fcmps	%f2,%f4

	cmp	%l6,_0x7f800000	! ay ? 0x7f800000
	be,a	1f		! if( ay == 0x7f800000 )
	st	%g0,[%o1]	! *pz = 0.0f;

	cmp	%i5,_0x7f800000	! ax ? 0x7f800000
	be,a	1f		! if( ax == 0x7f800000 )
	st	%g0,[%o1]	! *pz = 0.0f;

	fmuls	%f2,%f4,%f2	! fabsf(x0) * fabsf(y0);
	st	%f2,[%o1]	! *pz = fabsf(x0) * fabsf(y0);
1:
	add	%o4,stridex,%o4	! px += stridex;
	add	%i2,stridey,%i2	! py += stridey;

	add	%o1,stridez,%o1	! pz += stridez;
	ba	.begin1
	sub	counter,1,counter	! counter--;

!
! .spec1 -- one element handled out of line; if ax != 0 control goes
! straight back to .cont_spec1 (outside this excerpt).  Otherwise
! %f7 / %f9 is stored to *pz -- the inline comments identify this as
! 1.0f / 0.0f; the setup of %f7 and %f9 is not visible here -- and
! px, py, pz and counter are stepped before resuming at .begin1.
!
	.align	16
.spec1:
	cmp	%i5,0		! ax ? 0
	bne,pt	%icc,.cont_spec1	! if ( ax != 0 )
	nop

	add	%o4,stridex,%o4	! px += stridex;
	add	%i2,stridey,%i2	! py += stridey;

	fdivs	%f7,%f9,%f2	! 1.0f / 0.0f
	st	%f2,[%o1]	! *pz = 1.0f / 0.0f;

	add	%o1,stridez,%o1	! pz += stridez;
	ba	.begin1
	sub	counter,1,counter	! counter--;

!
! .update0 - .update7 -- out-of-line handlers whose branch sites and
! .contN return labels are outside this excerpt.
!
! Common shape, with K = 1 (.update0-2), 2 (.update3-5), 3 (.update6-7):
!   - "ld [TBL+TBL_SHIFT+44]" replaces %f2 (y0) or %f4 (x0) with a
!     constant from the table area (its value is not visible here).
!     It sits in the delay slot of the non-annulled "ble", so it
!     executes whether or not that branch is taken.
!   - if counter <= K, return to .contN with only that substitution;
!   - otherwise tmp_counter = counter - K, tmp_px / tmp_py = the current
!     px / py registers, counter = K (in the delay slot of "ba").
! .update2 and .update5 first return to .contN untouched when ax != 0;
! the "cmp counter,K" there is the delay slot of that "bne".
! NOTE(review): this looks like it cuts the current batch down to K
! elements and records where to resume; the code that reads tmp_counter,
! tmp_px and tmp_py is not visible here -- confirm.
!
	.align	16
.update0:
	cmp	counter,1
	ble	.cont0
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont0
	mov	1,counter

	.align	16
.update1:
	cmp	counter,1
	ble	.cont1
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont1
	mov	1,counter

	.align	16
.update2:
	cmp	%i5,0
	bne	.cont2

	cmp	counter,1
	ble	.cont2
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont2
	mov	1,counter

	.align	16
.update3:
	cmp	counter,2
	ble	.cont3
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont3
	mov	2,counter

	.align	16
.update4:
	cmp	counter,2
	ble	.cont4
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont4
	mov	2,counter

	.align	16
.update5:
	cmp	%i5,0
	bne	.cont5

	cmp	counter,2
	ble	.cont5
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont5
	mov	2,counter

	.align	16
.update6:
	cmp	counter,3
	ble	.cont6
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont6
	mov	3,counter

	.align	16
.update7:
	cmp	counter,3
	ble	.cont7
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont7
	mov	3,counter

! .update8 - .update21: fix-up stubs.  Each one resumes at the matching
! .contN label (defined outside this chunk) and follows this pattern,
! where k is the stub's depth:
!
!	[ cmp <reg>,0 ; bne .contN ]	-- guarded stubs only: resume at
!					   once when <reg> != 0
!	if (counter <= k) goto .contN;
!	tmp_counter = counter - k;	-- 32-bit store to the frame slot
!	tmp_px = <x pointer register>;	-- 64-bit store
!	tmp_py = <y pointer register>;	-- 64-bit store
!	counter = k;  goto .contN;
!
! The ble is not annulled, so its delay-slot instruction (a load of the
! word at [TBL+TBL_SHIFT+44] into %f2 or %f4; fmovd DC1,%f62 in
! .update17) executes on both paths.
! NOTE(review): presumably a stub is entered when the element fetched
! k ahead needs special handling, so that the current pass is cut to k
! elements and the rest is restarted from the saved state -- confirm
! against the branch sites.

! depth 3, only when %i5 == 0; reloads %f2, saves %g5 / %o3
	.align	16
.update8:
	cmp	%i5,0
	bne	.cont8			! %i5 != 0: resume (the cmp below is its delay slot)

	cmp	counter,3
	ble	.cont8			! counter <= 3: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 3

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont8
	mov	3,counter		! delay slot: counter = 3

! depth 4, reloads %f2, saves %i4 / %i2
	.align	16
.update9:
	cmp	counter,4
	ble	.cont9			! counter <= 4: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont9
	mov	4,counter		! delay slot: counter = 4

! depth 4, reloads %f4, saves %i4 / %i2
	.align	16
.update10:
	cmp	counter,4
	ble	.cont10			! counter <= 4: resume
	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot, always executed

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont10
	mov	4,counter		! delay slot: counter = 4

! depth 4, only when %i5 == 0; reloads %f2, saves %i4 / %i2
	.align	16
.update11:
	cmp	%i5,0
	bne	.cont11			! %i5 != 0: resume (the cmp below is its delay slot)

	cmp	counter,4
	ble	.cont11			! counter <= 4: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont11
	mov	4,counter		! delay slot: counter = 4

! depth 5, reloads %f2, saves %o4 / %i2
	.align	16
.update12:
	cmp	counter,5
	ble	.cont12			! counter <= 5: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont12
	mov	5,counter		! delay slot: counter = 5

! depth 5, reloads %f4, saves %o4 / %i2
	.align	16
.update13:
	cmp	counter,5
	ble	.cont13			! counter <= 5: resume
	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot, always executed

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont13
	mov	5,counter		! delay slot: counter = 5

! depth 5, only when %i5 == 0; reloads %f2, saves %o4 / %i2
	.align	16
.update14:
	cmp	%i5,0
	bne	.cont14			! %i5 != 0: resume (the cmp below is its delay slot)

	cmp	counter,5
	ble	.cont14			! counter <= 5: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont14
	mov	5,counter		! delay slot: counter = 5

! depth 6, reloads %f2, saves %l0 / %i2
	.align	16
.update15:
	cmp	counter,6
	ble	.cont15			! counter <= 6: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 6

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont15
	mov	6,counter		! delay slot: counter = 6

! depth 6, reloads %f4, saves %l0 / %i2
	.align	16
.update16:
	cmp	counter,6
	ble	.cont16			! counter <= 6: resume
	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot, always executed

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 6

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont16
	mov	6,counter		! delay slot: counter = 6

! depth 1, only when %i5 == 0; saves %l0 / %i2.  Unlike its neighbours
! this stub's ble delay slot copies DC1 into %f62 instead of loading
! from the table.
	.align	16
.update17:
	cmp	%i5,0
	bne	.cont17			! %i5 != 0: resume (the cmp below is its delay slot)

	cmp	counter,1
	ble	.cont17			! counter <= 1: resume
	fmovd	DC1,%f62		! delay slot, always executed

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 1

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont17
	mov	1,counter		! delay slot: counter = 1

! depth 2, reloads %f2, saves %i1 / %i2
	.align	16
.update18:
	cmp	counter,2
	ble	.cont18			! counter <= 2: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 2

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont18
	mov	2,counter		! delay slot: counter = 2

! depth 2, reloads %f4, saves %i1 / %i2
	.align	16
.update19:
	cmp	counter,2
	ble	.cont19			! counter <= 2: resume
	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot, always executed

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 2

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont19
	mov	2,counter		! delay slot: counter = 2

! depth 2, guarded; reloads %f2, saves %i1 / %i2
! NOTE(review): the guard tests %o1, whereas every other guarded stub
! in this group tests %i5 -- confirm against the code that branches
! here that %o1 is the intended register.
	.align	16
.update20:
	cmp	%o1,0
	bne	.cont20			! %o1 != 0: resume (the cmp below is its delay slot)

	cmp	counter,2
	ble	.cont20			! counter <= 2: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 2

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont20
	mov	2,counter		! delay slot: counter = 2

! depth 3, reloads %f2, saves %g5 / %o3
	.align	16
.update21:
	cmp	counter,3
	ble	.cont21			! counter <= 3: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 3

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont21
	mov	3,counter		! delay slot: counter = 3

! .update22 - .update31: fix-up stubs.  Each one resumes at the
! matching .contN label (defined outside this chunk) and follows this
! pattern, where k is the stub's depth:
!
!	[ cmp %i5,0 ; bne .contN ]	-- .update23 / 26 / 29 only:
!					   resume at once when %i5 != 0
!	if (counter <= k) goto .contN;
!	tmp_counter = counter - k;	-- 32-bit store to the frame slot
!	tmp_px = <x pointer register>;	-- 64-bit store
!	tmp_py = <y pointer register>;	-- 64-bit store
!	counter = k;  goto .contN;
!
! The ble is not annulled, so its delay-slot load of the word at
! [TBL+TBL_SHIFT+44] into %f2 or %f4 executes on both paths.
! NOTE(review): presumably a stub is entered when the element fetched
! k ahead needs special handling, so that the current pass is cut to k
! elements and the rest is restarted from the saved state -- confirm
! against the branch sites.

! depth 3, reloads %f4, saves %g5 / %o3
	.align	16
.update22:
	cmp	counter,3
	ble	.cont22			! counter <= 3: resume
	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot, always executed

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 3

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont22
	mov	3,counter		! delay slot: counter = 3

! depth 3, only when %i5 == 0; reloads %f2, saves %g5 / %o3
	.align	16
.update23:
	cmp	%i5,0
	bne	.cont23			! %i5 != 0: resume (the cmp below is its delay slot)

	cmp	counter,3
	ble	.cont23			! counter <= 3: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 3

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont23
	mov	3,counter		! delay slot: counter = 3

! depth 4, reloads %f2, saves %i4 / %i2
	.align	16
.update24:
	cmp	counter,4
	ble	.cont24			! counter <= 4: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont24
	mov	4,counter		! delay slot: counter = 4

! depth 4, reloads %f4, saves %i4 / %i2
	.align	16
.update25:
	cmp	counter,4
	ble	.cont25			! counter <= 4: resume
	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot, always executed

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont25
	mov	4,counter		! delay slot: counter = 4

! depth 4, only when %i5 == 0; reloads %f2, saves %i4 / %i2
	.align	16
.update26:
	cmp	%i5,0
	bne	.cont26			! %i5 != 0: resume (the cmp below is its delay slot)

	cmp	counter,4
	ble	.cont26			! counter <= 4: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont26
	mov	4,counter		! delay slot: counter = 4

! depth 5, reloads %f2, saves %o4 / %i2
	.align	16
.update27:
	cmp	counter,5
	ble	.cont27			! counter <= 5: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont27
	mov	5,counter		! delay slot: counter = 5

! depth 5, reloads %f4, saves %o4 / %i2
	.align	16
.update28:
	cmp	counter,5
	ble	.cont28			! counter <= 5: resume
	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot, always executed

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont28
	mov	5,counter		! delay slot: counter = 5

! depth 5, only when %i5 == 0; reloads %f2, saves %o4 / %i2
	.align	16
.update29:
	cmp	%i5,0
	bne	.cont29			! %i5 != 0: resume (the cmp below is its delay slot)

	cmp	counter,5
	ble	.cont29			! counter <= 5: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont29
	mov	5,counter		! delay slot: counter = 5

! depth 6, reloads %f2, saves %l0 / %i2
	.align	16
.update30:
	cmp	counter,6
	ble	.cont30			! counter <= 6: resume
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot, always executed

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 6

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont30
	mov	6,counter		! delay slot: counter = 6

! depth 6, reloads %f4, saves %l0 / %i2
	.align	16
.update31:
	cmp	counter,6
	ble	.cont31			! counter <= 6: resume
	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot, always executed

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 6

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont31
	mov	6,counter		! delay slot: counter = 6

! .exit -- return to the caller; the register window is restored in
! the delay slot of ret.
	.align	16
.exit:
	ret
	restore
	SET_SIZE(__vrhypotf)
