1*25c28e83SPiotr Jasiukajtis/* 2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START 3*25c28e83SPiotr Jasiukajtis * 4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the 5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License"). 6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License. 7*25c28e83SPiotr Jasiukajtis * 8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing. 10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions 11*25c28e83SPiotr Jasiukajtis * and limitations under the License. 12*25c28e83SPiotr Jasiukajtis * 13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each 14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the 16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying 17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner] 18*25c28e83SPiotr Jasiukajtis * 19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END 20*25c28e83SPiotr Jasiukajtis */ 21*25c28e83SPiotr Jasiukajtis/* 22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23*25c28e83SPiotr Jasiukajtis */ 24*25c28e83SPiotr Jasiukajtis/* 25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms. 
27*25c28e83SPiotr Jasiukajtis */ 28*25c28e83SPiotr Jasiukajtis 29*25c28e83SPiotr Jasiukajtis .file "__vrsqrtf.S" 30*25c28e83SPiotr Jasiukajtis 31*25c28e83SPiotr Jasiukajtis#include "libm.h" 32*25c28e83SPiotr Jasiukajtis 33*25c28e83SPiotr Jasiukajtis RO_DATA 34*25c28e83SPiotr Jasiukajtis .align 64 35*25c28e83SPiotr Jasiukajtis 36*25c28e83SPiotr Jasiukajtis! i = [0,63] 37*25c28e83SPiotr Jasiukajtis! TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24; 38*25c28e83SPiotr Jasiukajtis! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46))); 39*25c28e83SPiotr Jasiukajtis! i = [64,127] 40*25c28e83SPiotr Jasiukajtis! TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23; 41*25c28e83SPiotr Jasiukajtis! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46))); 42*25c28e83SPiotr Jasiukajtis 43*25c28e83SPiotr Jasiukajtis.CONST_TBL: 44*25c28e83SPiotr Jasiukajtis .word 0x3e800000, 0x00000000, 0x3ff6a09e, 0x667f3bcd, 45*25c28e83SPiotr Jasiukajtis .word 0x3e7f81f8, 0x1f81f820, 0x3ff673e3, 0x2ef63a03, 46*25c28e83SPiotr Jasiukajtis .word 0x3e7f07c1, 0xf07c1f08, 0x3ff6482d, 0x37a5a3d2, 47*25c28e83SPiotr Jasiukajtis .word 0x3e7e9131, 0xabf0b767, 0x3ff61d72, 0xb7978671, 48*25c28e83SPiotr Jasiukajtis .word 0x3e7e1e1e, 0x1e1e1e1e, 0x3ff5f3aa, 0x673fa911, 49*25c28e83SPiotr Jasiukajtis .word 0x3e7dae60, 0x76b981db, 0x3ff5cacb, 0x7802f342, 50*25c28e83SPiotr Jasiukajtis .word 0x3e7d41d4, 0x1d41d41d, 0x3ff5a2cd, 0x8c69d61a, 51*25c28e83SPiotr Jasiukajtis .word 0x3e7cd856, 0x89039b0b, 0x3ff57ba8, 0xb0ee01b9, 52*25c28e83SPiotr Jasiukajtis .word 0x3e7c71c7, 0x1c71c71c, 0x3ff55555, 0x55555555, 53*25c28e83SPiotr Jasiukajtis .word 0x3e7c0e07, 0x0381c0e0, 0x3ff52fcc, 0x468d6b54, 54*25c28e83SPiotr Jasiukajtis .word 0x3e7bacf9, 0x14c1bad0, 0x3ff50b06, 0xa8fc6b70, 55*25c28e83SPiotr Jasiukajtis .word 0x3e7b4e81, 0xb4e81b4f, 0x3ff4e6fd, 0xf33cf032, 56*25c28e83SPiotr Jasiukajtis .word 0x3e7af286, 0xbca1af28, 0x3ff4c3ab, 0xe93bcf74, 
57*25c28e83SPiotr Jasiukajtis .word 0x3e7a98ef, 0x606a63be, 0x3ff4a10a, 0x97af7b92, 58*25c28e83SPiotr Jasiukajtis .word 0x3e7a41a4, 0x1a41a41a, 0x3ff47f14, 0x4fe17f9f, 59*25c28e83SPiotr Jasiukajtis .word 0x3e79ec8e, 0x951033d9, 0x3ff45dc3, 0xa3c34fa3, 60*25c28e83SPiotr Jasiukajtis .word 0x3e799999, 0x9999999a, 0x3ff43d13, 0x6248490f, 61*25c28e83SPiotr Jasiukajtis .word 0x3e7948b0, 0xfcd6e9e0, 0x3ff41cfe, 0x93ff5199, 62*25c28e83SPiotr Jasiukajtis .word 0x3e78f9c1, 0x8f9c18fa, 0x3ff3fd80, 0x77e70577, 63*25c28e83SPiotr Jasiukajtis .word 0x3e78acb9, 0x0f6bf3aa, 0x3ff3de94, 0x8077db58, 64*25c28e83SPiotr Jasiukajtis .word 0x3e786186, 0x18618618, 0x3ff3c036, 0x50e00e03, 65*25c28e83SPiotr Jasiukajtis .word 0x3e781818, 0x18181818, 0x3ff3a261, 0xba6d7a37, 66*25c28e83SPiotr Jasiukajtis .word 0x3e77d05f, 0x417d05f4, 0x3ff38512, 0xba21f51e, 67*25c28e83SPiotr Jasiukajtis .word 0x3e778a4c, 0x8178a4c8, 0x3ff36845, 0x766eec92, 68*25c28e83SPiotr Jasiukajtis .word 0x3e7745d1, 0x745d1746, 0x3ff34bf6, 0x3d156826, 69*25c28e83SPiotr Jasiukajtis .word 0x3e7702e0, 0x5c0b8170, 0x3ff33021, 0x8127c0e0, 70*25c28e83SPiotr Jasiukajtis .word 0x3e76c16c, 0x16c16c17, 0x3ff314c3, 0xd92a9e91, 71*25c28e83SPiotr Jasiukajtis .word 0x3e768168, 0x16816817, 0x3ff2f9d9, 0xfd52fd50, 72*25c28e83SPiotr Jasiukajtis .word 0x3e7642c8, 0x590b2164, 0x3ff2df60, 0xc5df2c9e, 73*25c28e83SPiotr Jasiukajtis .word 0x3e760581, 0x60581606, 0x3ff2c555, 0x2988e428, 74*25c28e83SPiotr Jasiukajtis .word 0x3e75c988, 0x2b931057, 0x3ff2abb4, 0x3c0eb0f4, 75*25c28e83SPiotr Jasiukajtis .word 0x3e758ed2, 0x308158ed, 0x3ff2927b, 0x2cd320f5, 76*25c28e83SPiotr Jasiukajtis .word 0x3e755555, 0x55555555, 0x3ff279a7, 0x4590331c, 77*25c28e83SPiotr Jasiukajtis .word 0x3e751d07, 0xeae2f815, 0x3ff26135, 0xe91daf55, 78*25c28e83SPiotr Jasiukajtis .word 0x3e74e5e0, 0xa72f0539, 0x3ff24924, 0x92492492, 79*25c28e83SPiotr Jasiukajtis .word 0x3e74afd6, 0xa052bf5b, 0x3ff23170, 0xd2be638a, 80*25c28e83SPiotr Jasiukajtis .word 0x3e747ae1, 0x47ae147b, 
0x3ff21a18, 0x51ff630a, 81*25c28e83SPiotr Jasiukajtis .word 0x3e7446f8, 0x6562d9fb, 0x3ff20318, 0xcc6a8f5d, 82*25c28e83SPiotr Jasiukajtis .word 0x3e741414, 0x14141414, 0x3ff1ec70, 0x124e98f9, 83*25c28e83SPiotr Jasiukajtis .word 0x3e73e22c, 0xbce4a902, 0x3ff1d61c, 0x070ae7d3, 84*25c28e83SPiotr Jasiukajtis .word 0x3e73b13b, 0x13b13b14, 0x3ff1c01a, 0xa03be896, 85*25c28e83SPiotr Jasiukajtis .word 0x3e738138, 0x13813814, 0x3ff1aa69, 0xe4f2777f, 86*25c28e83SPiotr Jasiukajtis .word 0x3e73521c, 0xfb2b78c1, 0x3ff19507, 0xecf5b9e9, 87*25c28e83SPiotr Jasiukajtis .word 0x3e7323e3, 0x4a2b10bf, 0x3ff17ff2, 0xe00ec3ee, 88*25c28e83SPiotr Jasiukajtis .word 0x3e72f684, 0xbda12f68, 0x3ff16b28, 0xf55d72d4, 89*25c28e83SPiotr Jasiukajtis .word 0x3e72c9fb, 0x4d812ca0, 0x3ff156a8, 0x72b5ef62, 90*25c28e83SPiotr Jasiukajtis .word 0x3e729e41, 0x29e4129e, 0x3ff1426f, 0xac0654db, 91*25c28e83SPiotr Jasiukajtis .word 0x3e727350, 0xb8812735, 0x3ff12e7d, 0x02c40253, 92*25c28e83SPiotr Jasiukajtis .word 0x3e724924, 0x92492492, 0x3ff11ace, 0xe560242a, 93*25c28e83SPiotr Jasiukajtis .word 0x3e721fb7, 0x8121fb78, 0x3ff10763, 0xcec30b26, 94*25c28e83SPiotr Jasiukajtis .word 0x3e71f704, 0x7dc11f70, 0x3ff0f43a, 0x45cdedad, 95*25c28e83SPiotr Jasiukajtis .word 0x3e71cf06, 0xada2811d, 0x3ff0e150, 0xdce2b60c, 96*25c28e83SPiotr Jasiukajtis .word 0x3e71a7b9, 0x611a7b96, 0x3ff0cea6, 0x317186dc, 97*25c28e83SPiotr Jasiukajtis .word 0x3e718118, 0x11811812, 0x3ff0bc38, 0xeb8ba412, 98*25c28e83SPiotr Jasiukajtis .word 0x3e715b1e, 0x5f75270d, 0x3ff0aa07, 0xbd7b7488, 99*25c28e83SPiotr Jasiukajtis .word 0x3e7135c8, 0x1135c811, 0x3ff09811, 0x63615499, 100*25c28e83SPiotr Jasiukajtis .word 0x3e711111, 0x11111111, 0x3ff08654, 0xa2d4f6db, 101*25c28e83SPiotr Jasiukajtis .word 0x3e70ecf5, 0x6be69c90, 0x3ff074d0, 0x4a8b1438, 102*25c28e83SPiotr Jasiukajtis .word 0x3e70c971, 0x4fbcda3b, 0x3ff06383, 0x31ff307a, 103*25c28e83SPiotr Jasiukajtis .word 0x3e70a681, 0x0a6810a7, 0x3ff0526c, 0x39213bfa, 104*25c28e83SPiotr Jasiukajtis .word 
0x3e708421, 0x08421084, 0x3ff0418a, 0x4806de7d, 105*25c28e83SPiotr Jasiukajtis .word 0x3e70624d, 0xd2f1a9fc, 0x3ff030dc, 0x4ea03a72, 106*25c28e83SPiotr Jasiukajtis .word 0x3e704104, 0x10410410, 0x3ff02061, 0x446ffa9a, 107*25c28e83SPiotr Jasiukajtis .word 0x3e702040, 0x81020408, 0x3ff01018, 0x28467ee9, 108*25c28e83SPiotr Jasiukajtis .word 0x3e800000, 0x00000000, 0x3ff00000, 0x00000000, 109*25c28e83SPiotr Jasiukajtis .word 0x3e7f81f8, 0x1f81f820, 0x3fefc0bd, 0x88a0f1d9, 110*25c28e83SPiotr Jasiukajtis .word 0x3e7f07c1, 0xf07c1f08, 0x3fef82ec, 0x882c0f9b, 111*25c28e83SPiotr Jasiukajtis .word 0x3e7e9131, 0xabf0b767, 0x3fef467f, 0x2814b0cc, 112*25c28e83SPiotr Jasiukajtis .word 0x3e7e1e1e, 0x1e1e1e1e, 0x3fef0b68, 0x48d2af1c, 113*25c28e83SPiotr Jasiukajtis .word 0x3e7dae60, 0x76b981db, 0x3feed19b, 0x75e78957, 114*25c28e83SPiotr Jasiukajtis .word 0x3e7d41d4, 0x1d41d41d, 0x3fee990c, 0xdad55ed2, 115*25c28e83SPiotr Jasiukajtis .word 0x3e7cd856, 0x89039b0b, 0x3fee61b1, 0x38f18adc, 116*25c28e83SPiotr Jasiukajtis .word 0x3e7c71c7, 0x1c71c71c, 0x3fee2b7d, 0xddfefa66, 117*25c28e83SPiotr Jasiukajtis .word 0x3e7c0e07, 0x0381c0e0, 0x3fedf668, 0x9b7e6350, 118*25c28e83SPiotr Jasiukajtis .word 0x3e7bacf9, 0x14c1bad0, 0x3fedc267, 0xbea45549, 119*25c28e83SPiotr Jasiukajtis .word 0x3e7b4e81, 0xb4e81b4f, 0x3fed8f72, 0x08e6b82d, 120*25c28e83SPiotr Jasiukajtis .word 0x3e7af286, 0xbca1af28, 0x3fed5d7e, 0xa914b937, 121*25c28e83SPiotr Jasiukajtis .word 0x3e7a98ef, 0x606a63be, 0x3fed2c85, 0x34ed6d86, 122*25c28e83SPiotr Jasiukajtis .word 0x3e7a41a4, 0x1a41a41a, 0x3fecfc7d, 0xa32a9213, 123*25c28e83SPiotr Jasiukajtis .word 0x3e79ec8e, 0x951033d9, 0x3feccd60, 0x45f5d358, 124*25c28e83SPiotr Jasiukajtis .word 0x3e799999, 0x9999999a, 0x3fec9f25, 0xc5bfedd9, 125*25c28e83SPiotr Jasiukajtis .word 0x3e7948b0, 0xfcd6e9e0, 0x3fec71c7, 0x1c71c71c, 126*25c28e83SPiotr Jasiukajtis .word 0x3e78f9c1, 0x8f9c18fa, 0x3fec453d, 0x90f057a2, 127*25c28e83SPiotr Jasiukajtis .word 0x3e78acb9, 0x0f6bf3aa, 0x3fec1982, 
0xb2ece47b, 128*25c28e83SPiotr Jasiukajtis .word 0x3e786186, 0x18618618, 0x3febee90, 0x56fb9c39, 129*25c28e83SPiotr Jasiukajtis .word 0x3e781818, 0x18181818, 0x3febc460, 0x92eb3118, 130*25c28e83SPiotr Jasiukajtis .word 0x3e77d05f, 0x417d05f4, 0x3feb9aed, 0xba588347, 131*25c28e83SPiotr Jasiukajtis .word 0x3e778a4c, 0x8178a4c8, 0x3feb7232, 0x5b79db11, 132*25c28e83SPiotr Jasiukajtis .word 0x3e7745d1, 0x745d1746, 0x3feb4a29, 0x3c1d9550, 133*25c28e83SPiotr Jasiukajtis .word 0x3e7702e0, 0x5c0b8170, 0x3feb22cd, 0x56d87d7e, 134*25c28e83SPiotr Jasiukajtis .word 0x3e76c16c, 0x16c16c17, 0x3feafc19, 0xd8606169, 135*25c28e83SPiotr Jasiukajtis .word 0x3e768168, 0x16816817, 0x3fead60a, 0x1d0fb394, 136*25c28e83SPiotr Jasiukajtis .word 0x3e7642c8, 0x590b2164, 0x3feab099, 0xae8f539a, 137*25c28e83SPiotr Jasiukajtis .word 0x3e760581, 0x60581606, 0x3fea8bc4, 0x41a3d02c, 138*25c28e83SPiotr Jasiukajtis .word 0x3e75c988, 0x2b931057, 0x3fea6785, 0xb41bacf7, 139*25c28e83SPiotr Jasiukajtis .word 0x3e758ed2, 0x308158ed, 0x3fea43da, 0x0adc6899, 140*25c28e83SPiotr Jasiukajtis .word 0x3e755555, 0x55555555, 0x3fea20bd, 0x700c2c3e, 141*25c28e83SPiotr Jasiukajtis .word 0x3e751d07, 0xeae2f815, 0x3fe9fe2c, 0x315637ee, 142*25c28e83SPiotr Jasiukajtis .word 0x3e74e5e0, 0xa72f0539, 0x3fe9dc22, 0xbe484458, 143*25c28e83SPiotr Jasiukajtis .word 0x3e74afd6, 0xa052bf5b, 0x3fe9ba9d, 0xa6c73588, 144*25c28e83SPiotr Jasiukajtis .word 0x3e747ae1, 0x47ae147b, 0x3fe99999, 0x9999999a, 145*25c28e83SPiotr Jasiukajtis .word 0x3e7446f8, 0x6562d9fb, 0x3fe97913, 0x63068b54, 146*25c28e83SPiotr Jasiukajtis .word 0x3e741414, 0x14141414, 0x3fe95907, 0xeb87ab44, 147*25c28e83SPiotr Jasiukajtis .word 0x3e73e22c, 0xbce4a902, 0x3fe93974, 0x368cfa31, 148*25c28e83SPiotr Jasiukajtis .word 0x3e73b13b, 0x13b13b14, 0x3fe91a55, 0x6151761c, 149*25c28e83SPiotr Jasiukajtis .word 0x3e738138, 0x13813814, 0x3fe8fba8, 0xa1bf6f96, 150*25c28e83SPiotr Jasiukajtis .word 0x3e73521c, 0xfb2b78c1, 0x3fe8dd6b, 0x4563a009, 151*25c28e83SPiotr Jasiukajtis 
.word 0x3e7323e3, 0x4a2b10bf, 0x3fe8bf9a, 0xb06e1af3, 152*25c28e83SPiotr Jasiukajtis .word 0x3e72f684, 0xbda12f68, 0x3fe8a234, 0x5cc04426, 153*25c28e83SPiotr Jasiukajtis .word 0x3e72c9fb, 0x4d812ca0, 0x3fe88535, 0xd90703c6, 154*25c28e83SPiotr Jasiukajtis .word 0x3e729e41, 0x29e4129e, 0x3fe8689c, 0xc7e07e7d, 155*25c28e83SPiotr Jasiukajtis .word 0x3e727350, 0xb8812735, 0x3fe84c66, 0xdf0ca4c2, 156*25c28e83SPiotr Jasiukajtis .word 0x3e724924, 0x92492492, 0x3fe83091, 0xe6a7f7e7, 157*25c28e83SPiotr Jasiukajtis .word 0x3e721fb7, 0x8121fb78, 0x3fe8151b, 0xb86fee1d, 158*25c28e83SPiotr Jasiukajtis .word 0x3e71f704, 0x7dc11f70, 0x3fe7fa02, 0x3f1068d1, 159*25c28e83SPiotr Jasiukajtis .word 0x3e71cf06, 0xada2811d, 0x3fe7df43, 0x7579b9b5, 160*25c28e83SPiotr Jasiukajtis .word 0x3e71a7b9, 0x611a7b96, 0x3fe7c4dd, 0x663ebb88, 161*25c28e83SPiotr Jasiukajtis .word 0x3e718118, 0x11811812, 0x3fe7aace, 0x2afa8b72, 162*25c28e83SPiotr Jasiukajtis .word 0x3e715b1e, 0x5f75270d, 0x3fe79113, 0xebbd7729, 163*25c28e83SPiotr Jasiukajtis .word 0x3e7135c8, 0x1135c811, 0x3fe777ac, 0xde80baea, 164*25c28e83SPiotr Jasiukajtis .word 0x3e711111, 0x11111111, 0x3fe75e97, 0x46a0b098, 165*25c28e83SPiotr Jasiukajtis .word 0x3e70ecf5, 0x6be69c90, 0x3fe745d1, 0x745d1746, 166*25c28e83SPiotr Jasiukajtis .word 0x3e70c971, 0x4fbcda3b, 0x3fe72d59, 0xc45f1fc5, 167*25c28e83SPiotr Jasiukajtis .word 0x3e70a681, 0x0a6810a7, 0x3fe7152e, 0x9f44f01f, 168*25c28e83SPiotr Jasiukajtis .word 0x3e708421, 0x08421084, 0x3fe6fd4e, 0x79325467, 169*25c28e83SPiotr Jasiukajtis .word 0x3e70624d, 0xd2f1a9fc, 0x3fe6e5b7, 0xd16657e1, 170*25c28e83SPiotr Jasiukajtis .word 0x3e704104, 0x10410410, 0x3fe6ce69, 0x31d5858d, 171*25c28e83SPiotr Jasiukajtis .word 0x3e702040, 0x81020408, 0x3fe6b761, 0x2ec892f6, 172*25c28e83SPiotr Jasiukajtis 173*25c28e83SPiotr Jasiukajtis .word 0x3fefffff, 0xfee7f18f ! K0 = 9.99999997962321453275e-01 174*25c28e83SPiotr Jasiukajtis .word 0xbfdfffff, 0xfe07e52f ! 
K1 = -4.99999998166077580600e-01 175*25c28e83SPiotr Jasiukajtis .word 0x3fd80118, 0x0ca296d9 ! K2 = 3.75066768969515586277e-01 176*25c28e83SPiotr Jasiukajtis .word 0xbfd400fc, 0x0bbb8e78 ! K3 = -3.12560092408808548438e-01 177*25c28e83SPiotr Jasiukajtis .word 0x7ffe0000, 0x7ffe0000 ! DC0 178*25c28e83SPiotr Jasiukajtis .word 0x3f800000, 0x40000000 ! FTWO 179*25c28e83SPiotr Jasiukajtis 180*25c28e83SPiotr Jasiukajtis#define stridex %l4 181*25c28e83SPiotr Jasiukajtis#define stridex2 %l1 182*25c28e83SPiotr Jasiukajtis#define stridey %l3 183*25c28e83SPiotr Jasiukajtis#define stridey2 %i2 184*25c28e83SPiotr Jasiukajtis#define TBL %l2 185*25c28e83SPiotr Jasiukajtis#define counter %i5 186*25c28e83SPiotr Jasiukajtis 187*25c28e83SPiotr Jasiukajtis#define K3 %f38 188*25c28e83SPiotr Jasiukajtis#define K2 %f36 189*25c28e83SPiotr Jasiukajtis#define K1 %f34 190*25c28e83SPiotr Jasiukajtis#define K0 %f32 191*25c28e83SPiotr Jasiukajtis#define DC0 %f4 192*25c28e83SPiotr Jasiukajtis#define FONE %f2 193*25c28e83SPiotr Jasiukajtis#define FTWO %f3 194*25c28e83SPiotr Jasiukajtis 195*25c28e83SPiotr Jasiukajtis#define _0x00800000 %o2 196*25c28e83SPiotr Jasiukajtis#define _0x7f800000 %o4 197*25c28e83SPiotr Jasiukajtis 198*25c28e83SPiotr Jasiukajtis#define tmp0 STACK_BIAS-0x30 199*25c28e83SPiotr Jasiukajtis#define tmp1 STACK_BIAS-0x28 200*25c28e83SPiotr Jasiukajtis#define tmp2 STACK_BIAS-0x20 201*25c28e83SPiotr Jasiukajtis#define tmp3 STACK_BIAS-0x18 202*25c28e83SPiotr Jasiukajtis#define tmp_counter STACK_BIAS-0x10 203*25c28e83SPiotr Jasiukajtis#define tmp_px STACK_BIAS-0x08 204*25c28e83SPiotr Jasiukajtis 205*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9 206*25c28e83SPiotr Jasiukajtis#define tmps 0x30 207*25c28e83SPiotr Jasiukajtis 208*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 209*25c28e83SPiotr Jasiukajtis! !!!!! algorithm !!!!! 210*25c28e83SPiotr Jasiukajtis! 
((float*)&ddx0)[0] = *px; 211*25c28e83SPiotr Jasiukajtis! ax0 = *(int*)px; 212*25c28e83SPiotr Jasiukajtis! 213*25c28e83SPiotr Jasiukajtis! ((float*)&ddx0)[1] = *(px + stridex); 214*25c28e83SPiotr Jasiukajtis! ax1 = *(int*)(px + stridex); 215*25c28e83SPiotr Jasiukajtis! 216*25c28e83SPiotr Jasiukajtis! px += stridex2; 217*25c28e83SPiotr Jasiukajtis! 218*25c28e83SPiotr Jasiukajtis! if ( ax0 >= 0x7f800000 ) 219*25c28e83SPiotr Jasiukajtis! { 220*25c28e83SPiotr Jasiukajtis! RETURN ( FONE / ((float*)&dres0)[0] ); 221*25c28e83SPiotr Jasiukajtis! } 222*25c28e83SPiotr Jasiukajtis! if ( ax0 < 0x00800000 ) 223*25c28e83SPiotr Jasiukajtis! { 224*25c28e83SPiotr Jasiukajtis! float res = ((float*)&dres0)[0]; 225*25c28e83SPiotr Jasiukajtis! 226*25c28e83SPiotr Jasiukajtis! if ( (ax0 & 0x7fffffff) == 0 ) /* |X| = zero */ 227*25c28e83SPiotr Jasiukajtis! { 228*25c28e83SPiotr Jasiukajtis! RETURN ( FONE / res ) 229*25c28e83SPiotr Jasiukajtis! } 230*25c28e83SPiotr Jasiukajtis! else if ( ax0 >= 0 ) /* X = denormal */ 231*25c28e83SPiotr Jasiukajtis! { 232*25c28e83SPiotr Jasiukajtis! double res0, xx0, tbl_div0, tbl_sqrt0; 233*25c28e83SPiotr Jasiukajtis! float fres0; 234*25c28e83SPiotr Jasiukajtis! int iax0, si0, iexp0; 235*25c28e83SPiotr Jasiukajtis! 236*25c28e83SPiotr Jasiukajtis! res = *(int*)&res; 237*25c28e83SPiotr Jasiukajtis! res *= FTWO; 238*25c28e83SPiotr Jasiukajtis! ax0 = *(int*)&res; 239*25c28e83SPiotr Jasiukajtis! iexp0 = ax0 >> 24; 240*25c28e83SPiotr Jasiukajtis! iexp0 = 0x3f + 0x4b - iexp0; 241*25c28e83SPiotr Jasiukajtis! iexp0 = iexp0 << 23; 242*25c28e83SPiotr Jasiukajtis! 243*25c28e83SPiotr Jasiukajtis! si0 = (ax0 >> 13) & 0x7f0; 244*25c28e83SPiotr Jasiukajtis! 245*25c28e83SPiotr Jasiukajtis! tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; 246*25c28e83SPiotr Jasiukajtis! tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; 247*25c28e83SPiotr Jasiukajtis! iax0 = ax0 & 0x7ffe0000; 248*25c28e83SPiotr Jasiukajtis! iax0 = ax0 - iax0; 249*25c28e83SPiotr Jasiukajtis! 
xx0 = iax0 * tbl_div0; 250*25c28e83SPiotr Jasiukajtis! res0 = tbl_sqrt0 * (((K3 * xx0 + K2) * xx0 + K1) * xx0 + K0); 251*25c28e83SPiotr Jasiukajtis! 252*25c28e83SPiotr Jasiukajtis! fres0 = res0; 253*25c28e83SPiotr Jasiukajtis! iexp0 += *(int*)&fres0; 254*25c28e83SPiotr Jasiukajtis! RETURN(*(float*)&iexp0) 255*25c28e83SPiotr Jasiukajtis! } 256*25c28e83SPiotr Jasiukajtis! else /* X = negative */ 257*25c28e83SPiotr Jasiukajtis! { 258*25c28e83SPiotr Jasiukajtis! RETURN ( sqrtf(res) ) 259*25c28e83SPiotr Jasiukajtis! } 260*25c28e83SPiotr Jasiukajtis! } 261*25c28e83SPiotr Jasiukajtis! if ( ax1 >= 0x7f800000 ) 262*25c28e83SPiotr Jasiukajtis! { 263*25c28e83SPiotr Jasiukajtis! RETURN ( FONE / ((float*)&dres0)[1] ) 264*25c28e83SPiotr Jasiukajtis! } 265*25c28e83SPiotr Jasiukajtis! if ( ax1 < 0x00800000 ) 266*25c28e83SPiotr Jasiukajtis! { 267*25c28e83SPiotr Jasiukajtis! float res = ((float*)&dres0)[1]; 268*25c28e83SPiotr Jasiukajtis! if ( (ax1 & 0x7fffffff) == 0 ) /* |X| = zero */ 269*25c28e83SPiotr Jasiukajtis! { 270*25c28e83SPiotr Jasiukajtis! RETURN ( FONE / res ) 271*25c28e83SPiotr Jasiukajtis! } 272*25c28e83SPiotr Jasiukajtis! else if ( ax1 >= 0 ) /* X = denormal */ 273*25c28e83SPiotr Jasiukajtis! { 274*25c28e83SPiotr Jasiukajtis! double res0, xx0, tbl_div0, tbl_sqrt0; 275*25c28e83SPiotr Jasiukajtis! float fres0; 276*25c28e83SPiotr Jasiukajtis! int iax1, si0, iexp0; 277*25c28e83SPiotr Jasiukajtis! 278*25c28e83SPiotr Jasiukajtis! res = *(int*)&res; 279*25c28e83SPiotr Jasiukajtis! res *= FTWO; 280*25c28e83SPiotr Jasiukajtis! ax1 = *(int*)&res; 281*25c28e83SPiotr Jasiukajtis! iexp0 = ax1 >> 24; 282*25c28e83SPiotr Jasiukajtis! iexp0 = 0x3f + 0x4b - iexp0; 283*25c28e83SPiotr Jasiukajtis! iexp0 = iexp0 << 23; 284*25c28e83SPiotr Jasiukajtis! 285*25c28e83SPiotr Jasiukajtis! si0 = (ax1 >> 13) & 0x7f0; 286*25c28e83SPiotr Jasiukajtis! 287*25c28e83SPiotr Jasiukajtis! tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0]; 288*25c28e83SPiotr Jasiukajtis! 
tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1]; 289*25c28e83SPiotr Jasiukajtis! iax1 = ax1 & 0x7ffe0000; 290*25c28e83SPiotr Jasiukajtis! iax1 = ax1 - iax1; 291*25c28e83SPiotr Jasiukajtis! xx0 = iax1 * tbl_div0; 292*25c28e83SPiotr Jasiukajtis! res0 = tbl_sqrt0 * (((K3 * xx0 + K2) * xx0 + K1) * xx0 + K0); 293*25c28e83SPiotr Jasiukajtis! 294*25c28e83SPiotr Jasiukajtis! fres0 = res0; 295*25c28e83SPiotr Jasiukajtis! iexp0 += *(int*)&fres0; 296*25c28e83SPiotr Jasiukajtis! RETURN(*(float*)&iexp0) 297*25c28e83SPiotr Jasiukajtis! } 298*25c28e83SPiotr Jasiukajtis! else /* X = negative */ 299*25c28e83SPiotr Jasiukajtis! { 300*25c28e83SPiotr Jasiukajtis! RETURN ( sqrtf(res) ) 301*25c28e83SPiotr Jasiukajtis! } 302*25c28e83SPiotr Jasiukajtis! } 303*25c28e83SPiotr Jasiukajtis! 304*25c28e83SPiotr Jasiukajtis! iexp0 = ax0 >> 24; 305*25c28e83SPiotr Jasiukajtis! iexp1 = ax1 >> 24; 306*25c28e83SPiotr Jasiukajtis! iexp0 = 0x3f - iexp0; 307*25c28e83SPiotr Jasiukajtis! iexp1 = 0x3f - iexp1; 308*25c28e83SPiotr Jasiukajtis! iexp1 &= 0x1ff; 309*25c28e83SPiotr Jasiukajtis! lexp0 = iexp0 << 55; 310*25c28e83SPiotr Jasiukajtis! lexp1 = iexp1 << 23; 311*25c28e83SPiotr Jasiukajtis! 312*25c28e83SPiotr Jasiukajtis! lexp0 |= lexp1; 313*25c28e83SPiotr Jasiukajtis! 314*25c28e83SPiotr Jasiukajtis! fdx0 = *((double*)&lexp0); 315*25c28e83SPiotr Jasiukajtis! 316*25c28e83SPiotr Jasiukajtis! si0 = ax0 >> 13; 317*25c28e83SPiotr Jasiukajtis! si1 = ax1 >> 13; 318*25c28e83SPiotr Jasiukajtis! si0 &= 0x7f0; 319*25c28e83SPiotr Jasiukajtis! si1 &= 0x7f0; 320*25c28e83SPiotr Jasiukajtis! 321*25c28e83SPiotr Jasiukajtis! addr0 = (char*)TBL + si0; 322*25c28e83SPiotr Jasiukajtis! addr1 = (char*)TBL + si1; 323*25c28e83SPiotr Jasiukajtis! tbl_div0 = ((double*)((char*)TBL + si0))[0]; 324*25c28e83SPiotr Jasiukajtis! tbl_div1 = ((double*)((char*)TBL + si1))[0]; 325*25c28e83SPiotr Jasiukajtis! tbl_sqrt0 = ((double*)addr0)[1]; 326*25c28e83SPiotr Jasiukajtis! 
tbl_sqrt1 = ((double*)addr1)[1]; 327*25c28e83SPiotr Jasiukajtis! dfx0 = vis_fand(ddx0,DC0); 328*25c28e83SPiotr Jasiukajtis! dfx0 = vis_fpsub32(ddx0,dfx0); 329*25c28e83SPiotr Jasiukajtis! dtmp0 = (double)(((int*)&dfx0)[0]); 330*25c28e83SPiotr Jasiukajtis! dtmp1 = (double)(((int*)&dfx0)[1]); 331*25c28e83SPiotr Jasiukajtis! xx0 = dtmp0 * tbl_div0; 332*25c28e83SPiotr Jasiukajtis! xx1 = dtmp1 * tbl_div1; 333*25c28e83SPiotr Jasiukajtis! res0 = K3 * xx0; 334*25c28e83SPiotr Jasiukajtis! res1 = K3 * xx1; 335*25c28e83SPiotr Jasiukajtis! res0 += K2; 336*25c28e83SPiotr Jasiukajtis! res1 += K2; 337*25c28e83SPiotr Jasiukajtis! res0 *= xx0; 338*25c28e83SPiotr Jasiukajtis! res1 *= xx1; 339*25c28e83SPiotr Jasiukajtis! res0 += K1; 340*25c28e83SPiotr Jasiukajtis! res1 += K1; 341*25c28e83SPiotr Jasiukajtis! res0 *= xx0; 342*25c28e83SPiotr Jasiukajtis! res1 *= xx1; 343*25c28e83SPiotr Jasiukajtis! res0 += K0; 344*25c28e83SPiotr Jasiukajtis! res1 += K0; 345*25c28e83SPiotr Jasiukajtis! res0 = tbl_sqrt0 * res0; 346*25c28e83SPiotr Jasiukajtis! res1 = tbl_sqrt1 * res1; 347*25c28e83SPiotr Jasiukajtis! ((float*)&dres0)[0] = (float)res0; 348*25c28e83SPiotr Jasiukajtis! ((float*)&dres0)[1] = (float)res1; 349*25c28e83SPiotr Jasiukajtis! dres0 = vis_fpadd32(dres0,fdx0); 350*25c28e83SPiotr Jasiukajtis! *py = ((float*)&dres0)[0]; 351*25c28e83SPiotr Jasiukajtis! *(py + stridey) = ((float*)&dres0)[1]; 352*25c28e83SPiotr Jasiukajtis! py += stridey2; 353*25c28e83SPiotr Jasiukajtis! 354*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
355*25c28e83SPiotr Jasiukajtis 356*25c28e83SPiotr Jasiukajtis ENTRY(__vrsqrtf) 357*25c28e83SPiotr Jasiukajtis save %sp,-SA(MINFRAME)-tmps,%sp 358*25c28e83SPiotr Jasiukajtis PIC_SETUP(l7) 359*25c28e83SPiotr Jasiukajtis PIC_SET(l7,.CONST_TBL,l2) 360*25c28e83SPiotr Jasiukajtis 361*25c28e83SPiotr Jasiukajtis st %i0,[%fp+tmp_counter] 362*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 363*25c28e83SPiotr Jasiukajtis 364*25c28e83SPiotr Jasiukajtis ldd [TBL+2048],K0 365*25c28e83SPiotr Jasiukajtis sll %i2,2,stridex 366*25c28e83SPiotr Jasiukajtis 367*25c28e83SPiotr Jasiukajtis ldd [TBL+2048+8],K1 368*25c28e83SPiotr Jasiukajtis sll %i4,2,stridey 369*25c28e83SPiotr Jasiukajtis mov %i3,%i2 370*25c28e83SPiotr Jasiukajtis 371*25c28e83SPiotr Jasiukajtis ldd [TBL+2048+16],K2 372*25c28e83SPiotr Jasiukajtis sethi %hi(0x7f800000),_0x7f800000 373*25c28e83SPiotr Jasiukajtis sll stridex,1,stridex2 374*25c28e83SPiotr Jasiukajtis 375*25c28e83SPiotr Jasiukajtis ldd [TBL+2048+24],K3 376*25c28e83SPiotr Jasiukajtis sethi %hi(0x00800000),_0x00800000 377*25c28e83SPiotr Jasiukajtis 378*25c28e83SPiotr Jasiukajtis ldd [TBL+2048+32],DC0 379*25c28e83SPiotr Jasiukajtis add %g0,0x3f,%l0 380*25c28e83SPiotr Jasiukajtis 381*25c28e83SPiotr Jasiukajtis ldd [TBL+2048+40],FONE 382*25c28e83SPiotr Jasiukajtis! ld [TBL+2048+44],FTWO 383*25c28e83SPiotr Jasiukajtis.begin: 384*25c28e83SPiotr Jasiukajtis ld [%fp+tmp_counter],counter 385*25c28e83SPiotr Jasiukajtis ldx [%fp+tmp_px],%l7 386*25c28e83SPiotr Jasiukajtis st %g0,[%fp+tmp_counter] 387*25c28e83SPiotr Jasiukajtis.begin1: 388*25c28e83SPiotr Jasiukajtis cmp counter,0 389*25c28e83SPiotr Jasiukajtis ble,pn %icc,.exit 390*25c28e83SPiotr Jasiukajtis 391*25c28e83SPiotr Jasiukajtis lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px; 392*25c28e83SPiotr Jasiukajtis 393*25c28e83SPiotr Jasiukajtis lda [stridex+%l7]0x82,%f15 ! 
(5_0) ((float*)&ddx0)[1] = *(px + stridex); 394*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%o0 395*25c28e83SPiotr Jasiukajtis 396*25c28e83SPiotr Jasiukajtis lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px; 397*25c28e83SPiotr Jasiukajtis add %l7,stridex2,%i1 ! px += stridex2 398*25c28e83SPiotr Jasiukajtis add %o0,0x3ff,%o0 399*25c28e83SPiotr Jasiukajtis 400*25c28e83SPiotr Jasiukajtis lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex); 401*25c28e83SPiotr Jasiukajtis fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); 402*25c28e83SPiotr Jasiukajtis 403*25c28e83SPiotr Jasiukajtis sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; 404*25c28e83SPiotr Jasiukajtis add %i1,stridex2,%o5 ! px += stridex2 405*25c28e83SPiotr Jasiukajtis 406*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7f800000 ! (4_1) ax0 ? 0x7f800000 407*25c28e83SPiotr Jasiukajtis bge,pn %icc,.spec0 ! (4_1) if ( ax0 >= 0x7f800000 ) 408*25c28e83SPiotr Jasiukajtis nop 409*25c28e83SPiotr Jasiukajtis 410*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00800000 ! (4_1) ax0 ? 0x00800000 411*25c28e83SPiotr Jasiukajtis bl,pn %icc,.spec1 ! (4_1) if ( ax0 < 0x00800000 ) 412*25c28e83SPiotr Jasiukajtis sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; 413*25c28e83SPiotr Jasiukajtis.cont_spec: 414*25c28e83SPiotr Jasiukajtis and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; 415*25c28e83SPiotr Jasiukajtis 416*25c28e83SPiotr Jasiukajtis ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 417*25c28e83SPiotr Jasiukajtis sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; 418*25c28e83SPiotr Jasiukajtis and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; 419*25c28e83SPiotr Jasiukajtis fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); 420*25c28e83SPiotr Jasiukajtis 421*25c28e83SPiotr Jasiukajtis ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 422*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24; 423*25c28e83SPiotr Jasiukajtis sub %l0,%l7,%l7 ! 
(5_0) iexp1 = 0x3f - iexp1; 424*25c28e83SPiotr Jasiukajtis 425*25c28e83SPiotr Jasiukajtis and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff; 426*25c28e83SPiotr Jasiukajtis add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; 427*25c28e83SPiotr Jasiukajtis 428*25c28e83SPiotr Jasiukajtis sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; 429*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; 430*25c28e83SPiotr Jasiukajtis fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); 431*25c28e83SPiotr Jasiukajtis 432*25c28e83SPiotr Jasiukajtis sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55; 433*25c28e83SPiotr Jasiukajtis fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); 434*25c28e83SPiotr Jasiukajtis 435*25c28e83SPiotr Jasiukajtis or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1; 436*25c28e83SPiotr Jasiukajtis 437*25c28e83SPiotr Jasiukajtis stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); 438*25c28e83SPiotr Jasiukajtis 439*25c28e83SPiotr Jasiukajtis fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0; 440*25c28e83SPiotr Jasiukajtis 441*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f18 ! (0_0) ((float*)&ddx0)[0] = *px; 442*25c28e83SPiotr Jasiukajtis fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; 443*25c28e83SPiotr Jasiukajtis 444*25c28e83SPiotr Jasiukajtis lda [stridex+%i1]0x82,%f19 ! (1_0) ((float*)&ddx0)[1] = *(px + stridex); 445*25c28e83SPiotr Jasiukajtis 446*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%g1 ! (0_0) ax0 = *(int*)px; 447*25c28e83SPiotr Jasiukajtis 448*25c28e83SPiotr Jasiukajtis lda [stridex+%i1]0x82,%i4 ! (1_0) ax1 = *(int*)(px + stridex); 449*25c28e83SPiotr Jasiukajtis cmp %g5,_0x7f800000 ! (5_1) ax1 ? 0x7f800000 450*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update0 ! (5_1) if ( ax1 >= 0x7f800000 ) 451*25c28e83SPiotr Jasiukajtis fmuld K3,%f40,%f52 ! (4_1) res0 = K3 * xx0; 452*25c28e83SPiotr Jasiukajtis.cont0: 453*25c28e83SPiotr Jasiukajtis fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1; 454*25c28e83SPiotr Jasiukajtis cmp %g5,_0x00800000 ! (5_1) ax1 ? 
0x00800000 455*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update1 ! (5_1) if ( ax1 < 0x00800000 ) 456*25c28e83SPiotr Jasiukajtis fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0); 457*25c28e83SPiotr Jasiukajtis.cont1: 458*25c28e83SPiotr Jasiukajtis sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; 459*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7f800000 ! (0_0) ax0 ? 0x7f800000 460*25c28e83SPiotr Jasiukajtis 461*25c28e83SPiotr Jasiukajtis sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; 462*25c28e83SPiotr Jasiukajtis and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; 463*25c28e83SPiotr Jasiukajtis 464*25c28e83SPiotr Jasiukajtis ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 465*25c28e83SPiotr Jasiukajtis sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24; 466*25c28e83SPiotr Jasiukajtis and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; 467*25c28e83SPiotr Jasiukajtis fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); 468*25c28e83SPiotr Jasiukajtis 469*25c28e83SPiotr Jasiukajtis ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 470*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24; 471*25c28e83SPiotr Jasiukajtis sub %l0,%i1,%i1 ! (1_0) iexp1 = 0x3f - iexp1; 472*25c28e83SPiotr Jasiukajtis faddd %f52,K2,%f62 ! (4_1) res0 += K2; 473*25c28e83SPiotr Jasiukajtis 474*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; 475*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update2 ! (0_0) if ( ax0 >= 0x7f800000 ) 476*25c28e83SPiotr Jasiukajtis faddd %f50,K2,%f60 ! (5_1) res1 += K2; 477*25c28e83SPiotr Jasiukajtis.cont2: 478*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00800000 ! (0_0) ax0 ? 0x00800000 479*25c28e83SPiotr Jasiukajtis and %i1,511,%i0 ! (1_0) iexp1 = 0x1ff; 480*25c28e83SPiotr Jasiukajtis fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]); 481*25c28e83SPiotr Jasiukajtis 482*25c28e83SPiotr Jasiukajtis sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; 483*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update3 ! 
(0_0) if ( ax0 < 0x00800000 ) 484*25c28e83SPiotr Jasiukajtis fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); 485*25c28e83SPiotr Jasiukajtis.cont3: 486*25c28e83SPiotr Jasiukajtis fmuld %f62,%f40,%f30 ! (4_1) res0 *= xx0; 487*25c28e83SPiotr Jasiukajtis sllx %g5,55,%g5 ! (0_0) lexp0 = iexp0 << 55; 488*25c28e83SPiotr Jasiukajtis 489*25c28e83SPiotr Jasiukajtis fmuld %f60,%f46,%f48 ! (5_1) res1 *= xx1; 490*25c28e83SPiotr Jasiukajtis or %g5,%i0,%g5 ! (0_0) lexp0 |= lexp1; 491*25c28e83SPiotr Jasiukajtis stx %g5,[%fp+tmp1] ! (0_0) fdx0 = *((double*)lexp0); 492*25c28e83SPiotr Jasiukajtis 493*25c28e83SPiotr Jasiukajtis fmuld %f56,%f54,%f26 ! (0_0) xx0 = dtmp0 * tbl_div0; 494*25c28e83SPiotr Jasiukajtis sll stridex,1,stridex2 ! stridex2 = stridex * 2; 495*25c28e83SPiotr Jasiukajtis 496*25c28e83SPiotr Jasiukajtis lda [%o5]0x82,%f24 ! (2_0) ((float*)&ddx0)[0] = *px; 497*25c28e83SPiotr Jasiukajtis add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; 498*25c28e83SPiotr Jasiukajtis fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; 499*25c28e83SPiotr Jasiukajtis 500*25c28e83SPiotr Jasiukajtis lda [stridex+%o5]0x82,%f25 ! (3_0) ((float*)&ddx0)[1] = *(px + stridex); 501*25c28e83SPiotr Jasiukajtis add %l5,TBL,%l5 ! (4_1) addr0 = (char*)TBL + si0; 502*25c28e83SPiotr Jasiukajtis faddd %f30,K1,%f62 ! (4_1) res0 += K1; 503*25c28e83SPiotr Jasiukajtis 504*25c28e83SPiotr Jasiukajtis lda [%o5]0x82,%g1 ! (2_0) ax0 = *(int*)px; 505*25c28e83SPiotr Jasiukajtis add %o5,stridex2,%l7 ! px += stridex2 506*25c28e83SPiotr Jasiukajtis faddd %f48,K1,%f42 ! (5_1) res1 += K1; 507*25c28e83SPiotr Jasiukajtis 508*25c28e83SPiotr Jasiukajtis lda [stridex+%o5]0x82,%o5 ! (3_0) ax1 = *(int*)(px + stridex); 509*25c28e83SPiotr Jasiukajtis cmp %i4,_0x7f800000 ! (1_0) ax1 ? 0x7f800000 510*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update4 ! (1_0) if ( ax1 >= 0x7f800000 ) 511*25c28e83SPiotr Jasiukajtis fmuld K3,%f26,%f52 ! 
(0_0) res0 = K3 * xx0; 512*25c28e83SPiotr Jasiukajtis.cont4: 513*25c28e83SPiotr Jasiukajtis fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; 514*25c28e83SPiotr Jasiukajtis cmp %i4,_0x00800000 ! (1_0) ax1 ? 0x00800000 515*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update5 ! (1_0) if ( ax1 < 0x00800000 ) 516*25c28e83SPiotr Jasiukajtis fand %f24,DC0,%f54 ! (2_0) dfx0 = vis_fand(ddx0,DC0); 517*25c28e83SPiotr Jasiukajtis.cont5: 518*25c28e83SPiotr Jasiukajtis fmuld %f62,%f40,%f48 ! (4_1) res0 *= xx0; 519*25c28e83SPiotr Jasiukajtis sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; 520*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7f800000 ! (2_0) ax0 ? 0x7f800000 521*25c28e83SPiotr Jasiukajtis 522*25c28e83SPiotr Jasiukajtis fmuld %f42,%f46,%f58 ! (5_1) res1 *= xx1; 523*25c28e83SPiotr Jasiukajtis sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; 524*25c28e83SPiotr Jasiukajtis and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; 525*25c28e83SPiotr Jasiukajtis 526*25c28e83SPiotr Jasiukajtis ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 527*25c28e83SPiotr Jasiukajtis sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24; 528*25c28e83SPiotr Jasiukajtis and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; 529*25c28e83SPiotr Jasiukajtis fpsub32 %f24,%f54,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); 530*25c28e83SPiotr Jasiukajtis 531*25c28e83SPiotr Jasiukajtis ldd [%o1+TBL],%f46 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 532*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; 533*25c28e83SPiotr Jasiukajtis sub %l0,%o3,%o3 ! (3_0) iexp1 = 0x3f - iexp1; 534*25c28e83SPiotr Jasiukajtis faddd %f52,K2,%f40 ! (0_0) res0 += K2; 535*25c28e83SPiotr Jasiukajtis 536*25c28e83SPiotr Jasiukajtis ldd [%l5+8],%f42 ! (4_1) tbl_sqrt0 = ((double*)addr0)[1]; 537*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; 538*25c28e83SPiotr Jasiukajtis and %o3,511,%i3 ! (3_0) iexp1 &= 0x1ff; 539*25c28e83SPiotr Jasiukajtis faddd %f50,K2,%f60 ! 
(1_0) res0 += K2; 540*25c28e83SPiotr Jasiukajtis 541*25c28e83SPiotr Jasiukajtis ldd [%l6+8],%f28 ! (5_1) tbl_sqrt1 = ((double*)addr1)[1]; 542*25c28e83SPiotr Jasiukajtis sllx %g5,55,%g5 ! (2_0) lexp0 = iexp0 << 55; 543*25c28e83SPiotr Jasiukajtis add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; 544*25c28e83SPiotr Jasiukajtis fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); 545*25c28e83SPiotr Jasiukajtis 546*25c28e83SPiotr Jasiukajtis sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; 547*25c28e83SPiotr Jasiukajtis fitod %f13,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); 548*25c28e83SPiotr Jasiukajtis 549*25c28e83SPiotr Jasiukajtis fmuld %f40,%f26,%f40 ! (0_0) res0 *= xx0; 550*25c28e83SPiotr Jasiukajtis or %g5,%i3,%g5 ! (2_0) lexp0 |= lexp1; 551*25c28e83SPiotr Jasiukajtis faddd %f48,K0,%f62 ! (4_1) res0 += K0; 552*25c28e83SPiotr Jasiukajtis 553*25c28e83SPiotr Jasiukajtis fmuld %f60,%f44,%f48 ! (1_0) res1 *= xx1; 554*25c28e83SPiotr Jasiukajtis add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; 555*25c28e83SPiotr Jasiukajtis stx %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); 556*25c28e83SPiotr Jasiukajtis faddd %f58,K0,%f60 ! (5_1) res1 += K0; 557*25c28e83SPiotr Jasiukajtis 558*25c28e83SPiotr Jasiukajtis fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0; 559*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update6 ! (2_0) if ( ax0 >= 0x7f800000 ) 560*25c28e83SPiotr Jasiukajtis lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px; 561*25c28e83SPiotr Jasiukajtis.cont6: 562*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00800000 ! (2_0) ax0 ? 0x00800000 563*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update7 ! (2_0) if ( ax0 < 0x00800000 ) 564*25c28e83SPiotr Jasiukajtis nop 565*25c28e83SPiotr Jasiukajtis.cont7: 566*25c28e83SPiotr Jasiukajtis fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; 567*25c28e83SPiotr Jasiukajtis 568*25c28e83SPiotr Jasiukajtis lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex); 569*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! 
(3_0) ax1 ? 0x7f800000 570*25c28e83SPiotr Jasiukajtis fmuld %f42,%f62,%f58 ! (4_1) res0 = tbl_sqrt0 * res0; 571*25c28e83SPiotr Jasiukajtis faddd %f40,K1,%f46 ! (0_0) res0 += K1; 572*25c28e83SPiotr Jasiukajtis 573*25c28e83SPiotr Jasiukajtis lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px; 574*25c28e83SPiotr Jasiukajtis add %l7,stridex2,%i1 ! px += stridex2 575*25c28e83SPiotr Jasiukajtis fmuld %f28,%f60,%f56 ! (5_1) res1 = tbl_sqrt1 * res1; 576*25c28e83SPiotr Jasiukajtis faddd %f48,K1,%f62 ! (1_0) res1 += K1; 577*25c28e83SPiotr Jasiukajtis 578*25c28e83SPiotr Jasiukajtis lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex); 579*25c28e83SPiotr Jasiukajtis add %o0,TBL,%o0 ! (0_0) addr0 = (char*)TBL + si0; 580*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update8 ! (3_0) if ( ax1 >= 0x7f800000 ) 581*25c28e83SPiotr Jasiukajtis fmuld K3,%f30,%f52 ! (2_0) res0 = K3 * xx0; 582*25c28e83SPiotr Jasiukajtis.cont8: 583*25c28e83SPiotr Jasiukajtis fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1; 584*25c28e83SPiotr Jasiukajtis cmp %o5,_0x00800000 ! (3_0) ax1 ? 0x00800000 585*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update9 ! (3_0) if ( ax1 < 0x00800000 ) 586*25c28e83SPiotr Jasiukajtis fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); 587*25c28e83SPiotr Jasiukajtis.cont9: 588*25c28e83SPiotr Jasiukajtis fmuld %f46,%f26,%f48 ! (0_0) res0 *= xx0; 589*25c28e83SPiotr Jasiukajtis sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; 590*25c28e83SPiotr Jasiukajtis add %i1,stridex2,%o5 ! px += stridex2 591*25c28e83SPiotr Jasiukajtis fdtos %f58,%f6 ! (4_1) ((float*)&dres0)[0] = (float)res0; 592*25c28e83SPiotr Jasiukajtis 593*25c28e83SPiotr Jasiukajtis fmuld %f62,%f44,%f40 ! (1_0) res1 *= xx1; 594*25c28e83SPiotr Jasiukajtis sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; 595*25c28e83SPiotr Jasiukajtis and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; 596*25c28e83SPiotr Jasiukajtis fdtos %f56,%f7 ! (5_1) ((float*)&dres0)[1] = (float)res1; 597*25c28e83SPiotr Jasiukajtis 598*25c28e83SPiotr Jasiukajtis ldd [%l5+TBL],%f54 ! 
(4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 599*25c28e83SPiotr Jasiukajtis sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; 600*25c28e83SPiotr Jasiukajtis and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; 601*25c28e83SPiotr Jasiukajtis fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); 602*25c28e83SPiotr Jasiukajtis 603*25c28e83SPiotr Jasiukajtis ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 604*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24; 605*25c28e83SPiotr Jasiukajtis sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1; 606*25c28e83SPiotr Jasiukajtis faddd %f52,K2,%f58 ! (2_0) res0 += K2; 607*25c28e83SPiotr Jasiukajtis 608*25c28e83SPiotr Jasiukajtis ldd [%o0+8],%f42 ! (0_0) tbl_sqrt0 = ((double*)addr0)[1]; 609*25c28e83SPiotr Jasiukajtis and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff; 610*25c28e83SPiotr Jasiukajtis add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; 611*25c28e83SPiotr Jasiukajtis faddd %f50,K2,%f60 ! (3_0) res1 += K2; 612*25c28e83SPiotr Jasiukajtis 613*25c28e83SPiotr Jasiukajtis ldd [%o7+8],%f28 ! (1_0) tbl_sqrt1 = ((double*)addr1)[1]; 614*25c28e83SPiotr Jasiukajtis sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; 615*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; 616*25c28e83SPiotr Jasiukajtis fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); 617*25c28e83SPiotr Jasiukajtis 618*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp0],%f52 ! (4_1) fdx0 = *((double*)lexp0); 619*25c28e83SPiotr Jasiukajtis sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55; 620*25c28e83SPiotr Jasiukajtis fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); 621*25c28e83SPiotr Jasiukajtis 622*25c28e83SPiotr Jasiukajtis fmuld %f58,%f30,%f62 ! (2_0) res0 *= xx0; 623*25c28e83SPiotr Jasiukajtis or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1; 624*25c28e83SPiotr Jasiukajtis faddd %f48,K0,%f22 ! (0_0) res0 += K0; 625*25c28e83SPiotr Jasiukajtis 626*25c28e83SPiotr Jasiukajtis fmuld %f60,%f24,%f58 ! 
(3_0) res1 *= xx1; 627*25c28e83SPiotr Jasiukajtis stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); 628*25c28e83SPiotr Jasiukajtis faddd %f40,K0,%f26 ! (1_0) res1 += K0; 629*25c28e83SPiotr Jasiukajtis 630*25c28e83SPiotr Jasiukajtis fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0; 631*25c28e83SPiotr Jasiukajtis fpadd32 %f6,%f52,%f10 ! (4_1) dres0 = vis_fpadd32(dres0,fdx0); 632*25c28e83SPiotr Jasiukajtis 633*25c28e83SPiotr Jasiukajtis or %g0,%i2,%l7 634*25c28e83SPiotr Jasiukajtis add stridey,stridey,stridey2 635*25c28e83SPiotr Jasiukajtis 636*25c28e83SPiotr Jasiukajtis cmp counter,6 637*25c28e83SPiotr Jasiukajtis bl,pn %icc,.tail 638*25c28e83SPiotr Jasiukajtis nop 639*25c28e83SPiotr Jasiukajtis 640*25c28e83SPiotr Jasiukajtis ba .main_loop 641*25c28e83SPiotr Jasiukajtis sub counter,6,counter ! counter 642*25c28e83SPiotr Jasiukajtis 643*25c28e83SPiotr Jasiukajtis .align 16 644*25c28e83SPiotr Jasiukajtis.main_loop: 645*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%f18 ! (0_0) ((float*)&ddx0)[0] = *px; 646*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7f800000 ! (4_1) ax0 ? 0x7f800000 647*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update10 ! (4_1) if ( ax0 >= 0x7f800000 ) 648*25c28e83SPiotr Jasiukajtis fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; 649*25c28e83SPiotr Jasiukajtis.cont10: 650*25c28e83SPiotr Jasiukajtis lda [stridex+%i1]0x82,%f19 ! (1_0) ((float*)&ddx0)[1] = *(px + stridex); 651*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00800000 ! (4_1) ax0 ? 0x00800000 652*25c28e83SPiotr Jasiukajtis fmuld %f42,%f22,%f44 ! (0_1) res0 = tbl_sqrt0 * res0; 653*25c28e83SPiotr Jasiukajtis faddd %f62,K1,%f42 ! (2_1) res0 += K1; 654*25c28e83SPiotr Jasiukajtis 655*25c28e83SPiotr Jasiukajtis lda [%i1]0x82,%g1 ! (0_0) ax0 = *(int*)px; 656*25c28e83SPiotr Jasiukajtis fmuld %f28,%f26,%f60 ! (1_1) res1 = tbl_sqrt1 * res1; 657*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update11 ! (4_1) if ( ax0 < 0x00800000 ) 658*25c28e83SPiotr Jasiukajtis faddd %f58,K1,%f62 ! 
(3_1) res1 += K1; 659*25c28e83SPiotr Jasiukajtis.cont11: 660*25c28e83SPiotr Jasiukajtis lda [stridex+%i1]0x82,%i4 ! (1_0) ax1 = *(int*)(px + stridex); 661*25c28e83SPiotr Jasiukajtis cmp %g5,_0x7f800000 ! (5_1) ax1 ? 0x7f800000 662*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update12 ! (5_1) if ( ax1 >= 0x7f800000 ) 663*25c28e83SPiotr Jasiukajtis fmuld K3,%f40,%f52 ! (4_1) res0 = K3 * xx0; 664*25c28e83SPiotr Jasiukajtis.cont12: 665*25c28e83SPiotr Jasiukajtis fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1; 666*25c28e83SPiotr Jasiukajtis cmp %g5,_0x00800000 ! (5_1) ax1 ? 0x00800000 667*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update13 ! (5_1) if ( ax1 < 0x00800000 ) 668*25c28e83SPiotr Jasiukajtis fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0); 669*25c28e83SPiotr Jasiukajtis.cont13: 670*25c28e83SPiotr Jasiukajtis fmuld %f42,%f30,%f48 ! (2_1) res0 *= xx0; 671*25c28e83SPiotr Jasiukajtis sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; 672*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7f800000 ! (0_0) ax0 ? 0x7f800000 673*25c28e83SPiotr Jasiukajtis fdtos %f44,%f8 ! (0_1) ((float*)&dres0)[0] = (float)res0; 674*25c28e83SPiotr Jasiukajtis 675*25c28e83SPiotr Jasiukajtis fmuld %f62,%f24,%f58 ! (3_1) res1 *= xx1; 676*25c28e83SPiotr Jasiukajtis sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; 677*25c28e83SPiotr Jasiukajtis and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; 678*25c28e83SPiotr Jasiukajtis fdtos %f60,%f9 ! (1_1) ((float*)&dres0)[1] = (float)res1; 679*25c28e83SPiotr Jasiukajtis 680*25c28e83SPiotr Jasiukajtis ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 681*25c28e83SPiotr Jasiukajtis sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24; 682*25c28e83SPiotr Jasiukajtis and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; 683*25c28e83SPiotr Jasiukajtis fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); 684*25c28e83SPiotr Jasiukajtis 685*25c28e83SPiotr Jasiukajtis ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 686*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! 
(0_0) iexp0 = ax0 >> 24; 687*25c28e83SPiotr Jasiukajtis sub %l0,%i1,%i1 ! (1_0) iexp1 = 0x3f - iexp1; 688*25c28e83SPiotr Jasiukajtis faddd %f52,K2,%f62 ! (4_1) res0 += K2; 689*25c28e83SPiotr Jasiukajtis 690*25c28e83SPiotr Jasiukajtis ldd [%i0+8],%f42 ! (2_1) tbl_sqrt0 = ((double*)addr0)[1]; 691*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; 692*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update14 ! (0_0) if ( ax0 >= 0x7f800000 ) 693*25c28e83SPiotr Jasiukajtis faddd %f50,K2,%f60 ! (5_1) res1 += K2; 694*25c28e83SPiotr Jasiukajtis.cont14: 695*25c28e83SPiotr Jasiukajtis ldd [%o1+8],%f28 ! (3_1) tbl_sqrt1 = ((double*)addr0)[1]; 696*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00800000 ! (0_0) ax0 ? 0x00800000 697*25c28e83SPiotr Jasiukajtis and %i1,511,%i0 ! (1_0) iexp1 = 0x1ff; 698*25c28e83SPiotr Jasiukajtis fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]); 699*25c28e83SPiotr Jasiukajtis 700*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp1],%f52 ! (0_1) fdx0 = *((double*)lexp0); 701*25c28e83SPiotr Jasiukajtis sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; 702*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update15 ! (0_0) if ( ax0 < 0x00800000 ) 703*25c28e83SPiotr Jasiukajtis fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); 704*25c28e83SPiotr Jasiukajtis.cont15: 705*25c28e83SPiotr Jasiukajtis fmuld %f62,%f40,%f30 ! (4_1) res0 *= xx0; 706*25c28e83SPiotr Jasiukajtis sllx %g5,55,%g5 ! (0_0) lexp0 = iexp0 << 55; 707*25c28e83SPiotr Jasiukajtis st %f10,[%l7] ! (4_2) *py = ((float*)&dres0)[0]; 708*25c28e83SPiotr Jasiukajtis faddd %f48,K0,%f62 ! (2_1) res0 += K0; 709*25c28e83SPiotr Jasiukajtis 710*25c28e83SPiotr Jasiukajtis fmuld %f60,%f46,%f48 ! (5_1) res1 *= xx1; 711*25c28e83SPiotr Jasiukajtis or %g5,%i0,%g5 ! (0_0) lexp0 |= lexp1; 712*25c28e83SPiotr Jasiukajtis stx %g5,[%fp+tmp1] ! (0_0) fdx0 = *((double*)lexp0); 713*25c28e83SPiotr Jasiukajtis faddd %f58,K0,%f60 ! 
(3_1) res1 += K0; 714*25c28e83SPiotr Jasiukajtis 715*25c28e83SPiotr Jasiukajtis fmuld %f56,%f54,%f26 ! (0_0) xx0 = dtmp0 * tbl_div0; 716*25c28e83SPiotr Jasiukajtis sll stridex,1,stridex2 ! stridex2 = stridex * 2; 717*25c28e83SPiotr Jasiukajtis st %f11,[stridey+%l7] ! (5_2) *(py + stridey) = ((float*)&dres0)[1]; 718*25c28e83SPiotr Jasiukajtis fpadd32 %f8,%f52,%f10 ! (0_1) dres0 = vis_fpadd32(dres0,fdx0); 719*25c28e83SPiotr Jasiukajtis 720*25c28e83SPiotr Jasiukajtis lda [%o5]0x82,%f24 ! (2_0) ((float*)&ddx0)[0] = *px; 721*25c28e83SPiotr Jasiukajtis add %l7,stridey2,%i1 ! py += stridey2 722*25c28e83SPiotr Jasiukajtis add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; 723*25c28e83SPiotr Jasiukajtis fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; 724*25c28e83SPiotr Jasiukajtis 725*25c28e83SPiotr Jasiukajtis lda [stridex+%o5]0x82,%f25 ! (3_0) ((float*)&ddx0)[1] = *(px + stridex); 726*25c28e83SPiotr Jasiukajtis add %l5,TBL,%l5 ! (4_1) addr0 = (char*)TBL + si0; 727*25c28e83SPiotr Jasiukajtis fmuld %f42,%f62,%f58 ! (2_1) res0 = tbl_sqrt0 * res0; 728*25c28e83SPiotr Jasiukajtis faddd %f30,K1,%f62 ! (4_1) res0 += K1; 729*25c28e83SPiotr Jasiukajtis 730*25c28e83SPiotr Jasiukajtis lda [%o5]0x82,%g1 ! (2_0) ax0 = *(int*)px; 731*25c28e83SPiotr Jasiukajtis add %o5,stridex2,%l7 ! px += stridex2 732*25c28e83SPiotr Jasiukajtis fmuld %f28,%f60,%f56 ! (3_1) res1 = tbl_sqrt1 * res1; 733*25c28e83SPiotr Jasiukajtis faddd %f48,K1,%f42 ! (5_1) res1 += K1; 734*25c28e83SPiotr Jasiukajtis 735*25c28e83SPiotr Jasiukajtis lda [stridex+%o5]0x82,%o5 ! (3_0) ax1 = *(int*)(px + stridex); 736*25c28e83SPiotr Jasiukajtis cmp %i4,_0x7f800000 ! (1_0) ax1 ? 0x7f800000 737*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update16 ! (1_0) if ( ax1 >= 0x7f800000 ) 738*25c28e83SPiotr Jasiukajtis fmuld K3,%f26,%f52 ! (0_0) res0 = K3 * xx0; 739*25c28e83SPiotr Jasiukajtis.cont16: 740*25c28e83SPiotr Jasiukajtis fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; 741*25c28e83SPiotr Jasiukajtis cmp %i4,_0x00800000 ! (1_0) ax1 ? 
0x00800000 742*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update17 ! (1_0) if ( ax1 < 0x00800000 ) 743*25c28e83SPiotr Jasiukajtis fand %f24,DC0,%f54 ! (2_0) dfx0 = vis_fand(ddx0,DC0); 744*25c28e83SPiotr Jasiukajtis.cont17: 745*25c28e83SPiotr Jasiukajtis fmuld %f62,%f40,%f48 ! (4_1) res0 *= xx0; 746*25c28e83SPiotr Jasiukajtis sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; 747*25c28e83SPiotr Jasiukajtis cmp %g1,_0x7f800000 ! (2_0) ax0 ? 0x7f800000 748*25c28e83SPiotr Jasiukajtis fdtos %f58,%f20 ! (2_1) ((float*)&dres0)[0] = (float)res0; 749*25c28e83SPiotr Jasiukajtis 750*25c28e83SPiotr Jasiukajtis fmuld %f42,%f46,%f58 ! (5_1) res1 *= xx1; 751*25c28e83SPiotr Jasiukajtis sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; 752*25c28e83SPiotr Jasiukajtis and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; 753*25c28e83SPiotr Jasiukajtis fdtos %f56,%f21 ! (3_1) ((float*)&dres0)[0] = (float)res0; 754*25c28e83SPiotr Jasiukajtis 755*25c28e83SPiotr Jasiukajtis ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 756*25c28e83SPiotr Jasiukajtis sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24; 757*25c28e83SPiotr Jasiukajtis and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; 758*25c28e83SPiotr Jasiukajtis fpsub32 %f24,%f54,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); 759*25c28e83SPiotr Jasiukajtis 760*25c28e83SPiotr Jasiukajtis ldd [%o1+TBL],%f46 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 761*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; 762*25c28e83SPiotr Jasiukajtis sub %l0,%o3,%o3 ! (3_0) iexp1 = 0x3f - iexp1; 763*25c28e83SPiotr Jasiukajtis faddd %f52,K2,%f40 ! (0_0) res0 += K2; 764*25c28e83SPiotr Jasiukajtis 765*25c28e83SPiotr Jasiukajtis ldd [%l5+8],%f42 ! (4_1) tbl_sqrt0 = ((double*)addr0)[1]; 766*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; 767*25c28e83SPiotr Jasiukajtis and %o3,511,%i3 ! (3_0) iexp1 &= 0x1ff; 768*25c28e83SPiotr Jasiukajtis faddd %f50,K2,%f60 ! 
(1_0) res0 += K2; 769*25c28e83SPiotr Jasiukajtis 770*25c28e83SPiotr Jasiukajtis ldd [%l6+8],%f28 ! (5_1) tbl_sqrt1 = ((double*)addr1)[1]; 771*25c28e83SPiotr Jasiukajtis sllx %g5,55,%g5 ! (2_0) lexp0 = iexp0 << 55; 772*25c28e83SPiotr Jasiukajtis add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; 773*25c28e83SPiotr Jasiukajtis fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); 774*25c28e83SPiotr Jasiukajtis 775*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp2],%f52 ! (2_1) fdx0 = *((double*)lexp0); 776*25c28e83SPiotr Jasiukajtis sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; 777*25c28e83SPiotr Jasiukajtis add %i1,stridey2,%o3 ! py += stridey2 778*25c28e83SPiotr Jasiukajtis fitod %f13,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); 779*25c28e83SPiotr Jasiukajtis 780*25c28e83SPiotr Jasiukajtis fmuld %f40,%f26,%f40 ! (0_0) res0 *= xx0; 781*25c28e83SPiotr Jasiukajtis or %g5,%i3,%g5 ! (2_0) lexp0 |= lexp1; 782*25c28e83SPiotr Jasiukajtis st %f10,[%i1] ! (0_1) *py = ((float*)&dres0)[0]; 783*25c28e83SPiotr Jasiukajtis faddd %f48,K0,%f62 ! (4_1) res0 += K0; 784*25c28e83SPiotr Jasiukajtis 785*25c28e83SPiotr Jasiukajtis fmuld %f60,%f44,%f48 ! (1_0) res1 *= xx1; 786*25c28e83SPiotr Jasiukajtis add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; 787*25c28e83SPiotr Jasiukajtis stx %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); 788*25c28e83SPiotr Jasiukajtis faddd %f58,K0,%f60 ! (5_1) res1 += K0; 789*25c28e83SPiotr Jasiukajtis 790*25c28e83SPiotr Jasiukajtis fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0; 791*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update18 ! (2_0) if ( ax0 >= 0x7f800000 ) 792*25c28e83SPiotr Jasiukajtis st %f11,[stridey+%i1] ! (1_1) *(py + stridey) = ((float*)&dres0)[1]; 793*25c28e83SPiotr Jasiukajtis fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0); 794*25c28e83SPiotr Jasiukajtis.cont18: 795*25c28e83SPiotr Jasiukajtis cmp %g1,_0x00800000 ! (2_0) ax0 ? 0x00800000 796*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update19 ! 
(2_0) if ( ax0 < 0x00800000 ) 797*25c28e83SPiotr Jasiukajtis lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px; 798*25c28e83SPiotr Jasiukajtis fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; 799*25c28e83SPiotr Jasiukajtis.cont19: 800*25c28e83SPiotr Jasiukajtis lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex); 801*25c28e83SPiotr Jasiukajtis cmp %o5,_0x7f800000 ! (3_0) ax1 ? 0x7f800000 802*25c28e83SPiotr Jasiukajtis fmuld %f42,%f62,%f58 ! (4_1) res0 = tbl_sqrt0 * res0; 803*25c28e83SPiotr Jasiukajtis faddd %f40,K1,%f46 ! (0_0) res0 += K1; 804*25c28e83SPiotr Jasiukajtis 805*25c28e83SPiotr Jasiukajtis lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px; 806*25c28e83SPiotr Jasiukajtis add %l7,stridex2,%i1 ! px += stridex2 807*25c28e83SPiotr Jasiukajtis fmuld %f28,%f60,%f56 ! (5_1) res1 = tbl_sqrt1 * res1; 808*25c28e83SPiotr Jasiukajtis faddd %f48,K1,%f62 ! (1_0) res1 += K1; 809*25c28e83SPiotr Jasiukajtis 810*25c28e83SPiotr Jasiukajtis lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex); 811*25c28e83SPiotr Jasiukajtis add %o0,TBL,%o0 ! (0_0) addr0 = (char*)TBL + si0; 812*25c28e83SPiotr Jasiukajtis bge,pn %icc,.update20 ! (3_0) if ( ax1 >= 0x7f800000 ) 813*25c28e83SPiotr Jasiukajtis fmuld K3,%f30,%f52 ! (2_0) res0 = K3 * xx0; 814*25c28e83SPiotr Jasiukajtis.cont20: 815*25c28e83SPiotr Jasiukajtis fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1; 816*25c28e83SPiotr Jasiukajtis cmp %o5,_0x00800000 ! (3_0) ax1 ? 0x00800000 817*25c28e83SPiotr Jasiukajtis bl,pn %icc,.update21 ! (3_0) if ( ax1 < 0x00800000 ) 818*25c28e83SPiotr Jasiukajtis fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); 819*25c28e83SPiotr Jasiukajtis.cont21: 820*25c28e83SPiotr Jasiukajtis fmuld %f46,%f26,%f48 ! (0_0) res0 *= xx0; 821*25c28e83SPiotr Jasiukajtis sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; 822*25c28e83SPiotr Jasiukajtis add %i1,stridex2,%o5 ! px += stridex2 823*25c28e83SPiotr Jasiukajtis fdtos %f58,%f6 ! 
(4_1) ((float*)&dres0)[0] = (float)res0; 824*25c28e83SPiotr Jasiukajtis 825*25c28e83SPiotr Jasiukajtis fmuld %f62,%f44,%f40 ! (1_0) res1 *= xx1; 826*25c28e83SPiotr Jasiukajtis sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; 827*25c28e83SPiotr Jasiukajtis and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; 828*25c28e83SPiotr Jasiukajtis fdtos %f56,%f7 ! (5_1) ((float*)&dres0)[1] = (float)res1; 829*25c28e83SPiotr Jasiukajtis 830*25c28e83SPiotr Jasiukajtis ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 831*25c28e83SPiotr Jasiukajtis sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; 832*25c28e83SPiotr Jasiukajtis and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; 833*25c28e83SPiotr Jasiukajtis fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); 834*25c28e83SPiotr Jasiukajtis 835*25c28e83SPiotr Jasiukajtis ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 836*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24; 837*25c28e83SPiotr Jasiukajtis sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1; 838*25c28e83SPiotr Jasiukajtis faddd %f52,K2,%f58 ! (2_0) res0 += K2; 839*25c28e83SPiotr Jasiukajtis 840*25c28e83SPiotr Jasiukajtis ldd [%o0+8],%f42 ! (0_0) tbl_sqrt0 = ((double*)addr0)[1]; 841*25c28e83SPiotr Jasiukajtis and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff; 842*25c28e83SPiotr Jasiukajtis add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; 843*25c28e83SPiotr Jasiukajtis faddd %f50,K2,%f60 ! (3_0) res1 += K2; 844*25c28e83SPiotr Jasiukajtis 845*25c28e83SPiotr Jasiukajtis ldd [%o7+8],%f28 ! (1_0) tbl_sqrt1 = ((double*)addr1)[1]; 846*25c28e83SPiotr Jasiukajtis sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; 847*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; 848*25c28e83SPiotr Jasiukajtis fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); 849*25c28e83SPiotr Jasiukajtis 850*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp0],%f52 ! (4_1) fdx0 = *((double*)lexp0); 851*25c28e83SPiotr Jasiukajtis sllx %o0,55,%o0 ! 
(4_0) lexp0 = iexp0 << 55; 852*25c28e83SPiotr Jasiukajtis add %o3,stridey2,%l7 ! py += stridey2 853*25c28e83SPiotr Jasiukajtis fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); 854*25c28e83SPiotr Jasiukajtis 855*25c28e83SPiotr Jasiukajtis fmuld %f58,%f30,%f62 ! (2_0) res0 *= xx0; 856*25c28e83SPiotr Jasiukajtis or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1; 857*25c28e83SPiotr Jasiukajtis st %f0,[%o3] ! (2_1) *py = ((float*)&dres0)[0]; 858*25c28e83SPiotr Jasiukajtis faddd %f48,K0,%f22 ! (0_0) res0 += K0; 859*25c28e83SPiotr Jasiukajtis 860*25c28e83SPiotr Jasiukajtis fmuld %f60,%f24,%f58 ! (3_0) res1 *= xx1; 861*25c28e83SPiotr Jasiukajtis subcc counter,6,counter ! counter -= 6; 862*25c28e83SPiotr Jasiukajtis stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); 863*25c28e83SPiotr Jasiukajtis faddd %f40,K0,%f26 ! (1_0) res1 += K0; 864*25c28e83SPiotr Jasiukajtis 865*25c28e83SPiotr Jasiukajtis fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0; 866*25c28e83SPiotr Jasiukajtis st %f1,[stridey+%o3] ! (3_1) *(py + stridey) = ((float*)&dres0)[1]; 867*25c28e83SPiotr Jasiukajtis bpos,pt %icc,.main_loop 868*25c28e83SPiotr Jasiukajtis fpadd32 %f6,%f52,%f10 ! (4_1) dres0 = vis_fpadd32(dres0,fdx0); 869*25c28e83SPiotr Jasiukajtis 870*25c28e83SPiotr Jasiukajtis add counter,6,counter 871*25c28e83SPiotr Jasiukajtis.tail: 872*25c28e83SPiotr Jasiukajtis sll stridex,1,stridex2 873*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 874*25c28e83SPiotr Jasiukajtis bneg,a .begin 875*25c28e83SPiotr Jasiukajtis mov %l7,%i2 876*25c28e83SPiotr Jasiukajtis 877*25c28e83SPiotr Jasiukajtis fmuld %f42,%f22,%f44 ! (0_1) res0 = tbl_sqrt0 * res0; 878*25c28e83SPiotr Jasiukajtis faddd %f62,K1,%f42 ! (2_1) res0 += K1; 879*25c28e83SPiotr Jasiukajtis 880*25c28e83SPiotr Jasiukajtis fmuld %f28,%f26,%f60 ! (1_1) res1 = tbl_sqrt1 * res1; 881*25c28e83SPiotr Jasiukajtis 882*25c28e83SPiotr Jasiukajtis fmuld %f42,%f30,%f48 ! (2_1) res0 *= xx0; 883*25c28e83SPiotr Jasiukajtis fdtos %f44,%f8 ! 
(0_1) ((float*)&dres0)[0] = (float)res0; 884*25c28e83SPiotr Jasiukajtis 885*25c28e83SPiotr Jasiukajtis fdtos %f60,%f9 ! (1_1) ((float*)&dres0)[1] = (float)res1; 886*25c28e83SPiotr Jasiukajtis 887*25c28e83SPiotr Jasiukajtis ldd [%i0+8],%f42 ! (2_1) tbl_sqrt0 = ((double*)addr0)[1]; 888*25c28e83SPiotr Jasiukajtis 889*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp1],%f52 ! (0_1) fdx0 = *((double*)lexp0); 890*25c28e83SPiotr Jasiukajtis 891*25c28e83SPiotr Jasiukajtis st %f10,[%l7] ! (4_2) *py = ((float*)&dres0)[0]; 892*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 893*25c28e83SPiotr Jasiukajtis bneg,a .begin 894*25c28e83SPiotr Jasiukajtis add %l7,stridey,%i2 895*25c28e83SPiotr Jasiukajtis 896*25c28e83SPiotr Jasiukajtis faddd %f48,K0,%f62 ! (2_1) res0 += K0; 897*25c28e83SPiotr Jasiukajtis st %f11,[stridey+%l7] ! (5_2) *(py + stridey) = ((float*)&dres0)[1]; 898*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 899*25c28e83SPiotr Jasiukajtis bneg,a .begin 900*25c28e83SPiotr Jasiukajtis add %l7,stridey2,%i2 901*25c28e83SPiotr Jasiukajtis fpadd32 %f8,%f52,%f10 ! (0_1) dres0 = vis_fpadd32(dres0,fdx0); 902*25c28e83SPiotr Jasiukajtis 903*25c28e83SPiotr Jasiukajtis add %l7,stridey2,%i1 ! py += stridey2 904*25c28e83SPiotr Jasiukajtis 905*25c28e83SPiotr Jasiukajtis fmuld %f42,%f62,%f58 ! (2_1) res0 = tbl_sqrt0 * res0; 906*25c28e83SPiotr Jasiukajtis 907*25c28e83SPiotr Jasiukajtis fdtos %f58,%f20 ! (2_1) ((float*)&dres0)[0] = (float)res0; 908*25c28e83SPiotr Jasiukajtis 909*25c28e83SPiotr Jasiukajtis ldd [%fp+tmp2],%f52 ! (2_1) fdx0 = *((double*)lexp0); 910*25c28e83SPiotr Jasiukajtis add %i1,stridey2,%o3 ! py += stridey2 911*25c28e83SPiotr Jasiukajtis 912*25c28e83SPiotr Jasiukajtis st %f10,[%i1] ! (0_1) *py = ((float*)&dres0)[0]; 913*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 914*25c28e83SPiotr Jasiukajtis bneg,a .begin 915*25c28e83SPiotr Jasiukajtis add %i1,stridey,%i2 916*25c28e83SPiotr Jasiukajtis 917*25c28e83SPiotr Jasiukajtis st %f11,[stridey+%i1] ! 
(1_1) *(py + stridey) = ((float*)&dres0)[1]; 918*25c28e83SPiotr Jasiukajtis subcc counter,1,counter 919*25c28e83SPiotr Jasiukajtis bneg,a .begin 920*25c28e83SPiotr Jasiukajtis mov %o3,%i2 921*25c28e83SPiotr Jasiukajtis fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0); 922*25c28e83SPiotr Jasiukajtis 923*25c28e83SPiotr Jasiukajtis st %f0,[%o3] ! (2_1) *py = ((float*)&dres0)[0]; 924*25c28e83SPiotr Jasiukajtis ba .begin 925*25c28e83SPiotr Jasiukajtis add %o3,stridey,%i2 926*25c28e83SPiotr Jasiukajtis 927*25c28e83SPiotr Jasiukajtis .align 16 928*25c28e83SPiotr Jasiukajtis.spec0: 929*25c28e83SPiotr Jasiukajtis fdivs FONE,%f14,%f14 ! x0 = FONE / x0; 930*25c28e83SPiotr Jasiukajtis add %l7,stridex,%l7 ! px += stridex 931*25c28e83SPiotr Jasiukajtis st %f14,[%i2] ! *py = x0; 932*25c28e83SPiotr Jasiukajtis sub counter,1,counter 933*25c28e83SPiotr Jasiukajtis ba .begin1 934*25c28e83SPiotr Jasiukajtis add %i2,stridey,%i2 ! py += stridey 935*25c28e83SPiotr Jasiukajtis 936*25c28e83SPiotr Jasiukajtis .align 16 937*25c28e83SPiotr Jasiukajtis.spec1: 938*25c28e83SPiotr Jasiukajtis andcc %g1,%o0,%g0 939*25c28e83SPiotr Jasiukajtis bz,a 1f 940*25c28e83SPiotr Jasiukajtis fdivs FONE,%f14,%f14 ! x0 = DONE / x0; 941*25c28e83SPiotr Jasiukajtis 942*25c28e83SPiotr Jasiukajtis cmp %g1,0 943*25c28e83SPiotr Jasiukajtis bl,a 1f 944*25c28e83SPiotr Jasiukajtis fsqrts %f14,%f14 ! x0 = sqrtf(x0); 945*25c28e83SPiotr Jasiukajtis 946*25c28e83SPiotr Jasiukajtis fitod %f14,%f0 947*25c28e83SPiotr Jasiukajtis fdtos %f0,%f14 948*25c28e83SPiotr Jasiukajtis fmuls %f14,FTWO,%f14 949*25c28e83SPiotr Jasiukajtis st %f14,[%fp+tmp3] 950*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%g1 951*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%o0 952*25c28e83SPiotr Jasiukajtis sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13; 953*25c28e83SPiotr Jasiukajtis fands %f14,DC0,%f16 ! 
(4_0) dfx0 = vis_fand(ddx0,DC0); 954*25c28e83SPiotr Jasiukajtis ba .cont_spec 955*25c28e83SPiotr Jasiukajtis sub %g1,%o0,%g1 956*25c28e83SPiotr Jasiukajtis1: 957*25c28e83SPiotr Jasiukajtis add %l7,stridex,%l7 ! px += stridex 958*25c28e83SPiotr Jasiukajtis sub counter,1,counter 959*25c28e83SPiotr Jasiukajtis st %f14,[%i2] ! *py = x0; 960*25c28e83SPiotr Jasiukajtis ba .begin1 961*25c28e83SPiotr Jasiukajtis add %i2,stridey,%i2 ! py += stridey 962*25c28e83SPiotr Jasiukajtis 963*25c28e83SPiotr Jasiukajtis .align 16 964*25c28e83SPiotr Jasiukajtis.update0: 965*25c28e83SPiotr Jasiukajtis cmp counter,1 966*25c28e83SPiotr Jasiukajtis ble .cont0 967*25c28e83SPiotr Jasiukajtis nop 968*25c28e83SPiotr Jasiukajtis 969*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%o1 970*25c28e83SPiotr Jasiukajtis stx %o1,[%fp+tmp_px] 971*25c28e83SPiotr Jasiukajtis 972*25c28e83SPiotr Jasiukajtis sub counter,1,counter 973*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 974*25c28e83SPiotr Jasiukajtis 975*25c28e83SPiotr Jasiukajtis ba .cont0 976*25c28e83SPiotr Jasiukajtis mov 1,counter 977*25c28e83SPiotr Jasiukajtis 978*25c28e83SPiotr Jasiukajtis .align 16 979*25c28e83SPiotr Jasiukajtis.update1: 980*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%o0 981*25c28e83SPiotr Jasiukajtis cmp counter,1 982*25c28e83SPiotr Jasiukajtis ble .cont1 983*25c28e83SPiotr Jasiukajtis 984*25c28e83SPiotr Jasiukajtis add %o0,0x3ff,%o0 985*25c28e83SPiotr Jasiukajtis 986*25c28e83SPiotr Jasiukajtis andcc %g5,%o0,%g0 987*25c28e83SPiotr Jasiukajtis bz,a 1f 988*25c28e83SPiotr Jasiukajtis nop 989*25c28e83SPiotr Jasiukajtis 990*25c28e83SPiotr Jasiukajtis cmp %g5,0 991*25c28e83SPiotr Jasiukajtis bl,a 1f 992*25c28e83SPiotr Jasiukajtis nop 993*25c28e83SPiotr Jasiukajtis 994*25c28e83SPiotr Jasiukajtis fitod %f15,%f0 995*25c28e83SPiotr Jasiukajtis fdtos %f0,%f15 996*25c28e83SPiotr Jasiukajtis fmuls %f15,FTWO,%f15 997*25c28e83SPiotr Jasiukajtis st %f15,[%fp+tmp3] 998*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%g5 
999*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%o0 1000*25c28e83SPiotr Jasiukajtis sub %g5,%o0,%g5 1001*25c28e83SPiotr Jasiukajtis 1002*25c28e83SPiotr Jasiukajtis fands %f15,DC0,%f17 ! (4_0) dfx0 = vis_fand(ddx0,DC0); 1003*25c28e83SPiotr Jasiukajtis 1004*25c28e83SPiotr Jasiukajtis sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; 1005*25c28e83SPiotr Jasiukajtis 1006*25c28e83SPiotr Jasiukajtis sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24; 1007*25c28e83SPiotr Jasiukajtis and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; 1008*25c28e83SPiotr Jasiukajtis 1009*25c28e83SPiotr Jasiukajtis fpsub32s %f15,%f17,%f17 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1010*25c28e83SPiotr Jasiukajtis 1011*25c28e83SPiotr Jasiukajtis ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 1012*25c28e83SPiotr Jasiukajtis sub %l0,%l7,%l1 ! (5_0) iexp1 = 0x3f - iexp1; 1013*25c28e83SPiotr Jasiukajtis 1014*25c28e83SPiotr Jasiukajtis sll %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; 1015*25c28e83SPiotr Jasiukajtis add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; 1016*25c28e83SPiotr Jasiukajtis st %l1,[%fp+tmp0+4] ! (4_0) fdx0 = *((double*)lexp0); 1017*25c28e83SPiotr Jasiukajtis fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); 1018*25c28e83SPiotr Jasiukajtis 1019*25c28e83SPiotr Jasiukajtis fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; 1020*25c28e83SPiotr Jasiukajtis 1021*25c28e83SPiotr Jasiukajtis ba .cont1 1022*25c28e83SPiotr Jasiukajtis fmuld K3,%f46,%f50 ! 
(5_1) res1 = K3 * xx1; 1023*25c28e83SPiotr Jasiukajtis1: 1024*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%o1 1025*25c28e83SPiotr Jasiukajtis stx %o1,[%fp+tmp_px] 1026*25c28e83SPiotr Jasiukajtis 1027*25c28e83SPiotr Jasiukajtis sub counter,1,counter 1028*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1029*25c28e83SPiotr Jasiukajtis 1030*25c28e83SPiotr Jasiukajtis ba .cont1 1031*25c28e83SPiotr Jasiukajtis mov 1,counter 1032*25c28e83SPiotr Jasiukajtis 1033*25c28e83SPiotr Jasiukajtis .align 16 1034*25c28e83SPiotr Jasiukajtis.update2: 1035*25c28e83SPiotr Jasiukajtis cmp counter,2 1036*25c28e83SPiotr Jasiukajtis ble .cont2 1037*25c28e83SPiotr Jasiukajtis sub %o5,stridex,%o1 1038*25c28e83SPiotr Jasiukajtis 1039*25c28e83SPiotr Jasiukajtis sub %o1,stridex,%o1 1040*25c28e83SPiotr Jasiukajtis stx %o1,[%fp+tmp_px] 1041*25c28e83SPiotr Jasiukajtis 1042*25c28e83SPiotr Jasiukajtis sub counter,2,counter 1043*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1044*25c28e83SPiotr Jasiukajtis 1045*25c28e83SPiotr Jasiukajtis ba .cont2 1046*25c28e83SPiotr Jasiukajtis mov 2,counter 1047*25c28e83SPiotr Jasiukajtis 1048*25c28e83SPiotr Jasiukajtis .align 16 1049*25c28e83SPiotr Jasiukajtis.update3: 1050*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%o1 1051*25c28e83SPiotr Jasiukajtis cmp counter,2 1052*25c28e83SPiotr Jasiukajtis ble .cont3 1053*25c28e83SPiotr Jasiukajtis 1054*25c28e83SPiotr Jasiukajtis add %o1,0x3ff,%o1 1055*25c28e83SPiotr Jasiukajtis 1056*25c28e83SPiotr Jasiukajtis andcc %g1,%o1,%g0 1057*25c28e83SPiotr Jasiukajtis bz,a 1f 1058*25c28e83SPiotr Jasiukajtis sub %o5,stridex,%o1 1059*25c28e83SPiotr Jasiukajtis 1060*25c28e83SPiotr Jasiukajtis cmp %g1,0 1061*25c28e83SPiotr Jasiukajtis bl,a 1f 1062*25c28e83SPiotr Jasiukajtis sub %o5,stridex,%o1 1063*25c28e83SPiotr Jasiukajtis 1064*25c28e83SPiotr Jasiukajtis fitod %f18,%f0 1065*25c28e83SPiotr Jasiukajtis fdtos %f0,%f18 1066*25c28e83SPiotr Jasiukajtis fmuls %f18,FTWO,%f18 1067*25c28e83SPiotr Jasiukajtis st 
%f18,[%fp+tmp3] 1068*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%g1 1069*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%o1 1070*25c28e83SPiotr Jasiukajtis sub %g1,%o1,%g1 1071*25c28e83SPiotr Jasiukajtis 1072*25c28e83SPiotr Jasiukajtis fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0); 1073*25c28e83SPiotr Jasiukajtis sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; 1074*25c28e83SPiotr Jasiukajtis 1075*25c28e83SPiotr Jasiukajtis and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; 1076*25c28e83SPiotr Jasiukajtis 1077*25c28e83SPiotr Jasiukajtis ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 1078*25c28e83SPiotr Jasiukajtis fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1079*25c28e83SPiotr Jasiukajtis 1080*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24; 1081*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; 1082*25c28e83SPiotr Jasiukajtis ba .cont3 1083*25c28e83SPiotr Jasiukajtis fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]); 1084*25c28e83SPiotr Jasiukajtis1: 1085*25c28e83SPiotr Jasiukajtis sub %o1,stridex,%o1 1086*25c28e83SPiotr Jasiukajtis stx %o1,[%fp+tmp_px] 1087*25c28e83SPiotr Jasiukajtis 1088*25c28e83SPiotr Jasiukajtis sub counter,2,counter 1089*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1090*25c28e83SPiotr Jasiukajtis 1091*25c28e83SPiotr Jasiukajtis ba .cont3 1092*25c28e83SPiotr Jasiukajtis mov 2,counter 1093*25c28e83SPiotr Jasiukajtis 1094*25c28e83SPiotr Jasiukajtis .align 16 1095*25c28e83SPiotr Jasiukajtis.update4: 1096*25c28e83SPiotr Jasiukajtis cmp counter,3 1097*25c28e83SPiotr Jasiukajtis ble .cont4 1098*25c28e83SPiotr Jasiukajtis sub %l7,stridex2,%o1 1099*25c28e83SPiotr Jasiukajtis 1100*25c28e83SPiotr Jasiukajtis sub %o1,stridex,%o1 1101*25c28e83SPiotr Jasiukajtis stx %o1,[%fp+tmp_px] 1102*25c28e83SPiotr Jasiukajtis 1103*25c28e83SPiotr Jasiukajtis sub counter,3,counter 1104*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1105*25c28e83SPiotr Jasiukajtis 
1106*25c28e83SPiotr Jasiukajtis ba .cont4 1107*25c28e83SPiotr Jasiukajtis mov 3,counter 1108*25c28e83SPiotr Jasiukajtis 1109*25c28e83SPiotr Jasiukajtis .align 16 1110*25c28e83SPiotr Jasiukajtis.update5: 1111*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%o1 1112*25c28e83SPiotr Jasiukajtis cmp counter,3 1113*25c28e83SPiotr Jasiukajtis ble .cont5 1114*25c28e83SPiotr Jasiukajtis 1115*25c28e83SPiotr Jasiukajtis add %o1,0x3ff,%o1 1116*25c28e83SPiotr Jasiukajtis 1117*25c28e83SPiotr Jasiukajtis andcc %i4,%o1,%g0 1118*25c28e83SPiotr Jasiukajtis bz,a 1f 1119*25c28e83SPiotr Jasiukajtis sub %l7,stridex2,%o1 1120*25c28e83SPiotr Jasiukajtis 1121*25c28e83SPiotr Jasiukajtis cmp %i4,0 1122*25c28e83SPiotr Jasiukajtis bl,a 1f 1123*25c28e83SPiotr Jasiukajtis sub %l7,stridex2,%o1 1124*25c28e83SPiotr Jasiukajtis 1125*25c28e83SPiotr Jasiukajtis fitod %f19,%f0 1126*25c28e83SPiotr Jasiukajtis fdtos %f0,%f19 1127*25c28e83SPiotr Jasiukajtis fmuls %f19,FTWO,%f19 1128*25c28e83SPiotr Jasiukajtis st %f19,[%fp+tmp3] 1129*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%i4 1130*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%o1 1131*25c28e83SPiotr Jasiukajtis sub %i4,%o1,%i4 1132*25c28e83SPiotr Jasiukajtis 1133*25c28e83SPiotr Jasiukajtis fands %f19,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0); 1134*25c28e83SPiotr Jasiukajtis 1135*25c28e83SPiotr Jasiukajtis sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; 1136*25c28e83SPiotr Jasiukajtis 1137*25c28e83SPiotr Jasiukajtis sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24; 1138*25c28e83SPiotr Jasiukajtis and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; 1139*25c28e83SPiotr Jasiukajtis fpsub32s %f19,%f0,%f31 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1140*25c28e83SPiotr Jasiukajtis 1141*25c28e83SPiotr Jasiukajtis ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 1142*25c28e83SPiotr Jasiukajtis sub %l0,%i1,%i0 ! (1_0) iexp1 = 0x3f - iexp1; 1143*25c28e83SPiotr Jasiukajtis 1144*25c28e83SPiotr Jasiukajtis sll %i0,23,%i0 ! 
(1_0) lexp1 = iexp1 << 23; 1145*25c28e83SPiotr Jasiukajtis fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); 1146*25c28e83SPiotr Jasiukajtis 1147*25c28e83SPiotr Jasiukajtis st %i0,[%fp+tmp1+4] ! (0_0) fdx0 = *((double*)lexp0); 1148*25c28e83SPiotr Jasiukajtis 1149*25c28e83SPiotr Jasiukajtis add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; 1150*25c28e83SPiotr Jasiukajtis fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; 1151*25c28e83SPiotr Jasiukajtis 1152*25c28e83SPiotr Jasiukajtis ba .cont5 1153*25c28e83SPiotr Jasiukajtis fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1; 1154*25c28e83SPiotr Jasiukajtis1: 1155*25c28e83SPiotr Jasiukajtis sub %o1,stridex,%o1 1156*25c28e83SPiotr Jasiukajtis stx %o1,[%fp+tmp_px] 1157*25c28e83SPiotr Jasiukajtis 1158*25c28e83SPiotr Jasiukajtis sub counter,3,counter 1159*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1160*25c28e83SPiotr Jasiukajtis 1161*25c28e83SPiotr Jasiukajtis ba .cont5 1162*25c28e83SPiotr Jasiukajtis mov 3,counter 1163*25c28e83SPiotr Jasiukajtis 1164*25c28e83SPiotr Jasiukajtis .align 16 1165*25c28e83SPiotr Jasiukajtis.update6: 1166*25c28e83SPiotr Jasiukajtis cmp counter,4 1167*25c28e83SPiotr Jasiukajtis ble .cont6 1168*25c28e83SPiotr Jasiukajtis sub %l7,stridex,%o3 1169*25c28e83SPiotr Jasiukajtis 1170*25c28e83SPiotr Jasiukajtis sub %o3,stridex,%o3 1171*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp_px] 1172*25c28e83SPiotr Jasiukajtis 1173*25c28e83SPiotr Jasiukajtis sub counter,4,counter 1174*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1175*25c28e83SPiotr Jasiukajtis 1176*25c28e83SPiotr Jasiukajtis ba .cont6 1177*25c28e83SPiotr Jasiukajtis mov 4,counter 1178*25c28e83SPiotr Jasiukajtis 1179*25c28e83SPiotr Jasiukajtis .align 16 1180*25c28e83SPiotr Jasiukajtis.update7: 1181*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%o3 1182*25c28e83SPiotr Jasiukajtis cmp counter,4 1183*25c28e83SPiotr Jasiukajtis ble .cont7 1184*25c28e83SPiotr Jasiukajtis 1185*25c28e83SPiotr Jasiukajtis add %o3,0x3ff,%o3 
1186*25c28e83SPiotr Jasiukajtis 1187*25c28e83SPiotr Jasiukajtis andcc %g1,%o3,%g0 1188*25c28e83SPiotr Jasiukajtis bz,a 1f 1189*25c28e83SPiotr Jasiukajtis sub %l7,stridex,%o3 1190*25c28e83SPiotr Jasiukajtis 1191*25c28e83SPiotr Jasiukajtis cmp %g1,0 1192*25c28e83SPiotr Jasiukajtis bl,a 1f 1193*25c28e83SPiotr Jasiukajtis sub %l7,stridex,%o3 1194*25c28e83SPiotr Jasiukajtis 1195*25c28e83SPiotr Jasiukajtis fitod %f24,%f0 1196*25c28e83SPiotr Jasiukajtis fdtos %f0,%f24 1197*25c28e83SPiotr Jasiukajtis fmuls %f24,FTWO,%f24 1198*25c28e83SPiotr Jasiukajtis st %f24,[%fp+tmp3] 1199*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%g1 1200*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%o3 1201*25c28e83SPiotr Jasiukajtis sub %g1,%o3,%g1 1202*25c28e83SPiotr Jasiukajtis 1203*25c28e83SPiotr Jasiukajtis fands %f24,DC0,%f0 ! (2_0) dfx0 = vis_fand(ddx0,DC0); 1204*25c28e83SPiotr Jasiukajtis sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; 1205*25c28e83SPiotr Jasiukajtis 1206*25c28e83SPiotr Jasiukajtis and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; 1207*25c28e83SPiotr Jasiukajtis 1208*25c28e83SPiotr Jasiukajtis ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 1209*25c28e83SPiotr Jasiukajtis fpsub32s %f24,%f0,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1210*25c28e83SPiotr Jasiukajtis 1211*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; 1212*25c28e83SPiotr Jasiukajtis 1213*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; 1214*25c28e83SPiotr Jasiukajtis 1215*25c28e83SPiotr Jasiukajtis sll %g5,23,%g5 ! (2_0) lexp0 = iexp0 << 55; 1216*25c28e83SPiotr Jasiukajtis add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; 1217*25c28e83SPiotr Jasiukajtis fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); 1218*25c28e83SPiotr Jasiukajtis 1219*25c28e83SPiotr Jasiukajtis st %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); 1220*25c28e83SPiotr Jasiukajtis ba .cont7 1221*25c28e83SPiotr Jasiukajtis fmuld %f56,%f30,%f30 ! 
(2_0) xx0 = dtmp0 * tbl_div0; 1222*25c28e83SPiotr Jasiukajtis1: 1223*25c28e83SPiotr Jasiukajtis sub %o3,stridex,%o3 1224*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp_px] 1225*25c28e83SPiotr Jasiukajtis 1226*25c28e83SPiotr Jasiukajtis sub counter,4,counter 1227*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1228*25c28e83SPiotr Jasiukajtis 1229*25c28e83SPiotr Jasiukajtis ba .cont7 1230*25c28e83SPiotr Jasiukajtis mov 4,counter 1231*25c28e83SPiotr Jasiukajtis 1232*25c28e83SPiotr Jasiukajtis .align 16 1233*25c28e83SPiotr Jasiukajtis.update8: 1234*25c28e83SPiotr Jasiukajtis cmp counter,5 1235*25c28e83SPiotr Jasiukajtis ble .cont8 1236*25c28e83SPiotr Jasiukajtis nop 1237*25c28e83SPiotr Jasiukajtis 1238*25c28e83SPiotr Jasiukajtis sub %l7,stridex,%o3 1239*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp_px] 1240*25c28e83SPiotr Jasiukajtis 1241*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1242*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1243*25c28e83SPiotr Jasiukajtis 1244*25c28e83SPiotr Jasiukajtis ba .cont8 1245*25c28e83SPiotr Jasiukajtis mov 5,counter 1246*25c28e83SPiotr Jasiukajtis 1247*25c28e83SPiotr Jasiukajtis .align 16 1248*25c28e83SPiotr Jasiukajtis.update9: 1249*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%o3 1250*25c28e83SPiotr Jasiukajtis cmp counter,5 1251*25c28e83SPiotr Jasiukajtis ble .cont9 1252*25c28e83SPiotr Jasiukajtis sub %l7,stridex,%i3 1253*25c28e83SPiotr Jasiukajtis 1254*25c28e83SPiotr Jasiukajtis add %o3,0x3ff,%o3 1255*25c28e83SPiotr Jasiukajtis 1256*25c28e83SPiotr Jasiukajtis andcc %o5,%o3,%g0 1257*25c28e83SPiotr Jasiukajtis bz 1f 1258*25c28e83SPiotr Jasiukajtis ld [%i3],%f0 1259*25c28e83SPiotr Jasiukajtis 1260*25c28e83SPiotr Jasiukajtis cmp %o5,0 1261*25c28e83SPiotr Jasiukajtis bl,a 1f 1262*25c28e83SPiotr Jasiukajtis nop 1263*25c28e83SPiotr Jasiukajtis 1264*25c28e83SPiotr Jasiukajtis fitod %f0,%f0 1265*25c28e83SPiotr Jasiukajtis fdtos %f0,%f0 1266*25c28e83SPiotr Jasiukajtis fmuls %f0,FTWO,%f0 1267*25c28e83SPiotr Jasiukajtis 
st %f0,[%fp+tmp3] 1268*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%o5 1269*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%o3 1270*25c28e83SPiotr Jasiukajtis sub %o5,%o3,%o5 1271*25c28e83SPiotr Jasiukajtis 1272*25c28e83SPiotr Jasiukajtis fands %f0,DC0,%f8 ! (2_0) dfx0 = vis_fand(ddx0,DC0); 1273*25c28e83SPiotr Jasiukajtis 1274*25c28e83SPiotr Jasiukajtis sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; 1275*25c28e83SPiotr Jasiukajtis 1276*25c28e83SPiotr Jasiukajtis sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24; 1277*25c28e83SPiotr Jasiukajtis and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; 1278*25c28e83SPiotr Jasiukajtis fpsub32s %f0,%f8,%f0 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1279*25c28e83SPiotr Jasiukajtis 1280*25c28e83SPiotr Jasiukajtis ldd [%o1+TBL],%f8 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 1281*25c28e83SPiotr Jasiukajtis sub %l0,%o3,%i3 ! (3_0) iexp1 = 0x3f - iexp1; 1282*25c28e83SPiotr Jasiukajtis 1283*25c28e83SPiotr Jasiukajtis sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; 1284*25c28e83SPiotr Jasiukajtis fitod %f0,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); 1285*25c28e83SPiotr Jasiukajtis 1286*25c28e83SPiotr Jasiukajtis add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; 1287*25c28e83SPiotr Jasiukajtis st %i3,[%fp+tmp2+4] ! (2_0) fdx0 = *((double*)lexp0); 1288*25c28e83SPiotr Jasiukajtis 1289*25c28e83SPiotr Jasiukajtis fmuld %f50,%f8,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; 1290*25c28e83SPiotr Jasiukajtis 1291*25c28e83SPiotr Jasiukajtis ba .cont9 1292*25c28e83SPiotr Jasiukajtis fmuld K3,%f24,%f50 ! 
(3_0) res1 = K3 * xx1; 1293*25c28e83SPiotr Jasiukajtis1: 1294*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] 1295*25c28e83SPiotr Jasiukajtis 1296*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1297*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1298*25c28e83SPiotr Jasiukajtis 1299*25c28e83SPiotr Jasiukajtis ba .cont9 1300*25c28e83SPiotr Jasiukajtis mov 5,counter 1301*25c28e83SPiotr Jasiukajtis 1302*25c28e83SPiotr Jasiukajtis .align 16 1303*25c28e83SPiotr Jasiukajtis.update10: 1304*25c28e83SPiotr Jasiukajtis cmp counter,0 1305*25c28e83SPiotr Jasiukajtis ble .cont10 1306*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%o3 1307*25c28e83SPiotr Jasiukajtis 1308*25c28e83SPiotr Jasiukajtis sub %o3,stridex,%o3 1309*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp_px] 1310*25c28e83SPiotr Jasiukajtis 1311*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1312*25c28e83SPiotr Jasiukajtis 1313*25c28e83SPiotr Jasiukajtis ba .cont10 1314*25c28e83SPiotr Jasiukajtis mov 0,counter 1315*25c28e83SPiotr Jasiukajtis 1316*25c28e83SPiotr Jasiukajtis .align 16 1317*25c28e83SPiotr Jasiukajtis.update11: 1318*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%i4 1319*25c28e83SPiotr Jasiukajtis cmp counter,0 1320*25c28e83SPiotr Jasiukajtis ble .cont11 1321*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%o3 1322*25c28e83SPiotr Jasiukajtis 1323*25c28e83SPiotr Jasiukajtis sub %o3,stridex,%o3 1324*25c28e83SPiotr Jasiukajtis add %i4,0x3ff,%i4 1325*25c28e83SPiotr Jasiukajtis ld [%o3],%i3 1326*25c28e83SPiotr Jasiukajtis 1327*25c28e83SPiotr Jasiukajtis andcc %i3,%i4,%g0 1328*25c28e83SPiotr Jasiukajtis bz 1f 1329*25c28e83SPiotr Jasiukajtis 1330*25c28e83SPiotr Jasiukajtis cmp %i3,0 1331*25c28e83SPiotr Jasiukajtis bl,a 1f 1332*25c28e83SPiotr Jasiukajtis nop 1333*25c28e83SPiotr Jasiukajtis 1334*25c28e83SPiotr Jasiukajtis fitod %f14,%f0 1335*25c28e83SPiotr Jasiukajtis fdtos %f0,%f14 1336*25c28e83SPiotr Jasiukajtis fmuls %f14,FTWO,%f14 1337*25c28e83SPiotr Jasiukajtis st %f14,[%fp+tmp3] 
1338*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%i3 1339*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%o3 1340*25c28e83SPiotr Jasiukajtis sub %i3,%o3,%i3 1341*25c28e83SPiotr Jasiukajtis 1342*25c28e83SPiotr Jasiukajtis fands %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0); 1343*25c28e83SPiotr Jasiukajtis sra %i3,13,%l5 ! (4_0) si0 = ax0 >> 13; 1344*25c28e83SPiotr Jasiukajtis 1345*25c28e83SPiotr Jasiukajtis and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0; 1346*25c28e83SPiotr Jasiukajtis 1347*25c28e83SPiotr Jasiukajtis ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 1348*25c28e83SPiotr Jasiukajtis fpsub32s %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1349*25c28e83SPiotr Jasiukajtis 1350*25c28e83SPiotr Jasiukajtis sra %i3,24,%i3 ! (4_0) iexp0 = ax0 >> 24; 1351*25c28e83SPiotr Jasiukajtis 1352*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0; 1353*25c28e83SPiotr Jasiukajtis fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]); 1354*25c28e83SPiotr Jasiukajtis 1355*25c28e83SPiotr Jasiukajtis sllx %o0,23,%o0 ! (4_0) lexp0 = iexp0 << 55; 1356*25c28e83SPiotr Jasiukajtis 1357*25c28e83SPiotr Jasiukajtis st %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0); 1358*25c28e83SPiotr Jasiukajtis 1359*25c28e83SPiotr Jasiukajtis ba .cont11 1360*25c28e83SPiotr Jasiukajtis fmuld %f56,%f54,%f40 ! 
(4_0) xx0 = dtmp0 * tbl_div0; 1361*25c28e83SPiotr Jasiukajtis1: 1362*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp_px] 1363*25c28e83SPiotr Jasiukajtis 1364*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1365*25c28e83SPiotr Jasiukajtis 1366*25c28e83SPiotr Jasiukajtis ba .cont11 1367*25c28e83SPiotr Jasiukajtis mov 0,counter 1368*25c28e83SPiotr Jasiukajtis 1369*25c28e83SPiotr Jasiukajtis .align 16 1370*25c28e83SPiotr Jasiukajtis.update12: 1371*25c28e83SPiotr Jasiukajtis cmp counter,1 1372*25c28e83SPiotr Jasiukajtis ble .cont12 1373*25c28e83SPiotr Jasiukajtis nop 1374*25c28e83SPiotr Jasiukajtis 1375*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%i1 1376*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 1377*25c28e83SPiotr Jasiukajtis 1378*25c28e83SPiotr Jasiukajtis sub counter,1,counter 1379*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1380*25c28e83SPiotr Jasiukajtis 1381*25c28e83SPiotr Jasiukajtis ba .cont12 1382*25c28e83SPiotr Jasiukajtis mov 1,counter 1383*25c28e83SPiotr Jasiukajtis 1384*25c28e83SPiotr Jasiukajtis .align 16 1385*25c28e83SPiotr Jasiukajtis.update13: 1386*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%o3 1387*25c28e83SPiotr Jasiukajtis cmp counter,1 1388*25c28e83SPiotr Jasiukajtis ble .cont13 1389*25c28e83SPiotr Jasiukajtis 1390*25c28e83SPiotr Jasiukajtis add %o3,0x3ff,%o3 1391*25c28e83SPiotr Jasiukajtis 1392*25c28e83SPiotr Jasiukajtis andcc %g5,%o3,%g0 1393*25c28e83SPiotr Jasiukajtis bz 1f 1394*25c28e83SPiotr Jasiukajtis 1395*25c28e83SPiotr Jasiukajtis cmp %g5,0 1396*25c28e83SPiotr Jasiukajtis bl,a 1f 1397*25c28e83SPiotr Jasiukajtis nop 1398*25c28e83SPiotr Jasiukajtis 1399*25c28e83SPiotr Jasiukajtis fitod %f15,%f0 1400*25c28e83SPiotr Jasiukajtis fdtos %f0,%f15 1401*25c28e83SPiotr Jasiukajtis fmuls %f15,FTWO,%f15 1402*25c28e83SPiotr Jasiukajtis st %f15,[%fp+tmp3] 1403*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%g5 1404*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%o3 1405*25c28e83SPiotr Jasiukajtis sub %g5,%o3,%g5 1406*25c28e83SPiotr 
Jasiukajtis 1407*25c28e83SPiotr Jasiukajtis fands %f15,DC0,%f17 ! (4_0) dfx0 = vis_fand(ddx0,DC0); 1408*25c28e83SPiotr Jasiukajtis 1409*25c28e83SPiotr Jasiukajtis sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13; 1410*25c28e83SPiotr Jasiukajtis sra %g5,24,%o3 ! (5_0) iexp1 = ax1 >> 24; 1411*25c28e83SPiotr Jasiukajtis and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0; 1412*25c28e83SPiotr Jasiukajtis fpsub32s %f15,%f17,%f17 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1413*25c28e83SPiotr Jasiukajtis 1414*25c28e83SPiotr Jasiukajtis ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 1415*25c28e83SPiotr Jasiukajtis sub %l0,%o3,%l1 ! (5_0) iexp1 = 0x3f - iexp1; 1416*25c28e83SPiotr Jasiukajtis 1417*25c28e83SPiotr Jasiukajtis add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1; 1418*25c28e83SPiotr Jasiukajtis 1419*25c28e83SPiotr Jasiukajtis sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23; 1420*25c28e83SPiotr Jasiukajtis st %l1,[%fp+tmp0+4] ! (4_0) fdx0 = *((double*)lexp0); 1421*25c28e83SPiotr Jasiukajtis 1422*25c28e83SPiotr Jasiukajtis fitod %f17,%f0 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]); 1423*25c28e83SPiotr Jasiukajtis 1424*25c28e83SPiotr Jasiukajtis fmuld %f0,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1; 1425*25c28e83SPiotr Jasiukajtis ba .cont13 1426*25c28e83SPiotr Jasiukajtis fmuld K3,%f46,%f50 ! 
(5_1) res1 = K3 * xx1; 1427*25c28e83SPiotr Jasiukajtis1: 1428*25c28e83SPiotr Jasiukajtis sub %i1,stridex,%i1 1429*25c28e83SPiotr Jasiukajtis stx %i1,[%fp+tmp_px] 1430*25c28e83SPiotr Jasiukajtis 1431*25c28e83SPiotr Jasiukajtis sub counter,1,counter 1432*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1433*25c28e83SPiotr Jasiukajtis 1434*25c28e83SPiotr Jasiukajtis ba .cont13 1435*25c28e83SPiotr Jasiukajtis mov 1,counter 1436*25c28e83SPiotr Jasiukajtis 1437*25c28e83SPiotr Jasiukajtis .align 16 1438*25c28e83SPiotr Jasiukajtis.update14: 1439*25c28e83SPiotr Jasiukajtis cmp counter,2 1440*25c28e83SPiotr Jasiukajtis ble .cont14 1441*25c28e83SPiotr Jasiukajtis sub %o5,stridex,%o3 1442*25c28e83SPiotr Jasiukajtis 1443*25c28e83SPiotr Jasiukajtis sub %o3,stridex,%o3 1444*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp_px] 1445*25c28e83SPiotr Jasiukajtis 1446*25c28e83SPiotr Jasiukajtis sub counter,2,counter 1447*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1448*25c28e83SPiotr Jasiukajtis 1449*25c28e83SPiotr Jasiukajtis ba .cont14 1450*25c28e83SPiotr Jasiukajtis mov 2,counter 1451*25c28e83SPiotr Jasiukajtis 1452*25c28e83SPiotr Jasiukajtis .align 16 1453*25c28e83SPiotr Jasiukajtis.update15: 1454*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%i3 1455*25c28e83SPiotr Jasiukajtis cmp counter,2 1456*25c28e83SPiotr Jasiukajtis ble .cont15 1457*25c28e83SPiotr Jasiukajtis sub %o5,stridex,%o3 1458*25c28e83SPiotr Jasiukajtis 1459*25c28e83SPiotr Jasiukajtis add %i3,0x3ff,%i3 1460*25c28e83SPiotr Jasiukajtis 1461*25c28e83SPiotr Jasiukajtis andcc %g1,%i3,%g0 1462*25c28e83SPiotr Jasiukajtis bz 1f 1463*25c28e83SPiotr Jasiukajtis sub %o3,stridex,%o3 1464*25c28e83SPiotr Jasiukajtis 1465*25c28e83SPiotr Jasiukajtis cmp %g1,0 1466*25c28e83SPiotr Jasiukajtis bl,a 1f 1467*25c28e83SPiotr Jasiukajtis nop 1468*25c28e83SPiotr Jasiukajtis 1469*25c28e83SPiotr Jasiukajtis fitod %f18,%f0 1470*25c28e83SPiotr Jasiukajtis fdtos %f0,%f18 1471*25c28e83SPiotr Jasiukajtis fmuls %f18,FTWO,%f18 
1472*25c28e83SPiotr Jasiukajtis st %f18,[%fp+tmp3] 1473*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%g1 1474*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%o3 1475*25c28e83SPiotr Jasiukajtis sub %g1,%o3,%g1 1476*25c28e83SPiotr Jasiukajtis 1477*25c28e83SPiotr Jasiukajtis fands %f18,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0); 1478*25c28e83SPiotr Jasiukajtis sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13; 1479*25c28e83SPiotr Jasiukajtis and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0; 1480*25c28e83SPiotr Jasiukajtis 1481*25c28e83SPiotr Jasiukajtis ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 1482*25c28e83SPiotr Jasiukajtis fpsub32s %f18,%f0,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1483*25c28e83SPiotr Jasiukajtis 1484*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24; 1485*25c28e83SPiotr Jasiukajtis 1486*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0; 1487*25c28e83SPiotr Jasiukajtis 1488*25c28e83SPiotr Jasiukajtis ba .cont15 1489*25c28e83SPiotr Jasiukajtis fitod %f30,%f56 ! 
(0_0) dtmp0 = (double)(((int*)dfx0)[0]); 1490*25c28e83SPiotr Jasiukajtis1: 1491*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp_px] 1492*25c28e83SPiotr Jasiukajtis 1493*25c28e83SPiotr Jasiukajtis sub counter,2,counter 1494*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1495*25c28e83SPiotr Jasiukajtis 1496*25c28e83SPiotr Jasiukajtis ba .cont15 1497*25c28e83SPiotr Jasiukajtis mov 2,counter 1498*25c28e83SPiotr Jasiukajtis 1499*25c28e83SPiotr Jasiukajtis .align 16 1500*25c28e83SPiotr Jasiukajtis.update16: 1501*25c28e83SPiotr Jasiukajtis cmp counter,3 1502*25c28e83SPiotr Jasiukajtis ble .cont16 1503*25c28e83SPiotr Jasiukajtis sub %l7,stridex2,%o3 1504*25c28e83SPiotr Jasiukajtis 1505*25c28e83SPiotr Jasiukajtis sub %o3,stridex,%o3 1506*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp_px] 1507*25c28e83SPiotr Jasiukajtis 1508*25c28e83SPiotr Jasiukajtis sub counter,3,counter 1509*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1510*25c28e83SPiotr Jasiukajtis 1511*25c28e83SPiotr Jasiukajtis ba .cont16 1512*25c28e83SPiotr Jasiukajtis mov 3,counter 1513*25c28e83SPiotr Jasiukajtis 1514*25c28e83SPiotr Jasiukajtis .align 16 1515*25c28e83SPiotr Jasiukajtis.update17: 1516*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%i3 1517*25c28e83SPiotr Jasiukajtis cmp counter,3 1518*25c28e83SPiotr Jasiukajtis ble .cont17 1519*25c28e83SPiotr Jasiukajtis sub %l7,stridex2,%o3 1520*25c28e83SPiotr Jasiukajtis 1521*25c28e83SPiotr Jasiukajtis add %i3,0x3ff,%i3 1522*25c28e83SPiotr Jasiukajtis 1523*25c28e83SPiotr Jasiukajtis andcc %i4,%i3,%g0 1524*25c28e83SPiotr Jasiukajtis bz 1f 1525*25c28e83SPiotr Jasiukajtis sub %o3,stridex,%o3 1526*25c28e83SPiotr Jasiukajtis 1527*25c28e83SPiotr Jasiukajtis cmp %i4,0 1528*25c28e83SPiotr Jasiukajtis bl,a 1f 1529*25c28e83SPiotr Jasiukajtis nop 1530*25c28e83SPiotr Jasiukajtis 1531*25c28e83SPiotr Jasiukajtis fitod %f19,%f0 1532*25c28e83SPiotr Jasiukajtis fdtos %f0,%f19 1533*25c28e83SPiotr Jasiukajtis fmuls %f19,FTWO,%f19 1534*25c28e83SPiotr Jasiukajtis st 
%f19,[%fp+tmp3] 1535*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%i4 1536*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%o3 1537*25c28e83SPiotr Jasiukajtis sub %i4,%o3,%i4 1538*25c28e83SPiotr Jasiukajtis 1539*25c28e83SPiotr Jasiukajtis fands %f19,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0); 1540*25c28e83SPiotr Jasiukajtis 1541*25c28e83SPiotr Jasiukajtis sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13; 1542*25c28e83SPiotr Jasiukajtis 1543*25c28e83SPiotr Jasiukajtis sra %i4,24,%i0 ! (1_0) iexp1 = ax1 >> 24; 1544*25c28e83SPiotr Jasiukajtis and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0; 1545*25c28e83SPiotr Jasiukajtis fpsub32s %f19,%f0,%f31 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1546*25c28e83SPiotr Jasiukajtis 1547*25c28e83SPiotr Jasiukajtis ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 1548*25c28e83SPiotr Jasiukajtis sub %l0,%i0,%i0 ! (1_0) iexp1 = 0x3f - iexp1; 1549*25c28e83SPiotr Jasiukajtis 1550*25c28e83SPiotr Jasiukajtis sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23; 1551*25c28e83SPiotr Jasiukajtis fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]); 1552*25c28e83SPiotr Jasiukajtis 1553*25c28e83SPiotr Jasiukajtis st %i0,[%fp+tmp1+4] ! (0_0) fdx0 = *((double*)lexp0); 1554*25c28e83SPiotr Jasiukajtis 1555*25c28e83SPiotr Jasiukajtis add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0; 1556*25c28e83SPiotr Jasiukajtis fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0; 1557*25c28e83SPiotr Jasiukajtis 1558*25c28e83SPiotr Jasiukajtis ba .cont17 1559*25c28e83SPiotr Jasiukajtis fmuld K3,%f44,%f50 ! 
(1_0) res1 = K3 * xx1; 1560*25c28e83SPiotr Jasiukajtis1: 1561*25c28e83SPiotr Jasiukajtis stx %o3,[%fp+tmp_px] 1562*25c28e83SPiotr Jasiukajtis 1563*25c28e83SPiotr Jasiukajtis sub counter,3,counter 1564*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1565*25c28e83SPiotr Jasiukajtis 1566*25c28e83SPiotr Jasiukajtis ba .cont17 1567*25c28e83SPiotr Jasiukajtis mov 3,counter 1568*25c28e83SPiotr Jasiukajtis 1569*25c28e83SPiotr Jasiukajtis .align 16 1570*25c28e83SPiotr Jasiukajtis.update18: 1571*25c28e83SPiotr Jasiukajtis cmp counter,4 1572*25c28e83SPiotr Jasiukajtis ble .cont18 1573*25c28e83SPiotr Jasiukajtis fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0); 1574*25c28e83SPiotr Jasiukajtis 1575*25c28e83SPiotr Jasiukajtis sub %l7,stridex2,%i3 1576*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] 1577*25c28e83SPiotr Jasiukajtis 1578*25c28e83SPiotr Jasiukajtis sub counter,4,counter 1579*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1580*25c28e83SPiotr Jasiukajtis 1581*25c28e83SPiotr Jasiukajtis ba .cont18 1582*25c28e83SPiotr Jasiukajtis mov 4,counter 1583*25c28e83SPiotr Jasiukajtis 1584*25c28e83SPiotr Jasiukajtis .align 16 1585*25c28e83SPiotr Jasiukajtis.update19: 1586*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%i3 1587*25c28e83SPiotr Jasiukajtis cmp counter,4 1588*25c28e83SPiotr Jasiukajtis ble,a .cont19 1589*25c28e83SPiotr Jasiukajtis fmuld %f50,%f46,%f24 ! 
(3_0) xx1 = dtmp1 * tbl_div1; 1590*25c28e83SPiotr Jasiukajtis 1591*25c28e83SPiotr Jasiukajtis add %i3,0x3ff,%i3 1592*25c28e83SPiotr Jasiukajtis 1593*25c28e83SPiotr Jasiukajtis andcc %g1,%i3,%g0 1594*25c28e83SPiotr Jasiukajtis bz 1f 1595*25c28e83SPiotr Jasiukajtis nop 1596*25c28e83SPiotr Jasiukajtis 1597*25c28e83SPiotr Jasiukajtis cmp %g1,0 1598*25c28e83SPiotr Jasiukajtis bl,a 1f 1599*25c28e83SPiotr Jasiukajtis nop 1600*25c28e83SPiotr Jasiukajtis 1601*25c28e83SPiotr Jasiukajtis fitod %f24,%f24 1602*25c28e83SPiotr Jasiukajtis fdtos %f24,%f24 1603*25c28e83SPiotr Jasiukajtis fmuls %f24,FTWO,%f24 1604*25c28e83SPiotr Jasiukajtis st %f24,[%fp+tmp3] 1605*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%g1 1606*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%i3 1607*25c28e83SPiotr Jasiukajtis sub %g1,%i3,%g1 1608*25c28e83SPiotr Jasiukajtis 1609*25c28e83SPiotr Jasiukajtis fands %f24,DC0,%f8 ! (2_0) dfx0 = vis_fand(ddx0,DC0); 1610*25c28e83SPiotr Jasiukajtis sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13; 1611*25c28e83SPiotr Jasiukajtis 1612*25c28e83SPiotr Jasiukajtis and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0; 1613*25c28e83SPiotr Jasiukajtis 1614*25c28e83SPiotr Jasiukajtis ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0]; 1615*25c28e83SPiotr Jasiukajtis fpsub32s %f24,%f8,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1616*25c28e83SPiotr Jasiukajtis 1617*25c28e83SPiotr Jasiukajtis sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24; 1618*25c28e83SPiotr Jasiukajtis 1619*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0; 1620*25c28e83SPiotr Jasiukajtis 1621*25c28e83SPiotr Jasiukajtis sllx %g5,23,%g5 ! (2_0) lexp0 = iexp0 << 55; 1622*25c28e83SPiotr Jasiukajtis add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0; 1623*25c28e83SPiotr Jasiukajtis fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]); 1624*25c28e83SPiotr Jasiukajtis 1625*25c28e83SPiotr Jasiukajtis st %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0); 1626*25c28e83SPiotr Jasiukajtis fmuld %f56,%f30,%f30 ! 
(2_0) xx0 = dtmp0 * tbl_div0; 1627*25c28e83SPiotr Jasiukajtis 1628*25c28e83SPiotr Jasiukajtis ba .cont19 1629*25c28e83SPiotr Jasiukajtis fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; 1630*25c28e83SPiotr Jasiukajtis1: 1631*25c28e83SPiotr Jasiukajtis sub %l7,stridex2,%i3 1632*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] 1633*25c28e83SPiotr Jasiukajtis 1634*25c28e83SPiotr Jasiukajtis sub counter,4,counter 1635*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1636*25c28e83SPiotr Jasiukajtis 1637*25c28e83SPiotr Jasiukajtis mov 4,counter 1638*25c28e83SPiotr Jasiukajtis ba .cont19 1639*25c28e83SPiotr Jasiukajtis fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; 1640*25c28e83SPiotr Jasiukajtis 1641*25c28e83SPiotr Jasiukajtis .align 16 1642*25c28e83SPiotr Jasiukajtis.update20: 1643*25c28e83SPiotr Jasiukajtis cmp counter,5 1644*25c28e83SPiotr Jasiukajtis ble .cont20 1645*25c28e83SPiotr Jasiukajtis nop 1646*25c28e83SPiotr Jasiukajtis 1647*25c28e83SPiotr Jasiukajtis sub %l7,stridex,%i3 1648*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] 1649*25c28e83SPiotr Jasiukajtis 1650*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1651*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1652*25c28e83SPiotr Jasiukajtis 1653*25c28e83SPiotr Jasiukajtis ba .cont20 1654*25c28e83SPiotr Jasiukajtis mov 5,counter 1655*25c28e83SPiotr Jasiukajtis 1656*25c28e83SPiotr Jasiukajtis .align 16 1657*25c28e83SPiotr Jasiukajtis.update21: 1658*25c28e83SPiotr Jasiukajtis sethi %hi(0x7ffffc00),%i3 1659*25c28e83SPiotr Jasiukajtis cmp counter,5 1660*25c28e83SPiotr Jasiukajtis ble,a .cont21 1661*25c28e83SPiotr Jasiukajtis nop 1662*25c28e83SPiotr Jasiukajtis 1663*25c28e83SPiotr Jasiukajtis sub %l7,stridex,%i4 1664*25c28e83SPiotr Jasiukajtis add %i3,0x3ff,%i3 1665*25c28e83SPiotr Jasiukajtis 1666*25c28e83SPiotr Jasiukajtis andcc %o5,%i3,%g0 1667*25c28e83SPiotr Jasiukajtis bz 1f 1668*25c28e83SPiotr Jasiukajtis ld [%i4],%f8 1669*25c28e83SPiotr Jasiukajtis 1670*25c28e83SPiotr Jasiukajtis cmp 
%o5,0 1671*25c28e83SPiotr Jasiukajtis bl,a 1f 1672*25c28e83SPiotr Jasiukajtis nop 1673*25c28e83SPiotr Jasiukajtis 1674*25c28e83SPiotr Jasiukajtis fitod %f8,%f8 1675*25c28e83SPiotr Jasiukajtis fdtos %f8,%f8 1676*25c28e83SPiotr Jasiukajtis fmuls %f8,FTWO,%f8 1677*25c28e83SPiotr Jasiukajtis st %f8,[%fp+tmp3] 1678*25c28e83SPiotr Jasiukajtis ld [%fp+tmp3],%o5 1679*25c28e83SPiotr Jasiukajtis sethi %hi(0x4b000000),%i3 1680*25c28e83SPiotr Jasiukajtis sub %o5,%i3,%o5 1681*25c28e83SPiotr Jasiukajtis 1682*25c28e83SPiotr Jasiukajtis fands %f8,DC0,%f24 ! (2_0) dfx0 = vis_fand(ddx0,DC0); 1683*25c28e83SPiotr Jasiukajtis 1684*25c28e83SPiotr Jasiukajtis sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13; 1685*25c28e83SPiotr Jasiukajtis 1686*25c28e83SPiotr Jasiukajtis sra %o5,24,%i3 ! (3_0) iexp1 = ax1 >> 24; 1687*25c28e83SPiotr Jasiukajtis and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0; 1688*25c28e83SPiotr Jasiukajtis fpsub32s %f8,%f24,%f24 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0); 1689*25c28e83SPiotr Jasiukajtis 1690*25c28e83SPiotr Jasiukajtis ldd [%o1+TBL],%f8 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0]; 1691*25c28e83SPiotr Jasiukajtis sub %l0,%i3,%i3 ! (3_0) iexp1 = 0x3f - iexp1; 1692*25c28e83SPiotr Jasiukajtis 1693*25c28e83SPiotr Jasiukajtis sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23; 1694*25c28e83SPiotr Jasiukajtis fitod %f24,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]); 1695*25c28e83SPiotr Jasiukajtis 1696*25c28e83SPiotr Jasiukajtis add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1; 1697*25c28e83SPiotr Jasiukajtis st %i3,[%fp+tmp2+4] ! (2_0) fdx0 = *((double*)lexp0); 1698*25c28e83SPiotr Jasiukajtis 1699*25c28e83SPiotr Jasiukajtis fmuld %f50,%f8,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1; 1700*25c28e83SPiotr Jasiukajtis 1701*25c28e83SPiotr Jasiukajtis ba .cont21 1702*25c28e83SPiotr Jasiukajtis fmuld K3,%f24,%f50 ! 
(3_0) res1 = K3 * xx1; 1703*25c28e83SPiotr Jasiukajtis1: 1704*25c28e83SPiotr Jasiukajtis sub %l7,stridex,%i3 1705*25c28e83SPiotr Jasiukajtis stx %i3,[%fp+tmp_px] 1706*25c28e83SPiotr Jasiukajtis 1707*25c28e83SPiotr Jasiukajtis sub counter,5,counter 1708*25c28e83SPiotr Jasiukajtis st counter,[%fp+tmp_counter] 1709*25c28e83SPiotr Jasiukajtis 1710*25c28e83SPiotr Jasiukajtis ba .cont21 1711*25c28e83SPiotr Jasiukajtis mov 5,counter 1712*25c28e83SPiotr Jasiukajtis 1713*25c28e83SPiotr Jasiukajtis .align 16 1714*25c28e83SPiotr Jasiukajtis.exit: 1715*25c28e83SPiotr Jasiukajtis ret 1716*25c28e83SPiotr Jasiukajtis restore 1717*25c28e83SPiotr Jasiukajtis 1718*25c28e83SPiotr Jasiukajtis SET_SIZE(__vrsqrtf) 1719*25c28e83SPiotr Jasiukajtis 1720