1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #include "libm_inlines.h" 31 32 #ifdef __RESTRICT 33 #define restrict _Restrict 34 #else 35 #define restrict 36 #endif 37 38 extern double sqrt(double); 39 40 /* 41 * Instead of type punning, use union type. 42 */ 43 typedef union h32 { 44 float f; 45 unsigned u; 46 } h32; 47 48 void 49 __vhypotf(int n, float *restrict x, int stridex, float *restrict y, 50 int stridey, float *restrict z, int stridez) 51 { 52 float x0, x1, x2, y0, y1, y2, z0, z1, z2, *pz0, *pz1, *pz2; 53 h32 hx0, hx1, hx2, hy0, hy1, hy2; 54 int i, j0, j1, j2; 55 56 do { 57 LOOP0: 58 hx0.f = *x; 59 hy0.f = *y; 60 hx0.u &= ~0x80000000; 61 hy0.u &= ~0x80000000; 62 x0 = hx0.f; 63 y0 = hy0.f; 64 if (hy0.u > hx0.u) { 65 i = hy0.u - hx0.u; 66 j0 = hy0.u & 0x7f800000; 67 if (hx0.u == 0) 68 i = 0x7f800000; 69 } else { 70 i = hx0.u - hy0.u; 71 j0 = hx0.u & 0x7f800000; 72 if (hy0.u == 0) 73 i = 0x7f800000; 74 else if (hx0.u == 0) 75 i = 0x7f800000; 76 } 77 if (i >= 0x0c800000 || j0 >= 0x7f800000) { 78 z0 = x0 + y0; 79 if (hx0.u == 0x7f800000) 80 z0 = x0; 81 else if (hy0.u == 0x7f800000) 82 z0 = y0; 83 else if (hx0.u > 0x7f800000 || hy0.u > 0x7f800000) 84 z0 = *x + *y; 85 *z = z0; 86 x += stridex; 87 y += stridey; 88 z += stridez; 89 i = 0; 90 if (--n <= 0) 91 break; 92 goto LOOP0; 93 } 94 pz0 = z; 95 x += stridex; 96 y += stridey; 97 z += stridez; 98 i = 1; 99 if (--n <= 0) 100 break; 101 102 LOOP1: 103 hx1.f = *x; 104 hy1.f = *y; 105 hx1.u &= ~0x80000000; 106 hy1.u &= ~0x80000000; 107 x1 = hx1.f; 108 y1 = hy1.f; 109 if (hy1.u > hx1.u) { 110 i = hy1.u - hx1.u; 111 j1 = hy1.u & 0x7f800000; 112 if (hx1.u == 0) 113 i = 0x7f800000; 114 } else { 115 i = hx1.u - hy1.u; 116 j1 = hx1.u & 0x7f800000; 117 if (hy1.u == 0) 118 i = 0x7f800000; 119 else if (hx1.u == 0) 120 i = 0x7f800000; 121 } 122 if (i >= 0x0c800000 || j1 >= 0x7f800000) { 123 z1 = x1 + y1; 124 if (hx1.u == 0x7f800000) 125 z1 = x1; 126 else if (hy1.u == 0x7f800000) 127 z1 = y1; 128 else if (hx1.u > 0x7f800000 || hy1.u > 0x7f800000) 129 z1 = *x + *y; 130 *z = z1; 131 x += stridex; 132 y += stridey; 133 z += stridez; 134 i = 1; 135 if (--n <= 0) 136 break; 137 goto LOOP1; 138 } 139 pz1 = z; 140 x += stridex; 141 y += stridey; 142 z += stridez; 143 i = 2; 144 if (--n <= 0) 145 break; 146 147 LOOP2: 148 hx2.f = *x; 149 hy2.f = *y; 150 hx2.u &= ~0x80000000; 151 hy2.u &= ~0x80000000; 152 x2 = hx2.f; 153 y2 = hy2.f; 154 if (hy2.u > hx2.u) { 155 i = hy2.u - hx2.u; 156 j2 = hy2.u & 0x7f800000; 157 if (hx2.u == 0) 158 i = 0x7f800000; 159 } else { 160 i = hx2.u - hy2.u; 161 j2 = hx2.u & 0x7f800000; 162 if (hy2.u == 0) 163 i = 0x7f800000; 164 else if (hx2.u == 0) 165 i = 0x7f800000; 166 } 167 if (i >= 0x0c800000 || j2 >= 0x7f800000) { 168 z2 = x2 + y2; 169 if (hx2.u == 0x7f800000) 170 z2 = x2; 171 else if (hy2.u == 0x7f800000) 172 z2 = y2; 173 else if (hx2.u > 0x7f800000 || hy2.u > 0x7f800000) 174 z2 = *x + *y; 175 *z = z2; 176 x += stridex; 177 y += stridey; 178 z += stridez; 179 i = 2; 180 if (--n <= 0) 181 break; 182 goto LOOP2; 183 } 184 pz2 = z; 185 186 z0 = sqrt(x0 * (double)x0 + y0 * (double)y0); 187 z1 = sqrt(x1 * (double)x1 + y1 * (double)y1); 188 z2 = sqrt(x2 * (double)x2 + y2 * (double)y2); 189 *pz0 = z0; 190 *pz1 = z1; 191 *pz2 = z2; 192 193 x += stridex; 194 y += stridey; 195 z += stridez; 196 i = 0; 197 } while (--n > 0); 198 199 if (i > 0) { 200 if (i > 1) { 201 z1 = sqrt(x1 * (double)x1 + y1 * (double)y1); 202 *pz1 = z1; 203 } 204 z0 = sqrt(x0 * (double)x0 + y0 * (double)y0); 205 *pz0 = z0; 206 } 207 } 208