1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include "quad.h" 28 29 static const double C[] = { 30 0.0, 31 0.5, 32 1.0, 33 2.0, 34 68719476736.0, 35 1048576.0, 36 16.0, 37 1.52587890625000000000e-05, 38 5.96046447753906250000e-08, 39 3.72529029846191406250e-09, 40 8.67361737988403547206e-19, 41 2.16840434497100886801e-19, 42 1.32348898008484427979e-23, 43 9.62964972193617926528e-35, 44 4.70197740328915003187e-38 45 }; 46 47 #define zero C[0] 48 #define half C[1] 49 #define one C[2] 50 #define two C[3] 51 #define two36 C[4] 52 #define two20 C[5] 53 #define two4 C[6] 54 #define twom16 C[7] 55 #define twom24 C[8] 56 #define twom28 C[9] 57 #define twom60 C[10] 58 #define twom62 C[11] 59 #define twom76 C[12] 60 #define twom113 C[13] 61 #define twom124 C[14] 62 63 static const unsigned fsr_rn = 0xc0000000u; 64 65 #ifdef __sparcv9 66 67 /* 68 * _Qp_mul(pz, x, y) sets *pz = *x * *y. 69 */ 70 void 71 _Qp_mul(union longdouble *pz, const union longdouble *x, 72 const union longdouble *y) 73 74 #else 75 76 /* 77 * _Q_mul(x, y) returns *x * *y. 78 */ 79 union longdouble 80 _Q_mul(const union longdouble *x, const union longdouble *y) 81 82 #endif /* __sparcv9 */ 83 84 { 85 union longdouble z; 86 union xdouble u; 87 double xx[5], yy[5], c, d, zz[9]; 88 unsigned int xm, ym, fsr, lx, ly, wx[3], wy[3]; 89 unsigned int msw, frac2, frac3, frac4, rm; 90 int ibit, ex, ey, ez, sign; 91 92 xm = x->l.msw & 0x7fffffff; 93 ym = y->l.msw & 0x7fffffff; 94 sign = (x->l.msw ^ y->l.msw) & ~0x7fffffff; 95 96 __quad_getfsrp(&fsr); 97 98 /* handle nan and inf cases */ 99 if (xm >= 0x7fff0000 || ym >= 0x7fff0000) { 100 /* handle nan cases according to V9 app. B */ 101 if (QUAD_ISNAN(*y)) { 102 if (!(y->l.msw & 0x8000)) { 103 /* snan, signal invalid */ 104 if (fsr & FSR_NVM) { 105 __quad_fmulq(x, y, &Z); 106 } else { 107 Z = *y; 108 Z.l.msw |= 0x8000; 109 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 110 FSR_NVC; 111 __quad_setfsrp(&fsr); 112 } 113 QUAD_RETURN(Z); 114 } else if (QUAD_ISNAN(*x) && !(x->l.msw & 0x8000)) { 115 /* snan, signal invalid */ 116 if (fsr & FSR_NVM) { 117 __quad_fmulq(x, y, &Z); 118 } else { 119 Z = *x; 120 Z.l.msw |= 0x8000; 121 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 122 FSR_NVC; 123 __quad_setfsrp(&fsr); 124 } 125 QUAD_RETURN(Z); 126 } 127 Z = *y; 128 QUAD_RETURN(Z); 129 } 130 if (QUAD_ISNAN(*x)) { 131 if (!(x->l.msw & 0x8000)) { 132 /* snan, signal invalid */ 133 if (fsr & FSR_NVM) { 134 __quad_fmulq(x, y, &Z); 135 } else { 136 Z = *x; 137 Z.l.msw |= 0x8000; 138 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 139 FSR_NVC; 140 __quad_setfsrp(&fsr); 141 } 142 QUAD_RETURN(Z); 143 } 144 Z = *x; 145 QUAD_RETURN(Z); 146 } 147 if (xm == 0x7fff0000) { 148 /* x is inf */ 149 if (QUAD_ISZERO(*y)) { 150 /* zero * inf, signal invalid */ 151 if (fsr & FSR_NVM) { 152 __quad_fmulq(x, y, &Z); 153 } else { 154 Z.l.msw = 0x7fffffff; 155 Z.l.frac2 = Z.l.frac3 = 156 Z.l.frac4 = 0xffffffff; 157 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 158 FSR_NVC; 159 __quad_setfsrp(&fsr); 160 } 161 QUAD_RETURN(Z); 162 } 163 /* inf * finite, return inf */ 164 Z.l.msw = sign | 0x7fff0000; 165 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0; 166 QUAD_RETURN(Z); 167 } 168 /* y is inf */ 169 if (QUAD_ISZERO(*x)) { 170 /* zero * inf, signal invalid */ 171 if (fsr & FSR_NVM) { 172 __quad_fmulq(x, y, &Z); 173 } else { 174 Z.l.msw = 0x7fffffff; 175 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff; 176 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC; 177 __quad_setfsrp(&fsr); 178 } 179 QUAD_RETURN(Z); 180 } 181 /* inf * finite, return inf */ 182 Z.l.msw = sign | 0x7fff0000; 183 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0; 184 QUAD_RETURN(Z); 185 } 186 187 /* handle zero cases */ 188 if (xm == 0 || ym == 0) { 189 if (QUAD_ISZERO(*x) || QUAD_ISZERO(*y)) { 190 Z.l.msw = sign; 191 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0; 192 QUAD_RETURN(Z); 193 } 194 } 195 196 /* now x and y are finite, nonzero */ 197 __quad_setfsrp(&fsr_rn); 198 199 /* get their normalized significands and exponents */ 200 ex = (int)(xm >> 16); 201 lx = xm & 0xffff; 202 if (ex) { 203 lx |= 0x10000; 204 wx[0] = x->l.frac2; 205 wx[1] = x->l.frac3; 206 wx[2] = x->l.frac4; 207 } else { 208 if (lx | (x->l.frac2 & 0xfffe0000)) { 209 wx[0] = x->l.frac2; 210 wx[1] = x->l.frac3; 211 wx[2] = x->l.frac4; 212 ex = 1; 213 } else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) { 214 lx = x->l.frac2; 215 wx[0] = x->l.frac3; 216 wx[1] = x->l.frac4; 217 wx[2] = 0; 218 ex = -31; 219 } else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) { 220 lx = x->l.frac3; 221 wx[0] = x->l.frac4; 222 wx[1] = wx[2] = 0; 223 ex = -63; 224 } else { 225 lx = x->l.frac4; 226 wx[0] = wx[1] = wx[2] = 0; 227 ex = -95; 228 } 229 while ((lx & 0x10000) == 0) { 230 lx = (lx << 1) | (wx[0] >> 31); 231 wx[0] = (wx[0] << 1) | (wx[1] >> 31); 232 wx[1] = (wx[1] << 1) | (wx[2] >> 31); 233 wx[2] <<= 1; 234 ex--; 235 } 236 } 237 ez = ex - 0x3fff; 238 239 ey = (int)(ym >> 16); 240 ly = ym & 0xffff; 241 if (ey) { 242 ly |= 0x10000; 243 wy[0] = y->l.frac2; 244 wy[1] = y->l.frac3; 245 wy[2] = y->l.frac4; 246 } else { 247 if (ly | (y->l.frac2 & 0xfffe0000)) { 248 wy[0] = y->l.frac2; 249 wy[1] = y->l.frac3; 250 wy[2] = y->l.frac4; 251 ey = 1; 252 } else if (y->l.frac2 | (y->l.frac3 & 0xfffe0000)) { 253 ly = y->l.frac2; 254 wy[0] = y->l.frac3; 255 wy[1] = y->l.frac4; 256 wy[2] = 0; 257 ey = -31; 258 } else if (y->l.frac3 | (y->l.frac4 & 0xfffe0000)) { 259 ly = y->l.frac3; 260 wy[0] = y->l.frac4; 261 wy[1] = wy[2] = 0; 262 ey = -63; 263 } else { 264 ly = y->l.frac4; 265 wy[0] = wy[1] = wy[2] = 0; 266 ey = -95; 267 } 268 while ((ly & 0x10000) == 0) { 269 ly = (ly << 1) | (wy[0] >> 31); 270 wy[0] = (wy[0] << 1) | (wy[1] >> 31); 271 wy[1] = (wy[1] << 1) | (wy[2] >> 31); 272 wy[2] <<= 1; 273 ey--; 274 } 275 } 276 ez += ey; 277 278 /* extract the significand into five doubles */ 279 c = twom16; 280 xx[0] = (double)((int)lx) * c; 281 yy[0] = (double)((int)ly) * c; 282 283 c *= twom24; 284 xx[1] = (double)((int)(wx[0] >> 8)) * c; 285 yy[1] = (double)((int)(wy[0] >> 8)) * c; 286 287 c *= twom24; 288 xx[2] = (double)((int)(((wx[0] << 16) | (wx[1] >> 16)) & 289 0xffffff)) * c; 290 yy[2] = (double)((int)(((wy[0] << 16) | (wy[1] >> 16)) & 291 0xffffff)) * c; 292 293 c *= twom24; 294 xx[3] = (double)((int)(((wx[1] << 8) | (wx[2] >> 24)) & 295 0xffffff)) * c; 296 yy[3] = (double)((int)(((wy[1] << 8) | (wy[2] >> 24)) & 297 0xffffff)) * c; 298 299 c *= twom24; 300 xx[4] = (double)((int)(wx[2] & 0xffffff)) * c; 301 yy[4] = (double)((int)(wy[2] & 0xffffff)) * c; 302 303 /* form the "digits" of the product */ 304 zz[0] = xx[0] * yy[0]; 305 zz[1] = xx[0] * yy[1] + xx[1] * yy[0]; 306 zz[2] = xx[0] * yy[2] + xx[1] * yy[1] + xx[2] * yy[0]; 307 zz[3] = xx[0] * yy[3] + xx[1] * yy[2] + xx[2] * yy[1] + 308 xx[3] * yy[0]; 309 zz[4] = xx[0] * yy[4] + xx[1] * yy[3] + xx[2] * yy[2] + 310 xx[3] * yy[1] + xx[4] * yy[0]; 311 zz[5] = xx[1] * yy[4] + xx[2] * yy[3] + xx[3] * yy[2] + 312 xx[4] * yy[1]; 313 zz[6] = xx[2] * yy[4] + xx[3] * yy[3] + xx[4] * yy[2]; 314 zz[7] = xx[3] * yy[4] + xx[4] * yy[3]; 315 zz[8] = xx[4] * yy[4]; 316 317 /* collect the first few terms */ 318 c = (zz[1] + two20) - two20; 319 zz[0] += c; 320 zz[1] = zz[2] + (zz[1] - c); 321 c = (zz[3] + twom28) - twom28; 322 zz[1] += c; 323 zz[2] = zz[4] + (zz[3] - c); 324 325 /* propagate carries into the third term */ 326 d = zz[6] + (zz[7] + zz[8]); 327 zz[2] += zz[5] + d; 328 329 /* if the third term might lie on a rounding boundary, perturb it */ 330 /* as need be */ 331 if (zz[2] == (twom62 + zz[2]) - twom62) 332 { 333 c = (zz[5] + twom76) - twom76; 334 if ((zz[5] - c) + d != zero) 335 zz[2] += twom124; 336 } 337 338 /* propagate carries to the leading term */ 339 c = zz[1] + zz[2]; 340 zz[2] = zz[2] + (zz[1] - c); 341 zz[1] = c; 342 c = zz[0] + zz[1]; 343 zz[1] = zz[1] + (zz[0] - c); 344 zz[0] = c; 345 346 /* check for carry out */ 347 if (c >= two) { 348 /* postnormalize */ 349 zz[0] *= half; 350 zz[1] *= half; 351 zz[2] *= half; 352 ez++; 353 } 354 355 /* if exponent > 0 strip off integer bit, else denormalize */ 356 if (ez > 0) { 357 ibit = 1; 358 zz[0] -= one; 359 } else { 360 ibit = 0; 361 if (ez > -128) 362 u.l.hi = (unsigned)(0x3fe + ez) << 20; 363 else 364 u.l.hi = 0x37e00000; 365 u.l.lo = 0; 366 zz[0] *= u.d; 367 zz[1] *= u.d; 368 zz[2] *= u.d; 369 ez = 0; 370 } 371 372 /* the first 48 bits of fraction come from zz[0] */ 373 u.d = d = two36 + zz[0]; 374 msw = u.l.lo; 375 zz[0] -= (d - two36); 376 377 u.d = d = two4 + zz[0]; 378 frac2 = u.l.lo; 379 zz[0] -= (d - two4); 380 381 /* the next 32 come from zz[0] and zz[1] */ 382 u.d = d = twom28 + (zz[0] + zz[1]); 383 frac3 = u.l.lo; 384 zz[0] -= (d - twom28); 385 386 /* condense the remaining fraction; errors here won't matter */ 387 c = zz[0] + zz[1]; 388 zz[1] = ((zz[0] - c) + zz[1]) + zz[2]; 389 zz[0] = c; 390 391 /* get the last word of fraction */ 392 u.d = d = twom60 + (zz[0] + zz[1]); 393 frac4 = u.l.lo; 394 zz[0] -= (d - twom60); 395 396 /* keep track of what's left for rounding; note that the error */ 397 /* in computing c will be non-negative due to rounding mode */ 398 c = zz[0] + zz[1]; 399 400 /* get the rounding mode, fudging directed rounding modes */ 401 /* as though the result were positive */ 402 rm = fsr >> 30; 403 if (sign) 404 rm ^= (rm >> 1); 405 406 /* round and raise exceptions */ 407 fsr &= ~FSR_CEXC; 408 if (c != zero) { 409 fsr |= FSR_NXC; 410 411 /* decide whether to round the fraction up */ 412 if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 || 413 (c == twom113 && ((frac4 & 1) || (c - zz[0] != zz[1])))))) { 414 /* round up and renormalize if necessary */ 415 if (++frac4 == 0) 416 if (++frac3 == 0) 417 if (++frac2 == 0) 418 if (++msw == 0x10000) { 419 msw = 0; 420 ez++; 421 } 422 } 423 } 424 425 /* check for under/overflow */ 426 if (ez >= 0x7fff) { 427 if (rm == FSR_RN || rm == FSR_RP) { 428 z.l.msw = sign | 0x7fff0000; 429 z.l.frac2 = z.l.frac3 = z.l.frac4 = 0; 430 } else { 431 z.l.msw = sign | 0x7ffeffff; 432 z.l.frac2 = z.l.frac3 = z.l.frac4 = 0xffffffff; 433 } 434 fsr |= FSR_OFC | FSR_NXC; 435 } else { 436 z.l.msw = sign | (ez << 16) | msw; 437 z.l.frac2 = frac2; 438 z.l.frac3 = frac3; 439 z.l.frac4 = frac4; 440 441 /* !ibit => exact result was tiny before rounding, */ 442 /* t nonzero => result delivered is inexact */ 443 if (!ibit) { 444 if (c != zero) 445 fsr |= FSR_UFC | FSR_NXC; 446 else if (fsr & FSR_UFM) 447 fsr |= FSR_UFC; 448 } 449 } 450 451 if ((fsr & FSR_CEXC) & (fsr >> 23)) { 452 __quad_setfsrp(&fsr); 453 __quad_fmulq(x, y, &Z); 454 } else { 455 Z = z; 456 fsr |= (fsr & 0x1f) << 5; 457 __quad_setfsrp(&fsr); 458 } 459 QUAD_RETURN(Z); 460 } 461