1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "quad.h" 30 31 static const double C[] = { 32 0.0, 33 0.5, 34 1.0, 35 2.0, 36 68719476736.0, 37 1048576.0, 38 16.0, 39 1.52587890625000000000e-05, 40 5.96046447753906250000e-08, 41 3.72529029846191406250e-09, 42 8.67361737988403547206e-19, 43 2.16840434497100886801e-19, 44 1.32348898008484427979e-23, 45 9.62964972193617926528e-35, 46 4.70197740328915003187e-38 47 }; 48 49 #define zero C[0] 50 #define half C[1] 51 #define one C[2] 52 #define two C[3] 53 #define two36 C[4] 54 #define two20 C[5] 55 #define two4 C[6] 56 #define twom16 C[7] 57 #define twom24 C[8] 58 #define twom28 C[9] 59 #define twom60 C[10] 60 #define twom62 C[11] 61 #define twom76 C[12] 62 #define twom113 C[13] 63 #define twom124 C[14] 64 65 static const unsigned fsr_rn = 0xc0000000u; 66 67 #ifdef __sparcv9 68 69 /* 70 * _Qp_mul(pz, x, y) sets *pz = *x * *y. 71 */ 72 void 73 _Qp_mul(union longdouble *pz, const union longdouble *x, 74 const union longdouble *y) 75 76 #else 77 78 /* 79 * _Q_mul(x, y) returns *x * *y. 80 */ 81 union longdouble 82 _Q_mul(const union longdouble *x, const union longdouble *y) 83 84 #endif /* __sparcv9 */ 85 86 { 87 union longdouble z; 88 union xdouble u; 89 double xx[5], yy[5], c, d, zz[9]; 90 unsigned int xm, ym, fsr, lx, ly, wx[3], wy[3]; 91 unsigned int msw, frac2, frac3, frac4, rm; 92 int ibit, ex, ey, ez, sign; 93 94 xm = x->l.msw & 0x7fffffff; 95 ym = y->l.msw & 0x7fffffff; 96 sign = (x->l.msw ^ y->l.msw) & ~0x7fffffff; 97 98 __quad_getfsrp(&fsr); 99 100 /* handle nan and inf cases */ 101 if (xm >= 0x7fff0000 || ym >= 0x7fff0000) { 102 /* handle nan cases according to V9 app. B */ 103 if (QUAD_ISNAN(*y)) { 104 if (!(y->l.msw & 0x8000)) { 105 /* snan, signal invalid */ 106 if (fsr & FSR_NVM) { 107 __quad_fmulq(x, y, &Z); 108 } else { 109 Z = *y; 110 Z.l.msw |= 0x8000; 111 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 112 FSR_NVC; 113 __quad_setfsrp(&fsr); 114 } 115 QUAD_RETURN(Z); 116 } else if (QUAD_ISNAN(*x) && !(x->l.msw & 0x8000)) { 117 /* snan, signal invalid */ 118 if (fsr & FSR_NVM) { 119 __quad_fmulq(x, y, &Z); 120 } else { 121 Z = *x; 122 Z.l.msw |= 0x8000; 123 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 124 FSR_NVC; 125 __quad_setfsrp(&fsr); 126 } 127 QUAD_RETURN(Z); 128 } 129 Z = *y; 130 QUAD_RETURN(Z); 131 } 132 if (QUAD_ISNAN(*x)) { 133 if (!(x->l.msw & 0x8000)) { 134 /* snan, signal invalid */ 135 if (fsr & FSR_NVM) { 136 __quad_fmulq(x, y, &Z); 137 } else { 138 Z = *x; 139 Z.l.msw |= 0x8000; 140 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 141 FSR_NVC; 142 __quad_setfsrp(&fsr); 143 } 144 QUAD_RETURN(Z); 145 } 146 Z = *x; 147 QUAD_RETURN(Z); 148 } 149 if (xm == 0x7fff0000) { 150 /* x is inf */ 151 if (QUAD_ISZERO(*y)) { 152 /* zero * inf, signal invalid */ 153 if (fsr & FSR_NVM) { 154 __quad_fmulq(x, y, &Z); 155 } else { 156 Z.l.msw = 0x7fffffff; 157 Z.l.frac2 = Z.l.frac3 = 158 Z.l.frac4 = 0xffffffff; 159 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | 160 FSR_NVC; 161 __quad_setfsrp(&fsr); 162 } 163 QUAD_RETURN(Z); 164 } 165 /* inf * finite, return inf */ 166 Z.l.msw = sign | 0x7fff0000; 167 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0; 168 QUAD_RETURN(Z); 169 } 170 /* y is inf */ 171 if (QUAD_ISZERO(*x)) { 172 /* zero * inf, signal invalid */ 173 if (fsr & FSR_NVM) { 174 __quad_fmulq(x, y, &Z); 175 } else { 176 Z.l.msw = 0x7fffffff; 177 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0xffffffff; 178 fsr = (fsr & ~FSR_CEXC) | FSR_NVA | FSR_NVC; 179 __quad_setfsrp(&fsr); 180 } 181 QUAD_RETURN(Z); 182 } 183 /* inf * finite, return inf */ 184 Z.l.msw = sign | 0x7fff0000; 185 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0; 186 QUAD_RETURN(Z); 187 } 188 189 /* handle zero cases */ 190 if (xm == 0 || ym == 0) { 191 if (QUAD_ISZERO(*x) || QUAD_ISZERO(*y)) { 192 Z.l.msw = sign; 193 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0; 194 QUAD_RETURN(Z); 195 } 196 } 197 198 /* now x and y are finite, nonzero */ 199 __quad_setfsrp(&fsr_rn); 200 201 /* get their normalized significands and exponents */ 202 ex = (int)(xm >> 16); 203 lx = xm & 0xffff; 204 if (ex) { 205 lx |= 0x10000; 206 wx[0] = x->l.frac2; 207 wx[1] = x->l.frac3; 208 wx[2] = x->l.frac4; 209 } else { 210 if (lx | (x->l.frac2 & 0xfffe0000)) { 211 wx[0] = x->l.frac2; 212 wx[1] = x->l.frac3; 213 wx[2] = x->l.frac4; 214 ex = 1; 215 } else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) { 216 lx = x->l.frac2; 217 wx[0] = x->l.frac3; 218 wx[1] = x->l.frac4; 219 wx[2] = 0; 220 ex = -31; 221 } else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) { 222 lx = x->l.frac3; 223 wx[0] = x->l.frac4; 224 wx[1] = wx[2] = 0; 225 ex = -63; 226 } else { 227 lx = x->l.frac4; 228 wx[0] = wx[1] = wx[2] = 0; 229 ex = -95; 230 } 231 while ((lx & 0x10000) == 0) { 232 lx = (lx << 1) | (wx[0] >> 31); 233 wx[0] = (wx[0] << 1) | (wx[1] >> 31); 234 wx[1] = (wx[1] << 1) | (wx[2] >> 31); 235 wx[2] <<= 1; 236 ex--; 237 } 238 } 239 ez = ex - 0x3fff; 240 241 ey = (int)(ym >> 16); 242 ly = ym & 0xffff; 243 if (ey) { 244 ly |= 0x10000; 245 wy[0] = y->l.frac2; 246 wy[1] = y->l.frac3; 247 wy[2] = y->l.frac4; 248 } else { 249 if (ly | (y->l.frac2 & 0xfffe0000)) { 250 wy[0] = y->l.frac2; 251 wy[1] = y->l.frac3; 252 wy[2] = y->l.frac4; 253 ey = 1; 254 } else if (y->l.frac2 | (y->l.frac3 & 0xfffe0000)) { 255 ly = y->l.frac2; 256 wy[0] = y->l.frac3; 257 wy[1] = y->l.frac4; 258 wy[2] = 0; 259 ey = -31; 260 } else if (y->l.frac3 | (y->l.frac4 & 0xfffe0000)) { 261 ly = y->l.frac3; 262 wy[0] = y->l.frac4; 263 wy[1] = wy[2] = 0; 264 ey = -63; 265 } else { 266 ly = y->l.frac4; 267 wy[0] = wy[1] = wy[2] = 0; 268 ey = -95; 269 } 270 while ((ly & 0x10000) == 0) { 271 ly = (ly << 1) | (wy[0] >> 31); 272 wy[0] = (wy[0] << 1) | (wy[1] >> 31); 273 wy[1] = (wy[1] << 1) | (wy[2] >> 31); 274 wy[2] <<= 1; 275 ey--; 276 } 277 } 278 ez += ey; 279 280 /* extract the significand into five doubles */ 281 c = twom16; 282 xx[0] = (double)((int)lx) * c; 283 yy[0] = (double)((int)ly) * c; 284 285 c *= twom24; 286 xx[1] = (double)((int)(wx[0] >> 8)) * c; 287 yy[1] = (double)((int)(wy[0] >> 8)) * c; 288 289 c *= twom24; 290 xx[2] = (double)((int)(((wx[0] << 16) | (wx[1] >> 16)) & 291 0xffffff)) * c; 292 yy[2] = (double)((int)(((wy[0] << 16) | (wy[1] >> 16)) & 293 0xffffff)) * c; 294 295 c *= twom24; 296 xx[3] = (double)((int)(((wx[1] << 8) | (wx[2] >> 24)) & 297 0xffffff)) * c; 298 yy[3] = (double)((int)(((wy[1] << 8) | (wy[2] >> 24)) & 299 0xffffff)) * c; 300 301 c *= twom24; 302 xx[4] = (double)((int)(wx[2] & 0xffffff)) * c; 303 yy[4] = (double)((int)(wy[2] & 0xffffff)) * c; 304 305 /* form the "digits" of the product */ 306 zz[0] = xx[0] * yy[0]; 307 zz[1] = xx[0] * yy[1] + xx[1] * yy[0]; 308 zz[2] = xx[0] * yy[2] + xx[1] * yy[1] + xx[2] * yy[0]; 309 zz[3] = xx[0] * yy[3] + xx[1] * yy[2] + xx[2] * yy[1] + 310 xx[3] * yy[0]; 311 zz[4] = xx[0] * yy[4] + xx[1] * yy[3] + xx[2] * yy[2] + 312 xx[3] * yy[1] + xx[4] * yy[0]; 313 zz[5] = xx[1] * yy[4] + xx[2] * yy[3] + xx[3] * yy[2] + 314 xx[4] * yy[1]; 315 zz[6] = xx[2] * yy[4] + xx[3] * yy[3] + xx[4] * yy[2]; 316 zz[7] = xx[3] * yy[4] + xx[4] * yy[3]; 317 zz[8] = xx[4] * yy[4]; 318 319 /* collect the first few terms */ 320 c = (zz[1] + two20) - two20; 321 zz[0] += c; 322 zz[1] = zz[2] + (zz[1] - c); 323 c = (zz[3] + twom28) - twom28; 324 zz[1] += c; 325 zz[2] = zz[4] + (zz[3] - c); 326 327 /* propagate carries into the third term */ 328 d = zz[6] + (zz[7] + zz[8]); 329 zz[2] += zz[5] + d; 330 331 /* if the third term might lie on a rounding boundary, perturb it */ 332 /* as need be */ 333 if (zz[2] == (twom62 + zz[2]) - twom62) 334 { 335 c = (zz[5] + twom76) - twom76; 336 if ((zz[5] - c) + d != zero) 337 zz[2] += twom124; 338 } 339 340 /* propagate carries to the leading term */ 341 c = zz[1] + zz[2]; 342 zz[2] = zz[2] + (zz[1] - c); 343 zz[1] = c; 344 c = zz[0] + zz[1]; 345 zz[1] = zz[1] + (zz[0] - c); 346 zz[0] = c; 347 348 /* check for carry out */ 349 if (c >= two) { 350 /* postnormalize */ 351 zz[0] *= half; 352 zz[1] *= half; 353 zz[2] *= half; 354 ez++; 355 } 356 357 /* if exponent > 0 strip off integer bit, else denormalize */ 358 if (ez > 0) { 359 ibit = 1; 360 zz[0] -= one; 361 } else { 362 ibit = 0; 363 if (ez > -128) 364 u.l.hi = (unsigned)(0x3fe + ez) << 20; 365 else 366 u.l.hi = 0x37e00000; 367 u.l.lo = 0; 368 zz[0] *= u.d; 369 zz[1] *= u.d; 370 zz[2] *= u.d; 371 ez = 0; 372 } 373 374 /* the first 48 bits of fraction come from zz[0] */ 375 u.d = d = two36 + zz[0]; 376 msw = u.l.lo; 377 zz[0] -= (d - two36); 378 379 u.d = d = two4 + zz[0]; 380 frac2 = u.l.lo; 381 zz[0] -= (d - two4); 382 383 /* the next 32 come from zz[0] and zz[1] */ 384 u.d = d = twom28 + (zz[0] + zz[1]); 385 frac3 = u.l.lo; 386 zz[0] -= (d - twom28); 387 388 /* condense the remaining fraction; errors here won't matter */ 389 c = zz[0] + zz[1]; 390 zz[1] = ((zz[0] - c) + zz[1]) + zz[2]; 391 zz[0] = c; 392 393 /* get the last word of fraction */ 394 u.d = d = twom60 + (zz[0] + zz[1]); 395 frac4 = u.l.lo; 396 zz[0] -= (d - twom60); 397 398 /* keep track of what's left for rounding; note that the error */ 399 /* in computing c will be non-negative due to rounding mode */ 400 c = zz[0] + zz[1]; 401 402 /* get the rounding mode, fudging directed rounding modes */ 403 /* as though the result were positive */ 404 rm = fsr >> 30; 405 if (sign) 406 rm ^= (rm >> 1); 407 408 /* round and raise exceptions */ 409 fsr &= ~FSR_CEXC; 410 if (c != zero) { 411 fsr |= FSR_NXC; 412 413 /* decide whether to round the fraction up */ 414 if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 || 415 (c == twom113 && ((frac4 & 1) || (c - zz[0] != zz[1])))))) { 416 /* round up and renormalize if necessary */ 417 if (++frac4 == 0) 418 if (++frac3 == 0) 419 if (++frac2 == 0) 420 if (++msw == 0x10000) { 421 msw = 0; 422 ez++; 423 } 424 } 425 } 426 427 /* check for under/overflow */ 428 if (ez >= 0x7fff) { 429 if (rm == FSR_RN || rm == FSR_RP) { 430 z.l.msw = sign | 0x7fff0000; 431 z.l.frac2 = z.l.frac3 = z.l.frac4 = 0; 432 } else { 433 z.l.msw = sign | 0x7ffeffff; 434 z.l.frac2 = z.l.frac3 = z.l.frac4 = 0xffffffff; 435 } 436 fsr |= FSR_OFC | FSR_NXC; 437 } else { 438 z.l.msw = sign | (ez << 16) | msw; 439 z.l.frac2 = frac2; 440 z.l.frac3 = frac3; 441 z.l.frac4 = frac4; 442 443 /* !ibit => exact result was tiny before rounding, */ 444 /* t nonzero => result delivered is inexact */ 445 if (!ibit) { 446 if (c != zero) 447 fsr |= FSR_UFC | FSR_NXC; 448 else if (fsr & FSR_UFM) 449 fsr |= FSR_UFC; 450 } 451 } 452 453 if ((fsr & FSR_CEXC) & (fsr >> 23)) { 454 __quad_setfsrp(&fsr); 455 __quad_fmulq(x, y, &Z); 456 } else { 457 Z = z; 458 fsr |= (fsr & 0x1f) << 5; 459 __quad_setfsrp(&fsr); 460 } 461 QUAD_RETURN(Z); 462 } 463