1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 1994-1997, by Sun Microsystems, Inc. 24 * All rights reserved. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This file contains __quad_mag_add and __quad_mag_sub, the core 31 * of the quad precision add and subtract operations. 32 */ 33 34 #include "quad.h" 35 36 /* 37 * __quad_mag_add(x, y, z, fsr) 38 * 39 * Sets *z = *x + *y, rounded according to the rounding mode in *fsr, 40 * and updates the current exceptions in *fsr. This routine assumes 41 * *x and *y are finite, with the same sign (i.e., an addition of 42 * magnitudes), |*x| >= |*y|, and *z already has its sign bit set. 43 */ 44 void 45 __quad_mag_add(const union longdouble *x, const union longdouble *y, 46 union longdouble *z, unsigned int *fsr) 47 { 48 unsigned int lx, ly, ex, ey, frac2, frac3, frac4; 49 unsigned int round, sticky, carry, rm; 50 int e, uflo; 51 52 /* get the leading significand words and exponents */ 53 ex = (x->l.msw & 0x7fffffff) >> 16; 54 lx = x->l.msw & 0xffff; 55 if (ex == 0) 56 ex = 1; 57 else 58 lx |= 0x10000; 59 60 ey = (y->l.msw & 0x7fffffff) >> 16; 61 ly = y->l.msw & 0xffff; 62 if (ey == 0) 63 ey = 1; 64 else 65 ly |= 0x10000; 66 67 /* prenormalize y */ 68 e = (int) ex - (int) ey; 69 round = sticky = 0; 70 if (e >= 114) { 71 frac2 = x->l.frac2; 72 frac3 = x->l.frac3; 73 frac4 = x->l.frac4; 74 sticky = ly | y->l.frac2 | y->l.frac3 | y->l.frac4; 75 } else { 76 frac2 = y->l.frac2; 77 frac3 = y->l.frac3; 78 frac4 = y->l.frac4; 79 if (e >= 96) { 80 sticky = frac4 | frac3 | (frac2 & 0x7fffffff); 81 round = frac2 & 0x80000000; 82 frac4 = ly; 83 frac3 = frac2 = ly = 0; 84 e -= 96; 85 } else if (e >= 64) { 86 sticky = frac4 | (frac3 & 0x7fffffff); 87 round = frac3 & 0x80000000; 88 frac4 = frac2; 89 frac3 = ly; 90 frac2 = ly = 0; 91 e -= 64; 92 } else if (e >= 32) { 93 sticky = frac4 & 0x7fffffff; 94 round = frac4 & 0x80000000; 95 frac4 = frac3; 96 frac3 = frac2; 97 frac2 = ly; 98 ly = 0; 99 e -= 32; 100 } 101 if (e) { 102 sticky |= round | (frac4 & ((1 << (e - 1)) - 1)); 103 round = frac4 & (1 << (e - 1)); 104 frac4 = (frac4 >> e) | (frac3 << (32 - e)); 105 frac3 = (frac3 >> e) | (frac2 << (32 - e)); 106 frac2 = (frac2 >> e) | (ly << (32 - e)); 107 ly >>= e; 108 } 109 110 /* add, propagating carries */ 111 frac4 += x->l.frac4; 112 carry = (frac4 < x->l.frac4); 113 frac3 += x->l.frac3; 114 if (carry) { 115 frac3++; 116 carry = (frac3 <= x->l.frac3); 117 } else { 118 carry = (frac3 < x->l.frac3); 119 } 120 frac2 += x->l.frac2; 121 if (carry) { 122 frac2++; 123 carry = (frac2 <= x->l.frac2); 124 } else { 125 carry = (frac2 < x->l.frac2); 126 } 127 lx += ly; 128 if (carry) 129 lx++; 130 131 /* postnormalize */ 132 if (lx >= 0x20000) { 133 sticky |= round; 134 round = frac4 & 1; 135 frac4 = (frac4 >> 1) | (frac3 << 31); 136 frac3 = (frac3 >> 1) | (frac2 << 31); 137 frac2 = (frac2 >> 1) | (lx << 31); 138 lx >>= 1; 139 ex++; 140 } 141 } 142 143 /* keep track of whether the result before rounding is tiny */ 144 uflo = (lx < 0x10000); 145 146 /* get the rounding mode, fudging directed rounding modes */ 147 /* as though the result were positive */ 148 rm = *fsr >> 30; 149 if (z->l.msw) 150 rm ^= (rm >> 1); 151 152 /* see if we need to round */ 153 if (round | sticky) { 154 *fsr |= FSR_NXC; 155 156 /* round up if necessary */ 157 if (rm == FSR_RP || (rm == FSR_RN && round && 158 (sticky || (frac4 & 1)))) { 159 if (++frac4 == 0) 160 if (++frac3 == 0) 161 if (++frac2 == 0) 162 if (++lx >= 0x20000) { 163 lx >>= 1; 164 ex++; 165 } 166 } 167 } 168 169 /* check for overflow */ 170 if (ex >= 0x7fff) { 171 /* store the default overflowed result */ 172 *fsr |= FSR_OFC | FSR_NXC; 173 if (rm == FSR_RN || rm == FSR_RP) { 174 z->l.msw |= 0x7fff0000; 175 z->l.frac2 = z->l.frac3 = z->l.frac4 = 0; 176 } else { 177 z->l.msw |= 0x7ffeffff; 178 z->l.frac2 = z->l.frac3 = z->l.frac4 = 0xffffffff; 179 } 180 } else { 181 /* store the result */ 182 if (lx >= 0x10000) 183 z->l.msw |= (ex << 16); 184 z->l.msw |= (lx & 0xffff); 185 z->l.frac2 = frac2; 186 z->l.frac3 = frac3; 187 z->l.frac4 = frac4; 188 189 /* if the pre-rounded result was tiny and underflow trapping */ 190 /* is enabled, simulate underflow */ 191 if (uflo && (*fsr & FSR_UFM)) 192 *fsr |= FSR_UFC; 193 } 194 } 195 196 /* 197 * __quad_mag_sub(x, y, z, fsr) 198 * 199 * Sets *z = *x - *y, rounded according to the rounding mode in *fsr, 200 * and updates the current exceptions in *fsr. This routine assumes 201 * *x and *y are finite, with opposite signs (i.e., a subtraction of 202 * magnitudes), |*x| >= |*y|, and *z already has its sign bit set. 203 */ 204 void 205 __quad_mag_sub(const union longdouble *x, const union longdouble *y, 206 union longdouble *z, unsigned int *fsr) 207 { 208 unsigned int lx, ly, ex, ey, frac2, frac3, frac4; 209 unsigned int guard, round, sticky, borrow, rm; 210 int e; 211 212 /* get the leading significand words and exponents */ 213 ex = (x->l.msw & 0x7fffffff) >> 16; 214 lx = x->l.msw & 0xffff; 215 if (ex == 0) 216 ex = 1; 217 else 218 lx |= 0x10000; 219 220 ey = (y->l.msw & 0x7fffffff) >> 16; 221 ly = y->l.msw & 0xffff; 222 if (ey == 0) 223 ey = 1; 224 else 225 ly |= 0x10000; 226 227 /* prenormalize y */ 228 e = (int) ex - (int) ey; 229 guard = round = sticky = 0; 230 if (e > 114) { 231 sticky = ly | y->l.frac2 | y->l.frac3 | y->l.frac4; 232 ly = frac2 = frac3 = frac4 = 0; 233 } else { 234 frac2 = y->l.frac2; 235 frac3 = y->l.frac3; 236 frac4 = y->l.frac4; 237 if (e >= 96) { 238 sticky = frac4 | frac3 | (frac2 & 0x3fffffff); 239 round = frac2 & 0x40000000; 240 guard = frac2 & 0x80000000; 241 frac4 = ly; 242 frac3 = frac2 = ly = 0; 243 e -= 96; 244 } else if (e >= 64) { 245 sticky = frac4 | (frac3 & 0x3fffffff); 246 round = frac3 & 0x40000000; 247 guard = frac3 & 0x80000000; 248 frac4 = frac2; 249 frac3 = ly; 250 frac2 = ly = 0; 251 e -= 64; 252 } else if (e >= 32) { 253 sticky = frac4 & 0x3fffffff; 254 round = frac4 & 0x40000000; 255 guard = frac4 & 0x80000000; 256 frac4 = frac3; 257 frac3 = frac2; 258 frac2 = ly; 259 ly = 0; 260 e -= 32; 261 } 262 if (e > 1) { 263 sticky |= guard | round | 264 (frac4 & ((1 << (e - 2)) - 1)); 265 round = frac4 & (1 << (e - 2)); 266 guard = frac4 & (1 << (e - 1)); 267 frac4 = (frac4 >> e) | (frac3 << (32 - e)); 268 frac3 = (frac3 >> e) | (frac2 << (32 - e)); 269 frac2 = (frac2 >> e) | (ly << (32 - e)); 270 ly >>= e; 271 } else if (e == 1) { 272 sticky |= round; 273 round = guard; 274 guard = frac4 & 1; 275 frac4 = (frac4 >> 1) | (frac3 << 31); 276 frac3 = (frac3 >> 1) | (frac2 << 31); 277 frac2 = (frac2 >> 1) | (ly << 31); 278 ly >>= 1; 279 } 280 } 281 282 /* complement guard, round, and sticky as need be */ 283 if (sticky) { 284 round = !round; 285 guard = !guard; 286 } else if (round) { 287 guard = !guard; 288 } 289 borrow = (guard | round | sticky); 290 291 /* subtract, propagating borrows */ 292 frac4 = x->l.frac4 - frac4; 293 if (borrow) { 294 frac4--; 295 borrow = (frac4 >= x->l.frac4); 296 } else { 297 borrow = (frac4 > x->l.frac4); 298 } 299 frac3 = x->l.frac3 - frac3; 300 if (borrow) { 301 frac3--; 302 borrow = (frac3 >= x->l.frac3); 303 } else { 304 borrow = (frac3 > x->l.frac3); 305 } 306 frac2 = x->l.frac2 - frac2; 307 if (borrow) { 308 frac2--; 309 borrow = (frac2 >= x->l.frac2); 310 } else { 311 borrow = (frac2 > x->l.frac2); 312 } 313 lx -= ly; 314 if (borrow) 315 lx--; 316 317 /* get the rounding mode */ 318 rm = *fsr >> 30; 319 320 /* handle zero result */ 321 if (!(lx | frac2 | frac3 | frac4 | guard)) { 322 z->l.msw = ((rm == FSR_RM)? 0x80000000 : 0); 323 z->l.frac2 = z->l.frac3 = z->l.frac4 = 0; 324 return; 325 } 326 327 /* postnormalize */ 328 if (lx < 0x10000) { 329 /* if cancellation occurred or the exponent is 1, */ 330 /* the result is exact */ 331 if (lx < 0x8000 || ex == 1) { 332 while ((lx | (frac2 & 0xfffe0000)) == 0 && ex > 32) { 333 lx = frac2; 334 frac2 = frac3; 335 frac3 = frac4; 336 frac4 = ((guard)? 0x80000000 : 0); 337 guard = 0; 338 ex -= 32; 339 } 340 while (lx < 0x10000 && ex > 1) { 341 lx = (lx << 1) | (frac2 >> 31); 342 frac2 = (frac2 << 1) | (frac3 >> 31); 343 frac3 = (frac3 << 1) | (frac4 >> 31); 344 frac4 <<= 1; 345 if (guard) { 346 frac4 |= 1; 347 guard = 0; 348 } 349 ex--; 350 } 351 if (lx >= 0x10000) 352 z->l.msw |= (ex << 16); 353 z->l.msw |= (lx & 0xffff); 354 z->l.frac2 = frac2; 355 z->l.frac3 = frac3; 356 z->l.frac4 = frac4; 357 358 /* if the result is tiny and underflow trapping is */ 359 /* enabled, simulate underflow */ 360 if (lx < 0x10000 && (*fsr & FSR_UFM)) 361 *fsr |= FSR_UFC; 362 return; 363 } 364 365 /* otherwise we only borrowed one place */ 366 lx = (lx << 1) | (frac2 >> 31); 367 frac2 = (frac2 << 1) | (frac3 >> 31); 368 frac3 = (frac3 << 1) | (frac4 >> 31); 369 frac4 <<= 1; 370 if (guard) 371 frac4 |= 1; 372 ex--; 373 } else { 374 sticky |= round; 375 round = guard; 376 } 377 378 /* fudge directed rounding modes as though the result were positive */ 379 if (z->l.msw) 380 rm ^= (rm >> 1); 381 382 /* see if we need to round */ 383 if (round | sticky) { 384 *fsr |= FSR_NXC; 385 386 /* round up if necessary */ 387 if (rm == FSR_RP || (rm == FSR_RN && round && 388 (sticky || (frac4 & 1)))) { 389 if (++frac4 == 0) 390 if (++frac3 == 0) 391 if (++frac2 == 0) 392 if (++lx >= 0x20000) { 393 lx >>= 1; 394 ex++; 395 } 396 } 397 } 398 399 /* store the result */ 400 z->l.msw |= (ex << 16) | (lx & 0xffff); 401 z->l.frac2 = frac2; 402 z->l.frac3 = frac3; 403 z->l.frac4 = frac4; 404 } 405