1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 */ 24 /* 25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 /* 30 * int __rem_pio2m(x,y,e0,nx,prec,ipio2) 31 * double x[],y[]; int e0,nx,prec; const int ipio2[]; 32 * 33 * __rem_pio2m return the last three digits of N with 34 * y = x - N*pi/2 35 * so that |y| < pi/4. 36 * 37 * The method is to compute the integer (mod 8) and fraction parts of 38 * (2/pi)*x without doing the full multiplication. In general we 39 * skip the part of the product that are known to be a huge integer ( 40 * more accurately, = 0 mod 8 ). Thus the number of operations are 41 * independent of the exponent of the input. 42 * 43 * (2/PI) is represented by an array of 24-bit integers in ipio2[]. 44 * Here PI could as well be a machine value pi. 45 * 46 * Input parameters: 47 * x[] The input value (must be positive) is broken into nx 48 * pieces of 24-bit integers in double precision format. 49 * x[i] will be the i-th 24 bit of x. The scaled exponent 50 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 51 * match x's up to 24 bits. 52 * 53 * Example of breaking a double z into x[0]+x[1]+x[2]: 54 * e0 = ilogb(z)-23 55 * z = scalbn(z,-e0) 56 * for i = 0,1,2 57 * x[i] = floor(z) 58 * z = (z-x[i])*2**24 59 * 60 * 61 * y[] ouput result in an array of double precision numbers. 62 * The dimension of y[] is: 63 * 24-bit precision 1 64 * 53-bit precision 2 65 * 64-bit precision 2 66 * 113-bit precision 3 67 * The actual value is the sum of them. Thus for 113-bit 68 * precsion, one may have to do something like: 69 * 70 * long double t,w,r_head, r_tail; 71 * t = (long double)y[2] + (long double)y[1]; 72 * w = (long double)y[0]; 73 * r_head = t+w; 74 * r_tail = w - (r_head - t); 75 * 76 * e0 The exponent of x[0] 77 * 78 * nx dimension of x[] 79 * 80 * prec an interger indicating the precision: 81 * 0 24 bits (single) 82 * 1 53 bits (double) 83 * 2 64 bits (extended) 84 * 3 113 bits (quad) 85 * 86 * ipio2[] 87 * integer array, contains the (24*i)-th to (24*i+23)-th 88 * bit of 2/pi or 2/PI after binary point. The corresponding 89 * floating value is 90 * 91 * ipio2[i] * 2^(-24(i+1)). 92 * 93 * External function: 94 * double scalbn( ), floor( ); 95 * 96 * 97 * Here is the description of some local variables: 98 * 99 * jk jk+1 is the initial number of terms of ipio2[] needed 100 * in the computation. The recommended value is 3,4,4, 101 * 6 for single, double, extended,and quad. 102 * 103 * jz local integer variable indicating the number of 104 * terms of ipio2[] used. 105 * 106 * jx nx - 1 107 * 108 * jv index for pointing to the suitable ipio2[] for the 109 * computation. In general, we want 110 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 111 * is an integer. Thus 112 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv 113 * Hence jv = max(0,(e0-3)/24). 114 * 115 * jp jp+1 is the number of terms in pio2[] needed, jp = jk. 116 * 117 * q[] double array with integral value, representing the 118 * 24-bits chunk of the product of x and 2/pi. 119 * 120 * q0 the corresponding exponent of q[0]. Note that the 121 * exponent for q[i] would be q0-24*i. 122 * 123 * pio2[] double precision array, obtained by cutting pi/2 124 * into 24 bits chunks. 125 * 126 * f[] ipio2[] in floating point 127 * 128 * iq[] integer array by breaking up q[] in 24-bits chunk. 129 * 130 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] 131 * 132 * ih integer. If >0 it indicats q[] is >= 0.5, hence 133 * it also indicates the *sign* of the result. 134 * 135 */ 136 137 #include <assert.h> 138 #include "libm.h" 139 140 #if defined(__i386) && !defined(__amd64) 141 extern int __swapRP(int); 142 #endif 143 144 static const int init_jk[] = { 3, 4, 4, 6 }; /* initial value for jk */ 145 146 static const double pio2[] = { 147 1.57079625129699707031e+00, 148 7.54978941586159635335e-08, 149 5.39030252995776476554e-15, 150 3.28200341580791294123e-22, 151 1.27065575308067607349e-29, 152 1.22933308981111328932e-36, 153 2.73370053816464559624e-44, 154 2.16741683877804819444e-51, 155 }; 156 157 static const double 158 zero = 0.0, 159 one = 1.0, 160 half = 0.5, 161 eight = 8.0, 162 eighth = 0.125, 163 two24 = 16777216.0, 164 twon24 = 5.960464477539062500E-8; 165 166 int 167 __rem_pio2m(double *x, double *y, int e0, int nx, int prec, const int *ipio2) 168 { 169 int jz, jx, jv, jp, jk, carry, n, iq[20]; 170 int i, j, k, m, q0, ih; 171 double z, fw, f[20], fq[20], q[20]; 172 #if defined(__i386) && !defined(__amd64) 173 int rp; 174 175 rp = __swapRP(fp_extended); 176 #endif 177 178 fq[0] = NAN; /* Make gcc happy */ 179 /* initialize jk */ 180 jp = jk = init_jk[prec]; 181 182 /* determine jx,jv,q0, note that 3>q0 */ 183 jx = nx - 1; 184 jv = (e0 - 3) / 24; 185 if (jv < 0) 186 jv = 0; 187 q0 = e0 - 24 * (jv + 1); 188 189 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ 190 j = jv - jx; 191 m = jx + jk; 192 for (i = 0; i <= m; i++, j++) 193 f[i] = (j < 0)? zero : (double)ipio2[j]; 194 195 /* compute q[0],q[1],...q[jk] */ 196 for (i = 0; i <= jk; i++) { 197 for (j = 0, fw = zero; j <= jx; j++) 198 fw += x[j] * f[jx+i-j]; 199 q[i] = fw; 200 } 201 202 jz = jk; 203 recompute: 204 /* distill q[] into iq[] reversingly */ 205 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) { 206 fw = (double)((int)(twon24 * z)); 207 iq[i] = (int)(z - two24 * fw); 208 z = q[j-1] + fw; 209 } 210 211 /* compute n */ 212 z = scalbn(z, q0); /* actual value of z */ 213 z -= eight * floor(z * eighth); /* trim off integer >= 8 */ 214 n = (int)z; 215 z -= (double)n; 216 ih = 0; 217 if (q0 > 0) { /* need iq[jz-1] to determine n */ 218 i = (iq[jz-1] >> (24 - q0)); 219 n += i; 220 iq[jz-1] -= i << (24 - q0); 221 ih = iq[jz-1] >> (23 - q0); 222 } else if (q0 == 0) { 223 ih = iq[jz-1] >> 23; 224 } else if (z >= half) { 225 ih = 2; 226 } 227 228 if (ih > 0) { /* q > 0.5 */ 229 n += 1; 230 carry = 0; 231 for (i = 0; i < jz; i++) { /* compute 1-q */ 232 j = iq[i]; 233 if (carry == 0) { 234 if (j != 0) { 235 carry = 1; 236 iq[i] = 0x1000000 - j; 237 } 238 } else { 239 iq[i] = 0xffffff - j; 240 } 241 } 242 if (q0 > 0) { /* rare case: chance is 1 in 12 */ 243 switch (q0) { 244 case 1: 245 iq[jz-1] &= 0x7fffff; 246 break; 247 case 2: 248 iq[jz-1] &= 0x3fffff; 249 break; 250 } 251 } 252 if (ih == 2) { 253 z = one - z; 254 if (carry != 0) 255 z -= scalbn(one, q0); 256 } 257 } 258 259 /* check if recomputation is needed */ 260 if (z == zero) { 261 j = 0; 262 for (i = jz - 1; i >= jk; i--) 263 j |= iq[i]; 264 if (j == 0) { /* need recomputation */ 265 /* set k to no. of terms needed */ 266 for (k = 1; iq[jk-k] == 0; k++) 267 ; 268 269 /* add q[jz+1] to q[jz+k] */ 270 for (i = jz + 1; i <= jz + k; i++) { 271 f[jx+i] = (double)ipio2[jv+i]; 272 for (j = 0, fw = zero; j <= jx; j++) 273 fw += x[j] * f[jx+i-j]; 274 q[i] = fw; 275 } 276 jz += k; 277 goto recompute; 278 } 279 } 280 281 /* cut out zero terms */ 282 if (z == zero) { 283 jz -= 1; 284 q0 -= 24; 285 while (iq[jz] == 0) { 286 jz--; 287 q0 -= 24; 288 } 289 } else { /* break z into 24-bit if neccessary */ 290 z = scalbn(z, -q0); 291 if (z >= two24) { 292 fw = (double)((int)(twon24 * z)); 293 iq[jz] = (int)(z - two24 * fw); 294 jz += 1; 295 q0 += 24; 296 iq[jz] = (int)fw; 297 } else { 298 iq[jz] = (int)z; 299 } 300 } 301 302 /* convert integer "bit" chunk to floating-point value */ 303 fw = scalbn(one, q0); 304 for (i = jz; i >= 0; i--) { 305 q[i] = fw * (double)iq[i]; 306 fw *= twon24; 307 } 308 309 /* compute pio2[0,...,jp]*q[jz,...,0] */ 310 for (i = jz; i >= 0; i--) { 311 for (fw = zero, k = 0; k <= jp && k <= jz - i; k++) 312 fw += pio2[k] * q[i+k]; 313 fq[jz-i] = fw; 314 } 315 316 /* compress fq[] into y[] */ 317 switch (prec) { 318 case 0: 319 fw = zero; 320 for (i = jz; i >= 0; i--) 321 fw += fq[i]; 322 y[0] = (ih == 0)? fw : -fw; 323 break; 324 325 case 1: 326 case 2: 327 fw = zero; 328 for (i = jz; i >= 0; i--) 329 fw += fq[i]; 330 y[0] = (ih == 0)? fw : -fw; 331 332 assert(!isnan(fq[0])); 333 fw = fq[0] - fw; 334 for (i = 1; i <= jz; i++) 335 fw += fq[i]; 336 y[1] = (ih == 0)? fw : -fw; 337 break; 338 339 default: 340 for (i = jz; i > 0; i--) { 341 fw = fq[i-1] + fq[i]; 342 fq[i] += fq[i-1] - fw; 343 fq[i-1] = fw; 344 } 345 for (i = jz; i > 1; i--) { 346 fw = fq[i-1] + fq[i]; 347 fq[i] += fq[i-1] - fw; 348 fq[i-1] = fw; 349 } 350 for (fw = zero, i = jz; i >= 2; i--) 351 fw += fq[i]; 352 if (ih == 0) { 353 y[0] = fq[0]; 354 y[1] = fq[1]; 355 y[2] = fw; 356 } else { 357 y[0] = -fq[0]; 358 y[1] = -fq[1]; 359 y[2] = -fw; 360 } 361 } 362 363 #if defined(__i386) && !defined(__amd64) 364 (void) __swapRP(rp); 365 #endif 366 return (n & 7); 367 } 368