1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 */ 24 /* 25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 /* 30 * int __rem_pio2m(x,y,e0,nx,prec,ipio2) 31 * double x[],y[]; int e0,nx,prec; const int ipio2[]; 32 * 33 * __rem_pio2m return the last three digits of N with 34 * y = x - N*pi/2 35 * so that |y| < pi/4. 36 * 37 * The method is to compute the integer (mod 8) and fraction parts of 38 * (2/pi)*x without doing the full multiplication. In general we 39 * skip the part of the product that are known to be a huge integer ( 40 * more accurately, = 0 mod 8 ). Thus the number of operations are 41 * independent of the exponent of the input. 42 * 43 * (2/PI) is represented by an array of 24-bit integers in ipio2[]. 44 * Here PI could as well be a machine value pi. 45 * 46 * Input parameters: 47 * x[] The input value (must be positive) is broken into nx 48 * pieces of 24-bit integers in double precision format. 49 * x[i] will be the i-th 24 bit of x. The scaled exponent 50 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 51 * match x's up to 24 bits. 52 * 53 * Example of breaking a double z into x[0]+x[1]+x[2]: 54 * e0 = ilogb(z)-23 55 * z = scalbn(z,-e0) 56 * for i = 0,1,2 57 * x[i] = floor(z) 58 * z = (z-x[i])*2**24 59 * 60 * 61 * y[] ouput result in an array of double precision numbers. 62 * The dimension of y[] is: 63 * 24-bit precision 1 64 * 53-bit precision 2 65 * 64-bit precision 2 66 * 113-bit precision 3 67 * The actual value is the sum of them. Thus for 113-bit 68 * precsion, one may have to do something like: 69 * 70 * long double t,w,r_head, r_tail; 71 * t = (long double)y[2] + (long double)y[1]; 72 * w = (long double)y[0]; 73 * r_head = t+w; 74 * r_tail = w - (r_head - t); 75 * 76 * e0 The exponent of x[0] 77 * 78 * nx dimension of x[] 79 * 80 * prec an interger indicating the precision: 81 * 0 24 bits (single) 82 * 1 53 bits (double) 83 * 2 64 bits (extended) 84 * 3 113 bits (quad) 85 * 86 * ipio2[] 87 * integer array, contains the (24*i)-th to (24*i+23)-th 88 * bit of 2/pi or 2/PI after binary point. The corresponding 89 * floating value is 90 * 91 * ipio2[i] * 2^(-24(i+1)). 92 * 93 * External function: 94 * double scalbn( ), floor( ); 95 * 96 * 97 * Here is the description of some local variables: 98 * 99 * jk jk+1 is the initial number of terms of ipio2[] needed 100 * in the computation. The recommended value is 3,4,4, 101 * 6 for single, double, extended,and quad. 102 * 103 * jz local integer variable indicating the number of 104 * terms of ipio2[] used. 105 * 106 * jx nx - 1 107 * 108 * jv index for pointing to the suitable ipio2[] for the 109 * computation. In general, we want 110 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 111 * is an integer. Thus 112 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv 113 * Hence jv = max(0,(e0-3)/24). 114 * 115 * jp jp+1 is the number of terms in pio2[] needed, jp = jk. 116 * 117 * q[] double array with integral value, representing the 118 * 24-bits chunk of the product of x and 2/pi. 119 * 120 * q0 the corresponding exponent of q[0]. Note that the 121 * exponent for q[i] would be q0-24*i. 122 * 123 * pio2[] double precision array, obtained by cutting pi/2 124 * into 24 bits chunks. 125 * 126 * f[] ipio2[] in floating point 127 * 128 * iq[] integer array by breaking up q[] in 24-bits chunk. 129 * 130 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] 131 * 132 * ih integer. If >0 it indicats q[] is >= 0.5, hence 133 * it also indicates the *sign* of the result. 134 * 135 */ 136 137 #include "libm.h" 138 139 #if defined(__i386) && !defined(__amd64) 140 extern int __swapRP(int); 141 #endif 142 143 static const int init_jk[] = { 3, 4, 4, 6 }; /* initial value for jk */ 144 145 static const double pio2[] = { 146 1.57079625129699707031e+00, 147 7.54978941586159635335e-08, 148 5.39030252995776476554e-15, 149 3.28200341580791294123e-22, 150 1.27065575308067607349e-29, 151 1.22933308981111328932e-36, 152 2.73370053816464559624e-44, 153 2.16741683877804819444e-51, 154 }; 155 156 static const double 157 zero = 0.0, 158 one = 1.0, 159 half = 0.5, 160 eight = 8.0, 161 eighth = 0.125, 162 two24 = 16777216.0, 163 twon24 = 5.960464477539062500E-8; 164 165 int 166 __rem_pio2m(double *x, double *y, int e0, int nx, int prec, const int *ipio2) 167 { 168 int jz, jx, jv, jp, jk, carry, n, iq[20]; 169 int i, j, k, m, q0, ih; 170 double z, fw, f[20], fq[20], q[20]; 171 #if defined(__i386) && !defined(__amd64) 172 int rp; 173 174 rp = __swapRP(fp_extended); 175 #endif 176 177 /* initialize jk */ 178 jp = jk = init_jk[prec]; 179 180 /* determine jx,jv,q0, note that 3>q0 */ 181 jx = nx - 1; 182 jv = (e0 - 3) / 24; 183 if (jv < 0) 184 jv = 0; 185 q0 = e0 - 24 * (jv + 1); 186 187 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ 188 j = jv - jx; 189 m = jx + jk; 190 for (i = 0; i <= m; i++, j++) 191 f[i] = (j < 0)? zero : (double)ipio2[j]; 192 193 /* compute q[0],q[1],...q[jk] */ 194 for (i = 0; i <= jk; i++) { 195 for (j = 0, fw = zero; j <= jx; j++) 196 fw += x[j] * f[jx+i-j]; 197 q[i] = fw; 198 } 199 200 jz = jk; 201 recompute: 202 /* distill q[] into iq[] reversingly */ 203 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) { 204 fw = (double)((int)(twon24 * z)); 205 iq[i] = (int)(z - two24 * fw); 206 z = q[j-1] + fw; 207 } 208 209 /* compute n */ 210 z = scalbn(z, q0); /* actual value of z */ 211 z -= eight * floor(z * eighth); /* trim off integer >= 8 */ 212 n = (int)z; 213 z -= (double)n; 214 ih = 0; 215 if (q0 > 0) { /* need iq[jz-1] to determine n */ 216 i = (iq[jz-1] >> (24 - q0)); 217 n += i; 218 iq[jz-1] -= i << (24 - q0); 219 ih = iq[jz-1] >> (23 - q0); 220 } else if (q0 == 0) { 221 ih = iq[jz-1] >> 23; 222 } else if (z >= half) { 223 ih = 2; 224 } 225 226 if (ih > 0) { /* q > 0.5 */ 227 n += 1; 228 carry = 0; 229 for (i = 0; i < jz; i++) { /* compute 1-q */ 230 j = iq[i]; 231 if (carry == 0) { 232 if (j != 0) { 233 carry = 1; 234 iq[i] = 0x1000000 - j; 235 } 236 } else { 237 iq[i] = 0xffffff - j; 238 } 239 } 240 if (q0 > 0) { /* rare case: chance is 1 in 12 */ 241 switch (q0) { 242 case 1: 243 iq[jz-1] &= 0x7fffff; 244 break; 245 case 2: 246 iq[jz-1] &= 0x3fffff; 247 break; 248 } 249 } 250 if (ih == 2) { 251 z = one - z; 252 if (carry != 0) 253 z -= scalbn(one, q0); 254 } 255 } 256 257 /* check if recomputation is needed */ 258 if (z == zero) { 259 j = 0; 260 for (i = jz - 1; i >= jk; i--) 261 j |= iq[i]; 262 if (j == 0) { /* need recomputation */ 263 /* set k to no. of terms needed */ 264 for (k = 1; iq[jk-k] == 0; k++) 265 ; 266 267 /* add q[jz+1] to q[jz+k] */ 268 for (i = jz + 1; i <= jz + k; i++) { 269 f[jx+i] = (double)ipio2[jv+i]; 270 for (j = 0, fw = zero; j <= jx; j++) 271 fw += x[j] * f[jx+i-j]; 272 q[i] = fw; 273 } 274 jz += k; 275 goto recompute; 276 } 277 } 278 279 /* cut out zero terms */ 280 if (z == zero) { 281 jz -= 1; 282 q0 -= 24; 283 while (iq[jz] == 0) { 284 jz--; 285 q0 -= 24; 286 } 287 } else { /* break z into 24-bit if neccessary */ 288 z = scalbn(z, -q0); 289 if (z >= two24) { 290 fw = (double)((int)(twon24 * z)); 291 iq[jz] = (int)(z - two24 * fw); 292 jz += 1; 293 q0 += 24; 294 iq[jz] = (int)fw; 295 } else { 296 iq[jz] = (int)z; 297 } 298 } 299 300 /* convert integer "bit" chunk to floating-point value */ 301 fw = scalbn(one, q0); 302 for (i = jz; i >= 0; i--) { 303 q[i] = fw * (double)iq[i]; 304 fw *= twon24; 305 } 306 307 /* compute pio2[0,...,jp]*q[jz,...,0] */ 308 for (i = jz; i >= 0; i--) { 309 for (fw = zero, k = 0; k <= jp && k <= jz - i; k++) 310 fw += pio2[k] * q[i+k]; 311 fq[jz-i] = fw; 312 } 313 314 /* compress fq[] into y[] */ 315 switch (prec) { 316 case 0: 317 fw = zero; 318 for (i = jz; i >= 0; i--) 319 fw += fq[i]; 320 y[0] = (ih == 0)? fw : -fw; 321 break; 322 323 case 1: 324 case 2: 325 fw = zero; 326 for (i = jz; i >= 0; i--) 327 fw += fq[i]; 328 y[0] = (ih == 0)? fw : -fw; 329 fw = fq[0] - fw; 330 for (i = 1; i <= jz; i++) 331 fw += fq[i]; 332 y[1] = (ih == 0)? fw : -fw; 333 break; 334 335 default: 336 for (i = jz; i > 0; i--) { 337 fw = fq[i-1] + fq[i]; 338 fq[i] += fq[i-1] - fw; 339 fq[i-1] = fw; 340 } 341 for (i = jz; i > 1; i--) { 342 fw = fq[i-1] + fq[i]; 343 fq[i] += fq[i-1] - fw; 344 fq[i-1] = fw; 345 } 346 for (fw = zero, i = jz; i >= 2; i--) 347 fw += fq[i]; 348 if (ih == 0) { 349 y[0] = fq[0]; 350 y[1] = fq[1]; 351 y[2] = fw; 352 } else { 353 y[0] = -fq[0]; 354 y[1] = -fq[1]; 355 y[2] = -fw; 356 } 357 } 358 359 #if defined(__i386) && !defined(__amd64) 360 (void) __swapRP(rp); 361 #endif 362 return (n & 7); 363 } 364