xref: /titanic_50/usr/src/lib/libmvec/common/__vrem_pio2m.c (revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8)
1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis  * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis  *
4*25c28e83SPiotr Jasiukajtis  * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis  * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis  * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis  *
8*25c28e83SPiotr Jasiukajtis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis  * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis  * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis  * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis  *
13*25c28e83SPiotr Jasiukajtis  * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis  * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis  * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis  *
19*25c28e83SPiotr Jasiukajtis  * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis  */
21*25c28e83SPiotr Jasiukajtis 
22*25c28e83SPiotr Jasiukajtis /*
23*25c28e83SPiotr Jasiukajtis  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
24*25c28e83SPiotr Jasiukajtis  */
25*25c28e83SPiotr Jasiukajtis /*
26*25c28e83SPiotr Jasiukajtis  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
27*25c28e83SPiotr Jasiukajtis  * Use is subject to license terms.
28*25c28e83SPiotr Jasiukajtis  */
29*25c28e83SPiotr Jasiukajtis 
30*25c28e83SPiotr Jasiukajtis /*
31*25c28e83SPiotr Jasiukajtis  * Given X, __vlibm_rem_pio2m finds Y and an integer n such that
32*25c28e83SPiotr Jasiukajtis  * Y = X - n*pi/2 and |Y| < pi/2.
33*25c28e83SPiotr Jasiukajtis  *
34*25c28e83SPiotr Jasiukajtis  * On entry, X is represented by x, an array of nx 24-bit integers
35*25c28e83SPiotr Jasiukajtis  * stored in double precision format, and e:
36*25c28e83SPiotr Jasiukajtis  *
37*25c28e83SPiotr Jasiukajtis  *   X = sum (x[i] * 2^(e - 24*i))
38*25c28e83SPiotr Jasiukajtis  *
39*25c28e83SPiotr Jasiukajtis  * nx must be 1, 2, or 3, and e must be >= -24.  For example, a
40*25c28e83SPiotr Jasiukajtis  * suitable representation for the double precision number z can
41*25c28e83SPiotr Jasiukajtis  * be computed as follows:
42*25c28e83SPiotr Jasiukajtis  *
43*25c28e83SPiotr Jasiukajtis  *	e  = ilogb(z)-23
44*25c28e83SPiotr Jasiukajtis  *	z  = scalbn(z,-e)
45*25c28e83SPiotr Jasiukajtis  *	for i = 0,1,2
46*25c28e83SPiotr Jasiukajtis  *		x[i] = floor(z)
47*25c28e83SPiotr Jasiukajtis  *		z    = (z-x[i])*2**24
48*25c28e83SPiotr Jasiukajtis  *
49*25c28e83SPiotr Jasiukajtis  * On exit, Y is approximated by y[0] if prec is 0 and by the un-
50*25c28e83SPiotr Jasiukajtis  * evaluated sum y[0] + y[1] if prec != 0.  The approximation is
51*25c28e83SPiotr Jasiukajtis  * accurate to 53 bits in the former case and to at least 72 bits
52*25c28e83SPiotr Jasiukajtis  * in the latter.
53*25c28e83SPiotr Jasiukajtis  *
54*25c28e83SPiotr Jasiukajtis  * __vlibm_rem_pio2m returns n mod 8.
55*25c28e83SPiotr Jasiukajtis  *
56*25c28e83SPiotr Jasiukajtis  * Notes:
57*25c28e83SPiotr Jasiukajtis  *
58*25c28e83SPiotr Jasiukajtis  * As n is the integer nearest X * 2/pi, we approximate the latter
59*25c28e83SPiotr Jasiukajtis  * product to a precision that is determined dynamically so as to
60*25c28e83SPiotr Jasiukajtis  * ensure that the final value Y is approximated accurately enough.
61*25c28e83SPiotr Jasiukajtis  * We don't bother to compute terms in the product that are multiples
62*25c28e83SPiotr Jasiukajtis  * of 8, so the cost of this multiplication is independent of the
63*25c28e83SPiotr Jasiukajtis  * magnitude of X.  The variable ip determines the offset into the
64*25c28e83SPiotr Jasiukajtis  * array ipio2 of the first term we need to use.  The variable eq0
65*25c28e83SPiotr Jasiukajtis  * is the corresponding exponent of the first partial product.
66*25c28e83SPiotr Jasiukajtis  *
67*25c28e83SPiotr Jasiukajtis  * The partial products are scaled, summed, and split into an array
68*25c28e83SPiotr Jasiukajtis  * of non-overlapping 24-bit terms (not necessarily having the same
69*25c28e83SPiotr Jasiukajtis  * signs).  Each partial product overlaps three elements of the
70*25c28e83SPiotr Jasiukajtis  * resulting array:
71*25c28e83SPiotr Jasiukajtis  *
72*25c28e83SPiotr Jasiukajtis  *	q[i]   xxxxxxxxxxxxxx
73*25c28e83SPiotr Jasiukajtis  *	q[i+1]       xxxxxxxxxxxxxx
74*25c28e83SPiotr Jasiukajtis  *	q[i+2]             xxxxxxxxxxxxxx
75*25c28e83SPiotr Jasiukajtis  *	...                      ...
76*25c28e83SPiotr Jasiukajtis  *
77*25c28e83SPiotr Jasiukajtis  *
78*25c28e83SPiotr Jasiukajtis  *	r[i]     xxxxxx
79*25c28e83SPiotr Jasiukajtis  *      r[i+1]         xxxxxx
80*25c28e83SPiotr Jasiukajtis  *      r[i+2]               xxxxxx
81*25c28e83SPiotr Jasiukajtis  *	...                        ...
82*25c28e83SPiotr Jasiukajtis  *
83*25c28e83SPiotr Jasiukajtis  * In order that the last element of the r array have some correct
84*25c28e83SPiotr Jasiukajtis  * bits, we compute an extra term in the q array, but we don't bother
85*25c28e83SPiotr Jasiukajtis  * to split this last term into 24-bit chunks; thus, the final term
86*25c28e83SPiotr Jasiukajtis  * of the r array could have more than 24 bits, but this doesn't
87*25c28e83SPiotr Jasiukajtis  * matter.
88*25c28e83SPiotr Jasiukajtis  *
89*25c28e83SPiotr Jasiukajtis  * After we subtract the nearest integer to the product, we multiply
90*25c28e83SPiotr Jasiukajtis  * the remaining part of r by pi/2 to obtain Y.  Before we compute
91*25c28e83SPiotr Jasiukajtis  * this last product, however, we make sure that the remaining part
92*25c28e83SPiotr Jasiukajtis  * of r has at least five nonzero terms, computing more if need be.
93*25c28e83SPiotr Jasiukajtis  * This ensures that even if the first nonzero term is only a single
94*25c28e83SPiotr Jasiukajtis  * bit and the last term is wrong in several trailing bits, we still
95*25c28e83SPiotr Jasiukajtis  * have enough accuracy to obtain 72 bits of Y.
96*25c28e83SPiotr Jasiukajtis  *
97*25c28e83SPiotr Jasiukajtis  * IMPORTANT: This code assumes that the rounding mode is round-to-
98*25c28e83SPiotr Jasiukajtis  * nearest in several key places.  First, after we compute X * 2/pi,
99*25c28e83SPiotr Jasiukajtis  * we round to the nearest integer by adding and subtracting a power
100*25c28e83SPiotr Jasiukajtis  * of two.  This step must be done in round-to-nearest mode to ensure
101*25c28e83SPiotr Jasiukajtis  * that the remainder is less than 1/2 in absolute value.  (Because
102*25c28e83SPiotr Jasiukajtis  * we only take two adjacent terms of r into account when we perform
103*25c28e83SPiotr Jasiukajtis  * this rounding, in very rare cases the remainder could be just
104*25c28e83SPiotr Jasiukajtis  * barely greater than 1/2, but this shouldn't matter in practice.)
105*25c28e83SPiotr Jasiukajtis  *
106*25c28e83SPiotr Jasiukajtis  * Second, we also split the partial products of X * 2/pi into 24-bit
107*25c28e83SPiotr Jasiukajtis  * pieces by adding and subtracting a power of two.  In this step,
108*25c28e83SPiotr Jasiukajtis  * round-to-nearest mode is important in order to guarantee that
109*25c28e83SPiotr Jasiukajtis  * the index of the first nonzero term in the remainder gives an
110*25c28e83SPiotr Jasiukajtis  * accurate indication of the number of significant terms.  For
111*25c28e83SPiotr Jasiukajtis  * example, suppose eq0 = -1, so that r[1] is a multiple of 1/2 and
112*25c28e83SPiotr Jasiukajtis  * |r[2]| < 1/2.  After we subtract the nearest integer, r[1] could
113*25c28e83SPiotr Jasiukajtis  * be -1/2, and r[2] could be very nearly 1/2, so that r[1] != 0,
114*25c28e83SPiotr Jasiukajtis  * yet the remainder is much smaller than the least significant bit
115*25c28e83SPiotr Jasiukajtis  * corresponding to r[1].  As long as we use round-to-nearest mode,
116*25c28e83SPiotr Jasiukajtis  * this can't happen; instead, the absolute value of each r[j] will
117*25c28e83SPiotr Jasiukajtis  * be less than 1/2 the least significant bit corresponding to r[j-1],
118*25c28e83SPiotr Jasiukajtis  * so that the entire remainder must be at least half as large as
119*25c28e83SPiotr Jasiukajtis  * the first nonzero term (or perhaps just barely smaller than this).
120*25c28e83SPiotr Jasiukajtis  */
121*25c28e83SPiotr Jasiukajtis 
122*25c28e83SPiotr Jasiukajtis #include <sys/isa_defs.h>
123*25c28e83SPiotr Jasiukajtis 
124*25c28e83SPiotr Jasiukajtis #ifdef _LITTLE_ENDIAN
125*25c28e83SPiotr Jasiukajtis #define HIWORD	1
126*25c28e83SPiotr Jasiukajtis #define LOWORD	0
127*25c28e83SPiotr Jasiukajtis #else
128*25c28e83SPiotr Jasiukajtis #define HIWORD	0
129*25c28e83SPiotr Jasiukajtis #define LOWORD	1
130*25c28e83SPiotr Jasiukajtis #endif
131*25c28e83SPiotr Jasiukajtis 
132*25c28e83SPiotr Jasiukajtis /* 396 hex digits of 2/pi, with two leading zeroes to make life easier */
133*25c28e83SPiotr Jasiukajtis static const double ipio2[] = {
134*25c28e83SPiotr Jasiukajtis 	0, 0,
135*25c28e83SPiotr Jasiukajtis 	0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
136*25c28e83SPiotr Jasiukajtis 	0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
137*25c28e83SPiotr Jasiukajtis 	0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
138*25c28e83SPiotr Jasiukajtis 	0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
139*25c28e83SPiotr Jasiukajtis 	0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
140*25c28e83SPiotr Jasiukajtis 	0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
141*25c28e83SPiotr Jasiukajtis 	0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
142*25c28e83SPiotr Jasiukajtis 	0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
143*25c28e83SPiotr Jasiukajtis 	0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
144*25c28e83SPiotr Jasiukajtis 	0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
145*25c28e83SPiotr Jasiukajtis 	0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
146*25c28e83SPiotr Jasiukajtis };
147*25c28e83SPiotr Jasiukajtis 
148*25c28e83SPiotr Jasiukajtis /* pi/2 in 24-bit pieces */
149*25c28e83SPiotr Jasiukajtis static const double pio2[] = {
150*25c28e83SPiotr Jasiukajtis 	1.57079625129699707031e+00,
151*25c28e83SPiotr Jasiukajtis 	7.54978941586159635335e-08,
152*25c28e83SPiotr Jasiukajtis 	5.39030252995776476554e-15,
153*25c28e83SPiotr Jasiukajtis 	3.28200341580791294123e-22,
154*25c28e83SPiotr Jasiukajtis 	1.27065575308067607349e-29,
155*25c28e83SPiotr Jasiukajtis };
156*25c28e83SPiotr Jasiukajtis 
157*25c28e83SPiotr Jasiukajtis /* miscellaneous constants */
158*25c28e83SPiotr Jasiukajtis static const double
159*25c28e83SPiotr Jasiukajtis 	zero	= 0.0,
160*25c28e83SPiotr Jasiukajtis 	two24	= 16777216.0,
161*25c28e83SPiotr Jasiukajtis 	round1	= 6755399441055744.0,	/* 3 * 2^51 */
162*25c28e83SPiotr Jasiukajtis 	round24	= 113336795588871485128704.0, /* 3 * 2^75 */
163*25c28e83SPiotr Jasiukajtis 	twon24	= 5.960464477539062500E-8;
164*25c28e83SPiotr Jasiukajtis 
165*25c28e83SPiotr Jasiukajtis int
__vlibm_rem_pio2m(double * x,double * y,int e,int nx,int prec)166*25c28e83SPiotr Jasiukajtis __vlibm_rem_pio2m(double *x, double *y, int e, int nx, int prec)
167*25c28e83SPiotr Jasiukajtis {
168*25c28e83SPiotr Jasiukajtis 	union {
169*25c28e83SPiotr Jasiukajtis 		double	d;
170*25c28e83SPiotr Jasiukajtis 		int	i[2];
171*25c28e83SPiotr Jasiukajtis 	} s;
172*25c28e83SPiotr Jasiukajtis 	double	z, t, p, q[20], r[21], *pr;
173*25c28e83SPiotr Jasiukajtis 	int	nq, ip, n, i, j, k, eq0, eqnqm1;
174*25c28e83SPiotr Jasiukajtis 
175*25c28e83SPiotr Jasiukajtis 	/* determine ip and eq0; note that -48 <= eq0 <= 2 */
176*25c28e83SPiotr Jasiukajtis 	ip = (e - 3) / 24;
177*25c28e83SPiotr Jasiukajtis 	if (ip < 0)
178*25c28e83SPiotr Jasiukajtis 		ip = 0;
179*25c28e83SPiotr Jasiukajtis 	eq0 = e - 24 * (ip + 1);
180*25c28e83SPiotr Jasiukajtis 
181*25c28e83SPiotr Jasiukajtis 	/* compute q[0,...,5] = x * ipio2 and initialize nq and eqnqm1 */
182*25c28e83SPiotr Jasiukajtis 	if (nx == 3) {
183*25c28e83SPiotr Jasiukajtis 		q[0] = x[0] * ipio2[ip+2] + x[1] * ipio2[ip+1] + x[2] * ipio2[ip];
184*25c28e83SPiotr Jasiukajtis 		q[1] = x[0] * ipio2[ip+3] + x[1] * ipio2[ip+2] + x[2] * ipio2[ip+1];
185*25c28e83SPiotr Jasiukajtis 		q[2] = x[0] * ipio2[ip+4] + x[1] * ipio2[ip+3] + x[2] * ipio2[ip+2];
186*25c28e83SPiotr Jasiukajtis 		q[3] = x[0] * ipio2[ip+5] + x[1] * ipio2[ip+4] + x[2] * ipio2[ip+3];
187*25c28e83SPiotr Jasiukajtis 		q[4] = x[0] * ipio2[ip+6] + x[1] * ipio2[ip+5] + x[2] * ipio2[ip+4];
188*25c28e83SPiotr Jasiukajtis 		q[5] = x[0] * ipio2[ip+7] + x[1] * ipio2[ip+6] + x[2] * ipio2[ip+5];
189*25c28e83SPiotr Jasiukajtis 	} else if (nx == 2) {
190*25c28e83SPiotr Jasiukajtis 		q[0] = x[0] * ipio2[ip+2] + x[1] * ipio2[ip+1];
191*25c28e83SPiotr Jasiukajtis 		q[1] = x[0] * ipio2[ip+3] + x[1] * ipio2[ip+2];
192*25c28e83SPiotr Jasiukajtis 		q[2] = x[0] * ipio2[ip+4] + x[1] * ipio2[ip+3];
193*25c28e83SPiotr Jasiukajtis 		q[3] = x[0] * ipio2[ip+5] + x[1] * ipio2[ip+4];
194*25c28e83SPiotr Jasiukajtis 		q[4] = x[0] * ipio2[ip+6] + x[1] * ipio2[ip+5];
195*25c28e83SPiotr Jasiukajtis 		q[5] = x[0] * ipio2[ip+7] + x[1] * ipio2[ip+6];
196*25c28e83SPiotr Jasiukajtis 	} else {
197*25c28e83SPiotr Jasiukajtis 		q[0] = x[0] * ipio2[ip+2];
198*25c28e83SPiotr Jasiukajtis 		q[1] = x[0] * ipio2[ip+3];
199*25c28e83SPiotr Jasiukajtis 		q[2] = x[0] * ipio2[ip+4];
200*25c28e83SPiotr Jasiukajtis 		q[3] = x[0] * ipio2[ip+5];
201*25c28e83SPiotr Jasiukajtis 		q[4] = x[0] * ipio2[ip+6];
202*25c28e83SPiotr Jasiukajtis 		q[5] = x[0] * ipio2[ip+7];
203*25c28e83SPiotr Jasiukajtis 	}
204*25c28e83SPiotr Jasiukajtis 	nq = 5;
205*25c28e83SPiotr Jasiukajtis 	eqnqm1 = eq0 - 96;
206*25c28e83SPiotr Jasiukajtis 
207*25c28e83SPiotr Jasiukajtis recompute:
208*25c28e83SPiotr Jasiukajtis 	/* propagate carries and incorporate powers of two */
209*25c28e83SPiotr Jasiukajtis 	s.i[HIWORD] = (0x3ff + eqnqm1) << 20;
210*25c28e83SPiotr Jasiukajtis 	s.i[LOWORD] = 0;
211*25c28e83SPiotr Jasiukajtis 	p = s.d;
212*25c28e83SPiotr Jasiukajtis 	z = q[nq] * twon24;
213*25c28e83SPiotr Jasiukajtis 	for (j = nq-1; j >= 1; j--) {
214*25c28e83SPiotr Jasiukajtis 		z += q[j];
215*25c28e83SPiotr Jasiukajtis 		t = (z + round24) - round24; /* must be rounded to nearest */
216*25c28e83SPiotr Jasiukajtis 		r[j+1] = (z - t) * p;
217*25c28e83SPiotr Jasiukajtis 		z = t * twon24;
218*25c28e83SPiotr Jasiukajtis 		p *= two24;
219*25c28e83SPiotr Jasiukajtis 	}
220*25c28e83SPiotr Jasiukajtis 	z += q[0];
221*25c28e83SPiotr Jasiukajtis 	t = (z + round24) - round24; /* must be rounded to nearest */
222*25c28e83SPiotr Jasiukajtis 	r[1] = (z - t) * p;
223*25c28e83SPiotr Jasiukajtis 	r[0] = t * p;
224*25c28e83SPiotr Jasiukajtis 
225*25c28e83SPiotr Jasiukajtis 	/* form n = [r] mod 8 and leave the fractional part of r */
226*25c28e83SPiotr Jasiukajtis 	if (eq0 > 0) {
227*25c28e83SPiotr Jasiukajtis 		/* binary point lies within r[2] */
228*25c28e83SPiotr Jasiukajtis 		z = r[2] + r[3];
229*25c28e83SPiotr Jasiukajtis 		t = (z + round1) - round1; /* must be rounded to nearest */
230*25c28e83SPiotr Jasiukajtis 		r[2] -= t;
231*25c28e83SPiotr Jasiukajtis 		n = (int)(r[1] + t);
232*25c28e83SPiotr Jasiukajtis 		r[0] = r[1] = zero;
233*25c28e83SPiotr Jasiukajtis 	} else if (eq0 > -24) {
234*25c28e83SPiotr Jasiukajtis 		/* binary point lies within or just to the right of r[1] */
235*25c28e83SPiotr Jasiukajtis 		z = r[1] + r[2];
236*25c28e83SPiotr Jasiukajtis 		t = (z + round1) - round1; /* must be rounded to nearest */
237*25c28e83SPiotr Jasiukajtis 		r[1] -= t;
238*25c28e83SPiotr Jasiukajtis 		z = r[0] + t;
239*25c28e83SPiotr Jasiukajtis 		/* cut off high part of z so conversion to int doesn't
240*25c28e83SPiotr Jasiukajtis 		   overflow */
241*25c28e83SPiotr Jasiukajtis 		t = (z + round24) - round24;
242*25c28e83SPiotr Jasiukajtis 		n = (int)(z - t);
243*25c28e83SPiotr Jasiukajtis 		r[0] = zero;
244*25c28e83SPiotr Jasiukajtis 	} else {
245*25c28e83SPiotr Jasiukajtis 		/* binary point lies within or just to the right of r[0] */
246*25c28e83SPiotr Jasiukajtis 		z = r[0] + r[1];
247*25c28e83SPiotr Jasiukajtis 		t = (z + round1) - round1; /* must be rounded to nearest */
248*25c28e83SPiotr Jasiukajtis 		r[0] -= t;
249*25c28e83SPiotr Jasiukajtis 		n = (int)t;
250*25c28e83SPiotr Jasiukajtis 	}
251*25c28e83SPiotr Jasiukajtis 
252*25c28e83SPiotr Jasiukajtis 	/* count the number of leading zeroes in r */
253*25c28e83SPiotr Jasiukajtis 	for (j = 0; j <= nq; j++) {
254*25c28e83SPiotr Jasiukajtis 		if (r[j] != zero)
255*25c28e83SPiotr Jasiukajtis 			break;
256*25c28e83SPiotr Jasiukajtis 	}
257*25c28e83SPiotr Jasiukajtis 
258*25c28e83SPiotr Jasiukajtis 	/* if fewer than 5 terms remain, add more */
259*25c28e83SPiotr Jasiukajtis 	if (nq - j < 4) {
260*25c28e83SPiotr Jasiukajtis 		k = 4 - (nq - j);
261*25c28e83SPiotr Jasiukajtis 		/*
262*25c28e83SPiotr Jasiukajtis 		 * compute q[nq+1] to q[nq+k]
263*25c28e83SPiotr Jasiukajtis 		 *
264*25c28e83SPiotr Jasiukajtis 		 * For some reason, writing out the nx loop explicitly
265*25c28e83SPiotr Jasiukajtis 		 * for each of the three possible values (as above) seems
266*25c28e83SPiotr Jasiukajtis 		 * to run a little slower, so we'll leave this code as is.
267*25c28e83SPiotr Jasiukajtis 		 */
268*25c28e83SPiotr Jasiukajtis 		for (i = nq + 1; i <= nq + k; i++) {
269*25c28e83SPiotr Jasiukajtis 			t = x[0] * ipio2[ip+2+i];
270*25c28e83SPiotr Jasiukajtis 			for (j = 1; j < nx; j++)
271*25c28e83SPiotr Jasiukajtis 				t += x[j] * ipio2[ip+2+i-j];
272*25c28e83SPiotr Jasiukajtis 			q[i] = t;
273*25c28e83SPiotr Jasiukajtis 			eqnqm1 -= 24;
274*25c28e83SPiotr Jasiukajtis 		}
275*25c28e83SPiotr Jasiukajtis 		nq += k;
276*25c28e83SPiotr Jasiukajtis 		goto recompute;
277*25c28e83SPiotr Jasiukajtis 	}
278*25c28e83SPiotr Jasiukajtis 
279*25c28e83SPiotr Jasiukajtis 	/* set pr and nq so that pr[0,...,nq] is the part of r remaining */
280*25c28e83SPiotr Jasiukajtis 	pr = &r[j];
281*25c28e83SPiotr Jasiukajtis 	nq = nq - j;
282*25c28e83SPiotr Jasiukajtis 
283*25c28e83SPiotr Jasiukajtis 	/* compute pio2 * pr[0,...,nq]; note that nq >= 4 here */
284*25c28e83SPiotr Jasiukajtis 	q[0] = pio2[0] * pr[0];
285*25c28e83SPiotr Jasiukajtis 	q[1] = pio2[0] * pr[1] + pio2[1] * pr[0];
286*25c28e83SPiotr Jasiukajtis 	q[2] = pio2[0] * pr[2] + pio2[1] * pr[1] + pio2[2] * pr[0];
287*25c28e83SPiotr Jasiukajtis 	q[3] = pio2[0] * pr[3] + pio2[1] * pr[2] + pio2[2] * pr[1]
288*25c28e83SPiotr Jasiukajtis 	    + pio2[3] * pr[0];
289*25c28e83SPiotr Jasiukajtis 	for (i = 4; i <= nq; i++) {
290*25c28e83SPiotr Jasiukajtis 		q[i] = pio2[0] * pr[i] + pio2[1] * pr[i-1] + pio2[2] * pr[i-2]
291*25c28e83SPiotr Jasiukajtis 		    + pio2[3] * pr[i-3] + pio2[4] * pr[i-4];
292*25c28e83SPiotr Jasiukajtis 	}
293*25c28e83SPiotr Jasiukajtis 
294*25c28e83SPiotr Jasiukajtis 	/* sum q in increasing order to obtain the first term of y */
295*25c28e83SPiotr Jasiukajtis 	t = q[nq];
296*25c28e83SPiotr Jasiukajtis 	for (i = nq - 1; i >= 0; i--)
297*25c28e83SPiotr Jasiukajtis 		t += q[i];
298*25c28e83SPiotr Jasiukajtis 	y[0] = t;
299*25c28e83SPiotr Jasiukajtis 	if (prec) {
300*25c28e83SPiotr Jasiukajtis 		/* subtract and sum again in decreasing order
301*25c28e83SPiotr Jasiukajtis 		   to obtain the second term */
302*25c28e83SPiotr Jasiukajtis 		t = q[0] - t;
303*25c28e83SPiotr Jasiukajtis 		for (i = 1; i <= nq; i++)
304*25c28e83SPiotr Jasiukajtis 			t += q[i];
305*25c28e83SPiotr Jasiukajtis 		y[1] = t;
306*25c28e83SPiotr Jasiukajtis 	}
307*25c28e83SPiotr Jasiukajtis 
308*25c28e83SPiotr Jasiukajtis 	return (n & 7);
309*25c28e83SPiotr Jasiukajtis }
310