xref: /linux/arch/x86/math-emu/poly_sin.c (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732)
1 /*---------------------------------------------------------------------------+
2  |  poly_sin.c                                                               |
3  |                                                                           |
4  |  Computation of an approximation of the sin function and the cosine       |
5  |  function by a polynomial.                                                |
6  |                                                                           |
7  | Copyright (C) 1992,1993,1994,1997,1999                                    |
8  |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9  |                  E-mail   billm@melbpc.org.au                             |
10  |                                                                           |
11  |                                                                           |
12  +---------------------------------------------------------------------------*/
13 
14 #include "exception.h"
15 #include "reg_constant.h"
16 #include "fpu_emu.h"
17 #include "fpu_system.h"
18 #include "control_w.h"
19 #include "poly.h"
20 
21 #define	N_COEFF_P	4
22 #define	N_COEFF_N	4
23 
24 static const unsigned long long pos_terms_l[N_COEFF_P] = {
25 	0xaaaaaaaaaaaaaaabLL,
26 	0x00d00d00d00cf906LL,
27 	0x000006b99159a8bbLL,
28 	0x000000000d7392e6LL
29 };
30 
31 static const unsigned long long neg_terms_l[N_COEFF_N] = {
32 	0x2222222222222167LL,
33 	0x0002e3bc74aab624LL,
34 	0x0000000b09229062LL,
35 	0x00000000000c7973LL
36 };
37 
38 #define	N_COEFF_PH	4
39 #define	N_COEFF_NH	4
40 static const unsigned long long pos_terms_h[N_COEFF_PH] = {
41 	0x0000000000000000LL,
42 	0x05b05b05b05b0406LL,
43 	0x000049f93edd91a9LL,
44 	0x00000000c9c9ed62LL
45 };
46 
47 static const unsigned long long neg_terms_h[N_COEFF_NH] = {
48 	0xaaaaaaaaaaaaaa98LL,
49 	0x001a01a01a019064LL,
50 	0x0000008f76c68a77LL,
51 	0x0000000000d58f5eLL
52 };
53 
54 /*--- poly_sine() -----------------------------------------------------------+
55  |                                                                           |
56  +---------------------------------------------------------------------------*/
57 void poly_sine(FPU_REG *st0_ptr)
58 {
59 	int exponent, echange;
60 	Xsig accumulator, argSqrd, argTo4;
61 	unsigned long fix_up, adj;
62 	unsigned long long fixed_arg;
63 	FPU_REG result;
64 
65 	exponent = exponent(st0_ptr);
66 
67 	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
68 
69 	/* Split into two ranges, for arguments below and above 1.0 */
70 	/* The boundary between upper and lower is approx 0.88309101259 */
71 	if ((exponent < -1)
72 	    || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
73 		/* The argument is <= 0.88309101259 */
74 
75 		argSqrd.msw = st0_ptr->sigh;
76 		argSqrd.midw = st0_ptr->sigl;
77 		argSqrd.lsw = 0;
78 		mul64_Xsig(&argSqrd, &significand(st0_ptr));
79 		shr_Xsig(&argSqrd, 2 * (-1 - exponent));
80 		argTo4.msw = argSqrd.msw;
81 		argTo4.midw = argSqrd.midw;
82 		argTo4.lsw = argSqrd.lsw;
83 		mul_Xsig_Xsig(&argTo4, &argTo4);
84 
85 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
86 				N_COEFF_N - 1);
87 		mul_Xsig_Xsig(&accumulator, &argSqrd);
88 		negate_Xsig(&accumulator);
89 
90 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
91 				N_COEFF_P - 1);
92 
93 		shr_Xsig(&accumulator, 2);	/* Divide by four */
94 		accumulator.msw |= 0x80000000;	/* Add 1.0 */
95 
96 		mul64_Xsig(&accumulator, &significand(st0_ptr));
97 		mul64_Xsig(&accumulator, &significand(st0_ptr));
98 		mul64_Xsig(&accumulator, &significand(st0_ptr));
99 
100 		/* Divide by four, FPU_REG compatible, etc */
101 		exponent = 3 * exponent;
102 
103 		/* The minimum exponent difference is 3 */
104 		shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
105 
106 		negate_Xsig(&accumulator);
107 		XSIG_LL(accumulator) += significand(st0_ptr);
108 
109 		echange = round_Xsig(&accumulator);
110 
111 		setexponentpos(&result, exponent(st0_ptr) + echange);
112 	} else {
113 		/* The argument is > 0.88309101259 */
114 		/* We use sin(st(0)) = cos(pi/2-st(0)) */
115 
116 		fixed_arg = significand(st0_ptr);
117 
118 		if (exponent == 0) {
119 			/* The argument is >= 1.0 */
120 
121 			/* Put the binary point at the left. */
122 			fixed_arg <<= 1;
123 		}
124 		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
125 		fixed_arg = 0x921fb54442d18469LL - fixed_arg;
126 		/* There is a special case which arises due to rounding, to fix here. */
127 		if (fixed_arg == 0xffffffffffffffffLL)
128 			fixed_arg = 0;
129 
130 		XSIG_LL(argSqrd) = fixed_arg;
131 		argSqrd.lsw = 0;
132 		mul64_Xsig(&argSqrd, &fixed_arg);
133 
134 		XSIG_LL(argTo4) = XSIG_LL(argSqrd);
135 		argTo4.lsw = argSqrd.lsw;
136 		mul_Xsig_Xsig(&argTo4, &argTo4);
137 
138 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
139 				N_COEFF_NH - 1);
140 		mul_Xsig_Xsig(&accumulator, &argSqrd);
141 		negate_Xsig(&accumulator);
142 
143 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
144 				N_COEFF_PH - 1);
145 		negate_Xsig(&accumulator);
146 
147 		mul64_Xsig(&accumulator, &fixed_arg);
148 		mul64_Xsig(&accumulator, &fixed_arg);
149 
150 		shr_Xsig(&accumulator, 3);
151 		negate_Xsig(&accumulator);
152 
153 		add_Xsig_Xsig(&accumulator, &argSqrd);
154 
155 		shr_Xsig(&accumulator, 1);
156 
157 		accumulator.lsw |= 1;	/* A zero accumulator here would cause problems */
158 		negate_Xsig(&accumulator);
159 
160 		/* The basic computation is complete. Now fix the answer to
161 		   compensate for the error due to the approximation used for
162 		   pi/2
163 		 */
164 
165 		/* This has an exponent of -65 */
166 		fix_up = 0x898cc517;
167 		/* The fix-up needs to be improved for larger args */
168 		if (argSqrd.msw & 0xffc00000) {
169 			/* Get about 32 bit precision in these: */
170 			fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
171 		}
172 		fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
173 
174 		adj = accumulator.lsw;	/* temp save */
175 		accumulator.lsw -= fix_up;
176 		if (accumulator.lsw > adj)
177 			XSIG_LL(accumulator)--;
178 
179 		echange = round_Xsig(&accumulator);
180 
181 		setexponentpos(&result, echange - 1);
182 	}
183 
184 	significand(&result) = XSIG_LL(accumulator);
185 	setsign(&result, getsign(st0_ptr));
186 	FPU_copy_to_reg0(&result, TAG_Valid);
187 
188 #ifdef PARANOID
189 	if ((exponent(&result) >= 0)
190 	    && (significand(&result) > 0x8000000000000000LL)) {
191 		EXCEPTION(EX_INTERNAL | 0x150);
192 	}
193 #endif /* PARANOID */
194 
195 }
196 
197 /*--- poly_cos() ------------------------------------------------------------+
198  |                                                                           |
199  +---------------------------------------------------------------------------*/
200 void poly_cos(FPU_REG *st0_ptr)
201 {
202 	FPU_REG result;
203 	long int exponent, exp2, echange;
204 	Xsig accumulator, argSqrd, fix_up, argTo4;
205 	unsigned long long fixed_arg;
206 
207 #ifdef PARANOID
208 	if ((exponent(st0_ptr) > 0)
209 	    || ((exponent(st0_ptr) == 0)
210 		&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
211 		EXCEPTION(EX_Invalid);
212 		FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
213 		return;
214 	}
215 #endif /* PARANOID */
216 
217 	exponent = exponent(st0_ptr);
218 
219 	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
220 
221 	if ((exponent < -1)
222 	    || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
223 		/* arg is < 0.687705 */
224 
225 		argSqrd.msw = st0_ptr->sigh;
226 		argSqrd.midw = st0_ptr->sigl;
227 		argSqrd.lsw = 0;
228 		mul64_Xsig(&argSqrd, &significand(st0_ptr));
229 
230 		if (exponent < -1) {
231 			/* shift the argument right by the required places */
232 			shr_Xsig(&argSqrd, 2 * (-1 - exponent));
233 		}
234 
235 		argTo4.msw = argSqrd.msw;
236 		argTo4.midw = argSqrd.midw;
237 		argTo4.lsw = argSqrd.lsw;
238 		mul_Xsig_Xsig(&argTo4, &argTo4);
239 
240 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
241 				N_COEFF_NH - 1);
242 		mul_Xsig_Xsig(&accumulator, &argSqrd);
243 		negate_Xsig(&accumulator);
244 
245 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
246 				N_COEFF_PH - 1);
247 		negate_Xsig(&accumulator);
248 
249 		mul64_Xsig(&accumulator, &significand(st0_ptr));
250 		mul64_Xsig(&accumulator, &significand(st0_ptr));
251 		shr_Xsig(&accumulator, -2 * (1 + exponent));
252 
253 		shr_Xsig(&accumulator, 3);
254 		negate_Xsig(&accumulator);
255 
256 		add_Xsig_Xsig(&accumulator, &argSqrd);
257 
258 		shr_Xsig(&accumulator, 1);
259 
260 		/* It doesn't matter if accumulator is all zero here, the
261 		   following code will work ok */
262 		negate_Xsig(&accumulator);
263 
264 		if (accumulator.lsw & 0x80000000)
265 			XSIG_LL(accumulator)++;
266 		if (accumulator.msw == 0) {
267 			/* The result is 1.0 */
268 			FPU_copy_to_reg0(&CONST_1, TAG_Valid);
269 			return;
270 		} else {
271 			significand(&result) = XSIG_LL(accumulator);
272 
273 			/* will be a valid positive nr with expon = -1 */
274 			setexponentpos(&result, -1);
275 		}
276 	} else {
277 		fixed_arg = significand(st0_ptr);
278 
279 		if (exponent == 0) {
280 			/* The argument is >= 1.0 */
281 
282 			/* Put the binary point at the left. */
283 			fixed_arg <<= 1;
284 		}
285 		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
286 		fixed_arg = 0x921fb54442d18469LL - fixed_arg;
287 		/* There is a special case which arises due to rounding, to fix here. */
288 		if (fixed_arg == 0xffffffffffffffffLL)
289 			fixed_arg = 0;
290 
291 		exponent = -1;
292 		exp2 = -1;
293 
294 		/* A shift is needed here only for a narrow range of arguments,
295 		   i.e. for fixed_arg approx 2^-32, but we pick up more... */
296 		if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
297 			fixed_arg <<= 16;
298 			exponent -= 16;
299 			exp2 -= 16;
300 		}
301 
302 		XSIG_LL(argSqrd) = fixed_arg;
303 		argSqrd.lsw = 0;
304 		mul64_Xsig(&argSqrd, &fixed_arg);
305 
306 		if (exponent < -1) {
307 			/* shift the argument right by the required places */
308 			shr_Xsig(&argSqrd, 2 * (-1 - exponent));
309 		}
310 
311 		argTo4.msw = argSqrd.msw;
312 		argTo4.midw = argSqrd.midw;
313 		argTo4.lsw = argSqrd.lsw;
314 		mul_Xsig_Xsig(&argTo4, &argTo4);
315 
316 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
317 				N_COEFF_N - 1);
318 		mul_Xsig_Xsig(&accumulator, &argSqrd);
319 		negate_Xsig(&accumulator);
320 
321 		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
322 				N_COEFF_P - 1);
323 
324 		shr_Xsig(&accumulator, 2);	/* Divide by four */
325 		accumulator.msw |= 0x80000000;	/* Add 1.0 */
326 
327 		mul64_Xsig(&accumulator, &fixed_arg);
328 		mul64_Xsig(&accumulator, &fixed_arg);
329 		mul64_Xsig(&accumulator, &fixed_arg);
330 
331 		/* Divide by four, FPU_REG compatible, etc */
332 		exponent = 3 * exponent;
333 
334 		/* The minimum exponent difference is 3 */
335 		shr_Xsig(&accumulator, exp2 - exponent);
336 
337 		negate_Xsig(&accumulator);
338 		XSIG_LL(accumulator) += fixed_arg;
339 
340 		/* The basic computation is complete. Now fix the answer to
341 		   compensate for the error due to the approximation used for
342 		   pi/2
343 		 */
344 
345 		/* This has an exponent of -65 */
346 		XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
347 		fix_up.lsw = 0;
348 
349 		/* The fix-up needs to be improved for larger args */
350 		if (argSqrd.msw & 0xffc00000) {
351 			/* Get about 32 bit precision in these: */
352 			fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
353 			fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
354 		}
355 
356 		exp2 += norm_Xsig(&accumulator);
357 		shr_Xsig(&accumulator, 1);	/* Prevent overflow */
358 		exp2++;
359 		shr_Xsig(&fix_up, 65 + exp2);
360 
361 		add_Xsig_Xsig(&accumulator, &fix_up);
362 
363 		echange = round_Xsig(&accumulator);
364 
365 		setexponentpos(&result, exp2 + echange);
366 		significand(&result) = XSIG_LL(accumulator);
367 	}
368 
369 	FPU_copy_to_reg0(&result, TAG_Valid);
370 
371 #ifdef PARANOID
372 	if ((exponent(&result) >= 0)
373 	    && (significand(&result) > 0x8000000000000000LL)) {
374 		EXCEPTION(EX_INTERNAL | 0x151);
375 	}
376 #endif /* PARANOID */
377 
378 }
379