xref: /linux/arch/x86/math-emu/polynom_Xsig.S (revision 4413e16d9d21673bb5048a2e542f1aaa00015c2e)
1/*---------------------------------------------------------------------------+
2 |  polynomial_Xsig.S                                                        |
3 |                                                                           |
4 | Fixed point arithmetic polynomial evaluation.                             |
5 |                                                                           |
6 | Copyright (C) 1992,1993,1994,1995                                         |
7 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
8 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
9 |                                                                           |
10 | Call from C as:                                                           |
11 |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
12 |                        unsigned long long terms[], int n)                 |
13 |                                                                           |
14 | Computes:                                                                 |
15 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
16 | and adds the result to the 12 byte Xsig.                                  |
17 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
18 | precision.                                                                |
19 |                                                                           |
20 | This function must be used carefully: most overflow of intermediate       |
21 | results is controlled, but overflow of the result is not.                 |
22 |                                                                           |
23 +---------------------------------------------------------------------------*/
24	.file	"polynomial_Xsig.S"
25
26#include "fpu_emu.h"
27
28
29#define	TERM_SIZE	$8
30#define	SUM_MS		-20(%ebp)	/* sum ms long */
31#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
32#define	SUM_LS		-28(%ebp)	/* sum ls long */
33#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
34#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
35#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
36#define OVERFLOWED      -16(%ebp)	/* addition overflow flag */
37
38.text
39ENTRY(polynomial_Xsig)
40	pushl	%ebp
41	movl	%esp,%ebp
42	subl	$32,%esp
43	pushl	%esi
44	pushl	%edi
45	pushl	%ebx
46
47	movl	PARAM2,%esi		/* x */
48	movl	PARAM3,%edi		/* terms */
49
50	movl	TERM_SIZE,%eax
51	mull	PARAM4			/* n */
52	addl	%eax,%edi
53
54	movl	4(%edi),%edx		/* terms[n] */
55	movl	%edx,SUM_MS
56	movl	(%edi),%edx		/* terms[n] */
57	movl	%edx,SUM_MIDDLE
58	xor	%eax,%eax
59	movl	%eax,SUM_LS
60	movb	%al,OVERFLOWED
61
62	subl	TERM_SIZE,%edi
63	decl	PARAM4
64	js	L_accum_done
65
66L_accum_loop:
67	xor	%eax,%eax
68	movl	%eax,ACCUM_MS
69	movl	%eax,ACCUM_MIDDLE
70
71	movl	SUM_MIDDLE,%eax
72	mull	(%esi)			/* x ls long */
73	movl	%edx,ACCUM_LS
74
75	movl	SUM_MIDDLE,%eax
76	mull	4(%esi)			/* x ms long */
77	addl	%eax,ACCUM_LS
78	adcl	%edx,ACCUM_MIDDLE
79	adcl	$0,ACCUM_MS
80
81	movl	SUM_MS,%eax
82	mull	(%esi)			/* x ls long */
83	addl	%eax,ACCUM_LS
84	adcl	%edx,ACCUM_MIDDLE
85	adcl	$0,ACCUM_MS
86
87	movl	SUM_MS,%eax
88	mull	4(%esi)			/* x ms long */
89	addl	%eax,ACCUM_MIDDLE
90	adcl	%edx,ACCUM_MS
91
92	testb	$0xff,OVERFLOWED
93	jz	L_no_overflow
94
95	movl	(%esi),%eax
96	addl	%eax,ACCUM_MIDDLE
97	movl	4(%esi),%eax
98	adcl	%eax,ACCUM_MS		/* This could overflow too */
99
100L_no_overflow:
101
102/*
103 * Now put the sum of next term and the accumulator
104 * into the sum register
105 */
106	movl	ACCUM_LS,%eax
107	addl	(%edi),%eax		/* term ls long */
108	movl	%eax,SUM_LS
109	movl	ACCUM_MIDDLE,%eax
110	adcl	(%edi),%eax		/* term ls long */
111	movl	%eax,SUM_MIDDLE
112	movl	ACCUM_MS,%eax
113	adcl	4(%edi),%eax		/* term ms long */
114	movl	%eax,SUM_MS
115	sbbb	%al,%al
116	movb	%al,OVERFLOWED		/* Used in the next iteration */
117
118	subl	TERM_SIZE,%edi
119	decl	PARAM4
120	jns	L_accum_loop
121
122L_accum_done:
123	movl	PARAM1,%edi		/* accum */
124	movl	SUM_LS,%eax
125	addl	%eax,(%edi)
126	movl	SUM_MIDDLE,%eax
127	adcl	%eax,4(%edi)
128	movl	SUM_MS,%eax
129	adcl	%eax,8(%edi)
130
131	popl	%ebx
132	popl	%edi
133	popl	%esi
134	leave
135	ret
136