xref: /linux/arch/x86/math-emu/polynom_Xsig.S (revision 37744feebc086908fd89760650f458ab19071750)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*---------------------------------------------------------------------------+
3 |  polynomial_Xsig.S                                                        |
4 |                                                                           |
5 | Fixed point arithmetic polynomial evaluation.                             |
6 |                                                                           |
7 | Copyright (C) 1992,1993,1994,1995                                         |
8 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
9 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
10 |                                                                           |
11 | Call from C as:                                                           |
12 |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
13 |                        unsigned long long terms[], int n)                 |
14 |                                                                           |
15 | Computes:                                                                 |
16 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
17 | and adds the result to the 12 byte Xsig.                                  |
18 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
19 | precision.                                                                |
20 |                                                                           |
21 | This function must be used carefully: most overflow of intermediate       |
22 | results is controlled, but overflow of the result is not.                 |
23 |                                                                           |
24 +---------------------------------------------------------------------------*/
25	.file	"polynomial_Xsig.S"
26
27#include "fpu_emu.h"
28
29
30#define	TERM_SIZE	$8
31#define	SUM_MS		-20(%ebp)	/* sum ms long */
32#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
33#define	SUM_LS		-28(%ebp)	/* sum ls long */
34#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
35#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
36#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
37#define OVERFLOWED      -16(%ebp)	/* addition overflow flag */
38
39.text
40SYM_FUNC_START(polynomial_Xsig)
41	pushl	%ebp
42	movl	%esp,%ebp
43	subl	$32,%esp
44	pushl	%esi
45	pushl	%edi
46	pushl	%ebx
47
48	movl	PARAM2,%esi		/* x */
49	movl	PARAM3,%edi		/* terms */
50
51	movl	TERM_SIZE,%eax
52	mull	PARAM4			/* n */
53	addl	%eax,%edi
54
55	movl	4(%edi),%edx		/* terms[n] */
56	movl	%edx,SUM_MS
57	movl	(%edi),%edx		/* terms[n] */
58	movl	%edx,SUM_MIDDLE
59	xor	%eax,%eax
60	movl	%eax,SUM_LS
61	movb	%al,OVERFLOWED
62
63	subl	TERM_SIZE,%edi
64	decl	PARAM4
65	js	L_accum_done
66
67L_accum_loop:
68	xor	%eax,%eax
69	movl	%eax,ACCUM_MS
70	movl	%eax,ACCUM_MIDDLE
71
72	movl	SUM_MIDDLE,%eax
73	mull	(%esi)			/* x ls long */
74	movl	%edx,ACCUM_LS
75
76	movl	SUM_MIDDLE,%eax
77	mull	4(%esi)			/* x ms long */
78	addl	%eax,ACCUM_LS
79	adcl	%edx,ACCUM_MIDDLE
80	adcl	$0,ACCUM_MS
81
82	movl	SUM_MS,%eax
83	mull	(%esi)			/* x ls long */
84	addl	%eax,ACCUM_LS
85	adcl	%edx,ACCUM_MIDDLE
86	adcl	$0,ACCUM_MS
87
88	movl	SUM_MS,%eax
89	mull	4(%esi)			/* x ms long */
90	addl	%eax,ACCUM_MIDDLE
91	adcl	%edx,ACCUM_MS
92
93	testb	$0xff,OVERFLOWED
94	jz	L_no_overflow
95
96	movl	(%esi),%eax
97	addl	%eax,ACCUM_MIDDLE
98	movl	4(%esi),%eax
99	adcl	%eax,ACCUM_MS		/* This could overflow too */
100
101L_no_overflow:
102
103/*
104 * Now put the sum of next term and the accumulator
105 * into the sum register
106 */
107	movl	ACCUM_LS,%eax
108	addl	(%edi),%eax		/* term ls long */
109	movl	%eax,SUM_LS
110	movl	ACCUM_MIDDLE,%eax
111	adcl	(%edi),%eax		/* term ls long */
112	movl	%eax,SUM_MIDDLE
113	movl	ACCUM_MS,%eax
114	adcl	4(%edi),%eax		/* term ms long */
115	movl	%eax,SUM_MS
116	sbbb	%al,%al
117	movb	%al,OVERFLOWED		/* Used in the next iteration */
118
119	subl	TERM_SIZE,%edi
120	decl	PARAM4
121	jns	L_accum_loop
122
123L_accum_done:
124	movl	PARAM1,%edi		/* accum */
125	movl	SUM_LS,%eax
126	addl	%eax,(%edi)
127	movl	SUM_MIDDLE,%eax
128	adcl	%eax,4(%edi)
129	movl	SUM_MS,%eax
130	adcl	%eax,8(%edi)
131
132	popl	%ebx
133	popl	%edi
134	popl	%esi
135	leave
136	ret
137SYM_FUNC_END(polynomial_Xsig)
138