xref: /linux/arch/x86/math-emu/div_Xsig.S (revision 2b64b2ed277ff23e785fbdb65098ee7e1252d64f)
1/* SPDX-License-Identifier: GPL-2.0 */
2	.file	"div_Xsig.S"
3/*---------------------------------------------------------------------------+
4 |  div_Xsig.S                                                               |
5 |                                                                           |
6 | Division subroutine for 96 bit quantities                                 |
7 |                                                                           |
8 | Copyright (C) 1994,1995                                                   |
9 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
10 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
11 |                                                                           |
12 |                                                                           |
13 +---------------------------------------------------------------------------*/
14
/*---------------------------------------------------------------------------+
 | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and  |
 | put the 96 bit result at the location dest.                               |
 |                                                                           |
 | The result may not be accurate to 96 bits. It is intended for use where   |
 | a result better than 64 bits is required. The result should usually be    |
 | good to at least 94 bits.                                                 |
 | The returned result is actually the quotient divided by two (the dividend |
 | is halved before dividing). This is done to prevent overflow.             |
 |                                                                           |
 |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                      |
 |                                                                           |
 |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
 |                                                                           |
 +---------------------------------------------------------------------------*/
30
#include "exception.h"
#include "fpu_emu.h"


/*
 * Accessors for the three 32-bit words of a 96-bit quantity whose address
 * is held in register x.  The word at offset 0 is the least significant
 * and the word at offset 8 the most significant.
 */
#define	XsigLL(x)	(x)	/* offset 0: least significant 32 bits */
#define	XsigL(x)	4(x)	/* offset 4: middle 32 bits */
#define	XsigH(x)	8(x)	/* offset 8: most significant 32 bits */
38
39
#ifndef NON_REENTRANT_FPU
/*
	Local storage on the stack (seven 32-bit slots below %ebp;
	div_Xsig reserves 28 bytes for them in its prologue):
	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
	Quotient:	FPU_result_3:FPU_result_2:FPU_result_1
	In both groups the highest-numbered word is the most significant.
 */
#define FPU_accum_3	-4(%ebp)
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)
#define FPU_result_3	-20(%ebp)
#define FPU_result_2	-24(%ebp)
#define FPU_result_1	-28(%ebp)

#else
.data
/*
	Local storage in a static area (one shared copy, used when
	NON_REENTRANT_FPU is defined):
	Accumulator:	FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
	Quotient:	FPU_result_3:FPU_result_2:FPU_result_1
 */
	.align 4,0
FPU_accum_3:
	.long	0
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0
FPU_result_3:
	.long	0
FPU_result_2:
	.long	0
FPU_result_1:
	.long	0
#endif /* NON_REENTRANT_FPU */
75
76
.text
/*
 * void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)
 *
 * Stores (a / b) / 2 at dest, working on 96-bit quantities.  The dividend
 * is halved into a 128-bit accumulator, then the quotient is produced one
 * 32-bit word at a time, most significant word first:
 *   word 3: divide by XsigH(b) + 1 (a divisor that is too large, so the
 *           estimate is never too big), subtract estimate * XsigH:XsigL(b)
 *           from the accumulator and correct by one if needed;
 *   word 2: same scheme on the reduced accumulator;
 *   word 1: an approximation only, see LDo_3rd_32_bits.
 *
 * In:    PARAM1 = a (dividend), PARAM2 = b (divisor), PARAM3 = dest.
 *        PARAM1..3 are macros from an included header (not shown here);
 *        presumably %ebp-relative stack arguments -- confirm.
 *        The divisor is expected to have bit 31 of its most significant
 *        word set; this is only checked when PARANOID is defined.
 * Out:   the three words at dest on the normal path.  dest is not written
 *        when a PARANOID check fails.
 * Regs:  %esi = a, later dest;  %ebx = b;
 *        %ecx = XsigH(b) + 1 (0 when that wraps, standing for 2^32);
 *        %eax, %edx = multiply/divide scratch.
 * Saved: %ebp, %esi, %edi, %ebx.  Clobbered: %eax, %ecx, %edx, flags.
 */
ENTRY(div_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp	/* FPU_accum_0..3 and FPU_result_1..3 */
#endif /* NON_REENTRANT_FPU */

	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi	/* pointer to num */
	movl	PARAM2,%ebx	/* pointer to denom */

#ifdef PARANOID
	testl	$0x80000000, XsigH(%ebx)	/* Divisor */
	je	L_bugged	/* top bit clear: divisor not normalized */
#endif /* PARANOID */


/*---------------------------------------------------------------------------+
 |  Divide:   Return  arg1/arg2 to arg3.                                     |
 |                                                                           |
 |  The maximum returned value is (ignoring exponents)                       |
 |               .ffffffff ffffffff                                          |
 |               ------------------  =  1.ffffffff fffffffe                  |
 |               .80000000 00000000                                          |
 | and the minimum is                                                        |
 |               .80000000 00000000                                          |
 |               ------------------  =  .80000000 00000001   (rounded)       |
 |               .ffffffff ffffffff                                          |
 |                                                                           |
 +---------------------------------------------------------------------------*/

	/* Save extended dividend in local register */

	/* Divide by 2 to prevent overflow: a 1-bit right shift of the
	   96-bit dividend, carried from word to word through CF (movl
	   does not change the flags). */
	clc
	movl	XsigH(%esi),%eax
	rcrl	%eax
	movl	%eax,FPU_accum_3
	movl	XsigL(%esi),%eax
	rcrl	%eax
	movl	%eax,FPU_accum_2
	movl	XsigLL(%esi),%eax
	rcrl	%eax
	movl	%eax,FPU_accum_1
	movl	$0,%eax		/* movl, not xorl: CF must survive */
	rcrl	%eax		/* catch the bit shifted out of the dividend */
	movl	%eax,FPU_accum_0

	movl	FPU_accum_2,%eax	/* Get the current num */
	movl	FPU_accum_3,%edx

/*----------------------------------------------------------------------*/
/* Initialization done.
   Do the first 32 bits. */

	/* We will divide by a number which is too large */
	movl	XsigH(%ebx),%ecx
	addl	$1,%ecx
	jnc	LFirst_div_not_1

	/* here we need to divide by 100000000h,
	   i.e., no division at all.. */
	movl	%edx,%eax
	jmp	LFirst_div_done

LFirst_div_not_1:
	divl	%ecx		/* Divide the numerator by the augmented
				   denom ms dw */

LFirst_div_done:
	movl	%eax,FPU_result_3	/* Put the result in the answer */

	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */

	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_3

	movl	FPU_result_3,%eax	/* Get the result back */
	mull	XsigL(%ebx)	/* now mul the ls dw of the denom */

	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2
	sbbl	$0,FPU_accum_3
	je	LDo_2nd_32_bits		/* Must check for non-zero result here */

#ifdef PARANOID
	jb	L_bugged_1	/* CF still from the sbbl: went negative */
#endif /* PARANOID */

	/* need to subtract another once of the denom */
	incl	FPU_result_3	/* Correct the answer */

	movl	XsigL(%ebx),%eax
	movl	XsigH(%ebx),%edx
	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2

#ifdef PARANOID
	sbbl	$0,FPU_accum_3
	jne	L_bugged_1	/* Must check for non-zero result here */
#endif /* PARANOID */

/*----------------------------------------------------------------------*/
/* Half of the main problem is done, there is just a reduced numerator
   to handle now.
   Work with the second 32 bits, FPU_accum_3 not used from now on */
LDo_2nd_32_bits:
	movl	FPU_accum_2,%edx	/* get the reduced num */
	movl	FPU_accum_1,%eax

	/* need to check for possible subsequent overflow */
	cmpl	XsigH(%ebx),%edx
	jb	LDo_2nd_div
	ja	LPrevent_2nd_overflow

	cmpl	XsigL(%ebx),%eax
	jb	LDo_2nd_div

LPrevent_2nd_overflow:
/* The numerator is greater or equal, would cause overflow */
	/* prevent overflow */
	subl	XsigL(%ebx),%eax
	sbbl	XsigH(%ebx),%edx
	movl	%edx,FPU_accum_2
	movl	%eax,FPU_accum_1

	incl	FPU_result_3	/* Reflect the subtraction in the answer */

#ifdef PARANOID
	je	L_bugged_2	/* Can't bump the result to 1.0 */
#endif /* PARANOID */

LDo_2nd_div:
	testl	%ecx,%ecx	/* augmented denom msw */
	jnz	LSecond_div_not_1

	/* %ecx == 0, we are dividing by 1.0 */
	movl	%edx,%eax
	jmp	LSecond_div_done

LSecond_div_not_1:
	divl	%ecx		/* Divide the numerator by the denom ms dw */

LSecond_div_done:
	movl	%eax,FPU_result_2	/* Put the result in the answer */

	mull	XsigH(%ebx)	/* mul by the ms dw of the denom */

	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
#endif /* PARANOID */

	movl	FPU_result_2,%eax	/* Get the result back */
	mull	XsigL(%ebx)	/* now mul the ls dw of the denom */

	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
	sbbl	$0,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
#endif /* PARANOID */

	jz	LDo_3rd_32_bits	/* ZF from the sbbl: FPU_accum_2 is zero */

#ifdef PARANOID
	cmpl	$1,FPU_accum_2
	jne	L_bugged_2
#endif /* PARANOID */

	/* need to subtract another once of the denom */
	movl	XsigL(%ebx),%eax
	movl	XsigH(%ebx),%edx
	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
	sbbl	%edx,FPU_accum_1
	sbbl	$0,FPU_accum_2

#ifdef PARANOID
	jc	L_bugged_2
	jne	L_bugged_2
#endif /* PARANOID */

	addl	$1,FPU_result_2	/* Correct the answer */
	adcl	$0,FPU_result_3

#ifdef PARANOID
	jc	L_bugged_2	/* a carry out of FPU_result_3 is an error */
#endif /* PARANOID */

/*----------------------------------------------------------------------*/
/* The division is essentially finished here, we just need to perform
   tidying operations.
   Deal with the 3rd 32 bits */
LDo_3rd_32_bits:
	/* We use an approximation for the third 32 bits.
	To take account of the 3rd 32 bits of the divisor
	(call them del), we subtract  del * (a/b) */

	movl	FPU_result_3,%eax	/* a/b */
	mull	XsigLL(%ebx)		/* del */

	subl	%edx,FPU_accum_1	/* only the high word of the product */

	/* A borrow indicates that the result is negative */
	jnb	LTest_over

	movl	XsigH(%ebx),%edx
	addl	%edx,FPU_accum_1

	subl	$1,FPU_result_2		/* Adjust the answer */
	sbbl	$0,FPU_result_3

	/* The above addition might not have been enough, check again. */
	movl	FPU_accum_1,%edx	/* get the reduced num */
	cmpl	XsigH(%ebx),%edx	/* denom */
	jb	LDo_3rd_div

	movl	XsigH(%ebx),%edx
	addl	%edx,FPU_accum_1

	subl	$1,FPU_result_2		/* Adjust the answer */
	sbbl	$0,FPU_result_3
	jmp	LDo_3rd_div

LTest_over:
	movl	FPU_accum_1,%edx	/* get the reduced num */

	/* need to check for possible subsequent overflow */
	cmpl	XsigH(%ebx),%edx	/* denom */
	jb	LDo_3rd_div

	/* prevent overflow */
	subl	XsigH(%ebx),%edx
	movl	%edx,FPU_accum_1

	addl	$1,FPU_result_2	/* Reflect the subtraction in the answer */
	adcl	$0,FPU_result_3

LDo_3rd_div:
	movl	FPU_accum_0,%eax
	movl	FPU_accum_1,%edx
	divl	XsigH(%ebx)	/* ms dw of the denom only */

	movl    %eax,FPU_result_1       /* Rough estimate of third word */

	movl	PARAM3,%esi		/* pointer to answer */

	/* Copy the three quotient words out to dest */
	movl	FPU_result_1,%eax
	movl	%eax,XsigLL(%esi)
	movl	FPU_result_2,%eax
	movl	%eax,XsigL(%esi)
	movl	FPU_result_3,%eax
	movl	%eax,XsigH(%esi)

L_exit:
	popl	%ebx
	popl	%edi
	popl	%esi

	leave
	ret


#ifdef PARANOID
/* The logic is wrong if we got here.
   Each handler pushes an error code, calls EXCEPTION, discards the pushed
   argument and leaves through L_exit without writing to dest.
   NOTE(review): EX_INTERNAL is defined in an included header that is not
   shown here; confirm that it expands to an immediate operand. */
L_bugged:
	pushl	EX_INTERNAL|0x240
	call	EXCEPTION
	popl	%ebx		/* drop the argument; L_exit reloads %ebx */
	jmp	L_exit

L_bugged_1:
	pushl	EX_INTERNAL|0x241
	call	EXCEPTION
	popl	%ebx		/* drop the argument; L_exit reloads %ebx */
	jmp	L_exit

L_bugged_2:
	pushl	EX_INTERNAL|0x242
	call	EXCEPTION
	popl	%ebx		/* drop the argument; L_exit reloads %ebx */
	jmp	L_exit
#endif /* PARANOID */
ENDPROC(div_Xsig)

368