xref: /linux/arch/x86/math-emu/reg_u_div.S (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732)
1	.file	"reg_u_div.S"
2/*---------------------------------------------------------------------------+
3 |  reg_u_div.S                                                              |
4 |                                                                           |
5 | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
6 |                                                                           |
7 | Copyright (C) 1992,1993,1995,1997                                         |
8 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9 |                  E-mail   billm@suburbia.net                              |
10 |                                                                           |
11 |                                                                           |
12 +---------------------------------------------------------------------------*/
13
14/*---------------------------------------------------------------------------+
15 | Call from C as:                                                           |
16 |    int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest,                   |
17 |                unsigned int control_word, char *sign)                     |
18 |                                                                           |
19 |  Does not compute the destination exponent, but does adjust it.           |
20 |                                                                           |
21 |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
22 |    one was raised, or -1 on internal error.                               |
23 +---------------------------------------------------------------------------*/
24
25#include "exception.h"
26#include "fpu_emu.h"
27#include "control_w.h"
28
29
30/* #define	dSIGL(x)	(x) */
31/* #define	dSIGH(x)	4(x) */
32
33
34#ifndef NON_REENTRANT_FPU
35/*
36	Local storage on the stack:
37	Result:		FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
38	Overflow flag:	ovfl_flag
39 */
40#define FPU_accum_3	-4(%ebp)
41#define FPU_accum_2	-8(%ebp)
42#define FPU_accum_1	-12(%ebp)
43#define FPU_accum_0	-16(%ebp)
44#define FPU_result_1	-20(%ebp)
45#define FPU_result_2	-24(%ebp)
46#define FPU_ovfl_flag	-28(%ebp)
47
48#else
49.data
50/*
51	Local storage in a static area:
52	Result:		FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
53	Overflow flag:	ovfl_flag
54 */
55	.align 4,0
56FPU_accum_3:
57	.long	0
58FPU_accum_2:
59	.long	0
60FPU_accum_1:
61	.long	0
62FPU_accum_0:
63	.long	0
64FPU_result_1:
65	.long	0
66FPU_result_2:
67	.long	0
68FPU_ovfl_flag:
69	.byte	0
70#endif /* NON_REENTRANT_FPU */
71
72#define REGA	PARAM1
73#define REGB	PARAM2
74#define DEST	PARAM3
75
76.text
77ENTRY(FPU_u_div)
78	pushl	%ebp
79	movl	%esp,%ebp
80#ifndef NON_REENTRANT_FPU
81	subl	$28,%esp
82#endif /* NON_REENTRANT_FPU */
83
84	pushl	%esi
85	pushl	%edi
86	pushl	%ebx
87
88	movl	REGA,%esi
89	movl	REGB,%ebx
90	movl	DEST,%edi
91
92	movswl	EXP(%esi),%edx
93	movswl	EXP(%ebx),%eax
94	subl	%eax,%edx
95	addl	EXP_BIAS,%edx
96
97	/* A denormal and a large number can cause an exponent underflow */
98	cmpl	EXP_WAY_UNDER,%edx
99	jg	xExp_not_underflow
100
101	/* Set to a really low value allow correct handling */
102	movl	EXP_WAY_UNDER,%edx
103
104xExp_not_underflow:
105
106	movw    %dx,EXP(%edi)
107
108#ifdef PARANOID
109/*	testl	$0x80000000, SIGH(%esi)	// Dividend */
110/*	je	L_bugged */
111	testl	$0x80000000, SIGH(%ebx)	/* Divisor */
112	je	L_bugged
113#endif /* PARANOID */
114
115/* Check if the divisor can be treated as having just 32 bits */
116	cmpl	$0,SIGL(%ebx)
117	jnz	L_Full_Division	/* Can't do a quick divide */
118
119/* We should be able to zip through the division here */
120	movl	SIGH(%ebx),%ecx	/* The divisor */
121	movl	SIGH(%esi),%edx	/* Dividend */
122	movl	SIGL(%esi),%eax	/* Dividend */
123
124	cmpl	%ecx,%edx
125	setaeb	FPU_ovfl_flag	/* Keep a record */
126	jb	L_no_adjust
127
128	subl	%ecx,%edx	/* Prevent the overflow */
129
130L_no_adjust:
131	/* Divide the 64 bit number by the 32 bit denominator */
132	divl	%ecx
133	movl	%eax,FPU_result_2
134
135	/* Work on the remainder of the first division */
136	xorl	%eax,%eax
137	divl	%ecx
138	movl	%eax,FPU_result_1
139
140	/* Work on the remainder of the 64 bit division */
141	xorl	%eax,%eax
142	divl	%ecx
143
144	testb	$255,FPU_ovfl_flag	/* was the num > denom ? */
145	je	L_no_overflow
146
147	/* Do the shifting here */
148	/* increase the exponent */
149	incw	EXP(%edi)
150
151	/* shift the mantissa right one bit */
152	stc			/* To set the ms bit */
153	rcrl	FPU_result_2
154	rcrl	FPU_result_1
155	rcrl	%eax
156
157L_no_overflow:
158	jmp	LRound_precision	/* Do the rounding as required */
159
160
161/*---------------------------------------------------------------------------+
162 |  Divide:   Return  arg1/arg2 to arg3.                                     |
163 |                                                                           |
164 |  This routine does not use the exponents of arg1 and arg2, but does       |
165 |  adjust the exponent of arg3.                                             |
166 |                                                                           |
167 |  The maximum returned value is (ignoring exponents)                       |
168 |               .ffffffff ffffffff                                          |
169 |               ------------------  =  1.ffffffff fffffffe                  |
170 |               .80000000 00000000                                          |
171 | and the minimum is                                                        |
172 |               .80000000 00000000                                          |
173 |               ------------------  =  .80000000 00000001   (rounded)       |
174 |               .ffffffff ffffffff                                          |
175 |                                                                           |
176 +---------------------------------------------------------------------------*/
177
178
179L_Full_Division:
180	/* Save extended dividend in local register */
181	movl	SIGL(%esi),%eax
182	movl	%eax,FPU_accum_2
183	movl	SIGH(%esi),%eax
184	movl	%eax,FPU_accum_3
185	xorl	%eax,%eax
186	movl	%eax,FPU_accum_1	/* zero the extension */
187	movl	%eax,FPU_accum_0	/* zero the extension */
188
189	movl	SIGL(%esi),%eax	/* Get the current num */
190	movl	SIGH(%esi),%edx
191
192/*----------------------------------------------------------------------*/
193/* Initialization done.
194   Do the first 32 bits. */
195
196	movb	$0,FPU_ovfl_flag
197	cmpl	SIGH(%ebx),%edx	/* Test for imminent overflow */
198	jb	LLess_than_1
199	ja	LGreater_than_1
200
201	cmpl	SIGL(%ebx),%eax
202	jb	LLess_than_1
203
204LGreater_than_1:
205/* The dividend is greater or equal, would cause overflow */
206	setaeb	FPU_ovfl_flag		/* Keep a record */
207
208	subl	SIGL(%ebx),%eax
209	sbbl	SIGH(%ebx),%edx	/* Prevent the overflow */
210	movl	%eax,FPU_accum_2
211	movl	%edx,FPU_accum_3
212
213LLess_than_1:
214/* At this point, we have a dividend < divisor, with a record of
215   adjustment in FPU_ovfl_flag */
216
217	/* We will divide by a number which is too large */
218	movl	SIGH(%ebx),%ecx
219	addl	$1,%ecx
220	jnc	LFirst_div_not_1
221
222	/* here we need to divide by 100000000h,
223	   i.e., no division at all.. */
224	mov	%edx,%eax
225	jmp	LFirst_div_done
226
227LFirst_div_not_1:
228	divl	%ecx		/* Divide the numerator by the augmented
229				   denom ms dw */
230
231LFirst_div_done:
232	movl	%eax,FPU_result_2	/* Put the result in the answer */
233
234	mull	SIGH(%ebx)	/* mul by the ms dw of the denom */
235
236	subl	%eax,FPU_accum_2	/* Subtract from the num local reg */
237	sbbl	%edx,FPU_accum_3
238
239	movl	FPU_result_2,%eax	/* Get the result back */
240	mull	SIGL(%ebx)	/* now mul the ls dw of the denom */
241
242	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
243	sbbl	%edx,FPU_accum_2
244	sbbl	$0,FPU_accum_3
245	je	LDo_2nd_32_bits		/* Must check for non-zero result here */
246
247#ifdef PARANOID
248	jb	L_bugged_1
249#endif /* PARANOID */
250
251	/* need to subtract another once of the denom */
252	incl	FPU_result_2	/* Correct the answer */
253
254	movl	SIGL(%ebx),%eax
255	movl	SIGH(%ebx),%edx
256	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
257	sbbl	%edx,FPU_accum_2
258
259#ifdef PARANOID
260	sbbl	$0,FPU_accum_3
261	jne	L_bugged_1	/* Must check for non-zero result here */
262#endif /* PARANOID */
263
264/*----------------------------------------------------------------------*/
265/* Half of the main problem is done, there is just a reduced numerator
266   to handle now.
267   Work with the second 32 bits, FPU_accum_0 not used from now on */
268LDo_2nd_32_bits:
269	movl	FPU_accum_2,%edx	/* get the reduced num */
270	movl	FPU_accum_1,%eax
271
272	/* need to check for possible subsequent overflow */
273	cmpl	SIGH(%ebx),%edx
274	jb	LDo_2nd_div
275	ja	LPrevent_2nd_overflow
276
277	cmpl	SIGL(%ebx),%eax
278	jb	LDo_2nd_div
279
280LPrevent_2nd_overflow:
281/* The numerator is greater or equal, would cause overflow */
282	/* prevent overflow */
283	subl	SIGL(%ebx),%eax
284	sbbl	SIGH(%ebx),%edx
285	movl	%edx,FPU_accum_2
286	movl	%eax,FPU_accum_1
287
288	incl	FPU_result_2	/* Reflect the subtraction in the answer */
289
290#ifdef PARANOID
291	je	L_bugged_2	/* Can't bump the result to 1.0 */
292#endif /* PARANOID */
293
294LDo_2nd_div:
295	cmpl	$0,%ecx		/* augmented denom msw */
296	jnz	LSecond_div_not_1
297
298	/* %ecx == 0, we are dividing by 1.0 */
299	mov	%edx,%eax
300	jmp	LSecond_div_done
301
302LSecond_div_not_1:
303	divl	%ecx		/* Divide the numerator by the denom ms dw */
304
305LSecond_div_done:
306	movl	%eax,FPU_result_1	/* Put the result in the answer */
307
308	mull	SIGH(%ebx)	/* mul by the ms dw of the denom */
309
310	subl	%eax,FPU_accum_1	/* Subtract from the num local reg */
311	sbbl	%edx,FPU_accum_2
312
313#ifdef PARANOID
314	jc	L_bugged_2
315#endif /* PARANOID */
316
317	movl	FPU_result_1,%eax	/* Get the result back */
318	mull	SIGL(%ebx)	/* now mul the ls dw of the denom */
319
320	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
321	sbbl	%edx,FPU_accum_1	/* Subtract from the num local reg */
322	sbbl	$0,FPU_accum_2
323
324#ifdef PARANOID
325	jc	L_bugged_2
326#endif /* PARANOID */
327
328	jz	LDo_3rd_32_bits
329
330#ifdef PARANOID
331	cmpl	$1,FPU_accum_2
332	jne	L_bugged_2
333#endif /* PARANOID */
334
335	/* need to subtract another once of the denom */
336	movl	SIGL(%ebx),%eax
337	movl	SIGH(%ebx),%edx
338	subl	%eax,FPU_accum_0	/* Subtract from the num local reg */
339	sbbl	%edx,FPU_accum_1
340	sbbl	$0,FPU_accum_2
341
342#ifdef PARANOID
343	jc	L_bugged_2
344	jne	L_bugged_2
345#endif /* PARANOID */
346
347	addl	$1,FPU_result_1	/* Correct the answer */
348	adcl	$0,FPU_result_2
349
350#ifdef PARANOID
351	jc	L_bugged_2	/* Must check for non-zero result here */
352#endif /* PARANOID */
353
354/*----------------------------------------------------------------------*/
355/* The division is essentially finished here, we just need to perform
356   tidying operations.
357   Deal with the 3rd 32 bits */
358LDo_3rd_32_bits:
359	movl	FPU_accum_1,%edx		/* get the reduced num */
360	movl	FPU_accum_0,%eax
361
362	/* need to check for possible subsequent overflow */
363	cmpl	SIGH(%ebx),%edx	/* denom */
364	jb	LRound_prep
365	ja	LPrevent_3rd_overflow
366
367	cmpl	SIGL(%ebx),%eax	/* denom */
368	jb	LRound_prep
369
370LPrevent_3rd_overflow:
371	/* prevent overflow */
372	subl	SIGL(%ebx),%eax
373	sbbl	SIGH(%ebx),%edx
374	movl	%edx,FPU_accum_1
375	movl	%eax,FPU_accum_0
376
377	addl	$1,FPU_result_1	/* Reflect the subtraction in the answer */
378	adcl	$0,FPU_result_2
379	jne	LRound_prep
380	jnc	LRound_prep
381
382	/* This is a tricky spot, there is an overflow of the answer */
383	movb	$255,FPU_ovfl_flag		/* Overflow -> 1.000 */
384
385LRound_prep:
386/*
387 * Prepare for rounding.
388 * To test for rounding, we just need to compare 2*accum with the
389 * denom.
390 */
391	movl	FPU_accum_0,%ecx
392	movl	FPU_accum_1,%edx
393	movl	%ecx,%eax
394	orl	%edx,%eax
395	jz	LRound_ovfl		/* The accumulator contains zero. */
396
397	/* Multiply by 2 */
398	clc
399	rcll	$1,%ecx
400	rcll	$1,%edx
401	jc	LRound_large		/* No need to compare, denom smaller */
402
403	subl	SIGL(%ebx),%ecx
404	sbbl	SIGH(%ebx),%edx
405	jnc	LRound_not_small
406
407	movl	$0x70000000,%eax	/* Denom was larger */
408	jmp	LRound_ovfl
409
410LRound_not_small:
411	jnz	LRound_large
412
413	movl	$0x80000000,%eax	/* Remainder was exactly 1/2 denom */
414	jmp	LRound_ovfl
415
416LRound_large:
417	movl	$0xff000000,%eax	/* Denom was smaller */
418
419LRound_ovfl:
420/* We are now ready to deal with rounding, but first we must get
421   the bits properly aligned */
422	testb	$255,FPU_ovfl_flag	/* was the num > denom ? */
423	je	LRound_precision
424
425	incw	EXP(%edi)
426
427	/* shift the mantissa right one bit */
428	stc			/* Will set the ms bit */
429	rcrl	FPU_result_2
430	rcrl	FPU_result_1
431	rcrl	%eax
432
433/* Round the result as required */
434LRound_precision:
435	decw	EXP(%edi)	/* binary point between 1st & 2nd bits */
436
437	movl	%eax,%edx
438	movl	FPU_result_1,%ebx
439	movl	FPU_result_2,%eax
440	jmp	fpu_reg_round
441
442
443#ifdef PARANOID
444/* The logic is wrong if we got here */
445L_bugged:
446	pushl	EX_INTERNAL|0x202
447	call	EXCEPTION
448	pop	%ebx
449	jmp	L_exit
450
451L_bugged_1:
452	pushl	EX_INTERNAL|0x203
453	call	EXCEPTION
454	pop	%ebx
455	jmp	L_exit
456
457L_bugged_2:
458	pushl	EX_INTERNAL|0x204
459	call	EXCEPTION
460	pop	%ebx
461	jmp	L_exit
462
463L_exit:
464	movl	$-1,%eax
465	popl	%ebx
466	popl	%edi
467	popl	%esi
468
469	leave
470	ret
471#endif /* PARANOID */
472