xref: /linux/arch/m68k/fpsp040/stwotox.S (revision ca55b2fef3a9373fcfc30f82fd26bc7fccbda732)
1|
2|	stwotox.sa 3.1 12/10/90
3|
4|	stwotox  --- 2**X
5|	stwotoxd --- 2**X for denormalized X
6|	stentox  --- 10**X
7|	stentoxd --- 10**X for denormalized X
8|
9|	Input: Double-extended number X in location pointed to
10|		by address register a0.
11|
12|	Output: The function values are returned in Fp0.
13|
14|	Accuracy and Monotonicity: The returned result is within 2 ulps in
15|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16|		result is subsequently rounded to double precision. The
17|		result is provably monotonic in double precision.
18|
19|	Speed: The program stwotox takes approximately 190 cycles and the
20|		program stentox takes approximately 200 cycles.
21|
22|	Algorithm:
23|
24|	twotox
25|	1. If |X| > 16480, go to ExpBig.
26|
27|	2. If |X| < 2**(-70), go to ExpSm.
28|
29|	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
30|		decompose N as
31|		 N = 64(M + M') + j,  j = 0,1,2,...,63.
32|
33|	4. Overwrite r := r * log2. Then
34|		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
35|		Go to expr to compute that expression.
36|
37|	tentox
38|	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
39|
40|	2. If |X| < 2**(-70), go to ExpSm.
41|
42|	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
43|		N := round-to-int(y). Decompose N as
44|		 N = 64(M + M') + j,  j = 0,1,2,...,63.
45|
46|	4. Define r as
47|		r := ((X - N*L1)-N*L2) * L10
48|		where L1, L2 are the leading and trailing parts of log_10(2)/64
49|		and L10 is the natural log of 10. Then
50|		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
51|		Go to expr to compute that expression.
52|
53|	expr
54|	1. Fetch 2**(j/64) from table as Fact1 and Fact2.
55|
56|	2. Overwrite Fact1 and Fact2 by
57|		Fact1 := 2**(M) * Fact1
58|		Fact2 := 2**(M) * Fact2
59|		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
60|
61|	3. Calculate P where 1 + P approximates exp(r):
62|		P = r + r*r*(A1+r*(A2+...+r*A5)).
63|
64|	4. Let AdjFact := 2**(M'). Return
65|		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
66|		Exit.
67|
68|	ExpBig
69|	1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
70|		underflow by Tiny * Tiny.
71|
72|	ExpSm
73|	1. Return 1 + X.
74|
75
76|		Copyright (C) Motorola, Inc. 1990
77|			All Rights Reserved
78|
79|       For details on the license for this file, please see the
80|       file, README, in this same directory.
81
82|STWOTOX	idnt	2,1 | Motorola 040 Floating Point Software Package
83
84	|section	8
85
86#include "fpsp.h"
87
88BOUNDS1:	.long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
89BOUNDS2:	.long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
90
91L2TEN64:	.long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
92L10TWO1:	.long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
93
94L10TWO2:	.long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
95
96LOG10:	.long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
97
98LOG2:	.long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
99
100EXPA5:	.long 0x3F56C16D,0x6F7BD0B2
101EXPA4:	.long 0x3F811112,0x302C712C
102EXPA3:	.long 0x3FA55555,0x55554CC1
103EXPA2:	.long 0x3FC55555,0x55554A54
104EXPA1:	.long 0x3FE00000,0x00000000,0x00000000,0x00000000
105
106HUGE:	.long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
107TINY:	.long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
108
109EXPTBL:
110	.long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
111	.long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
112	.long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
113	.long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
114	.long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
115	.long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
116	.long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
117	.long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
118	.long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
119	.long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
120	.long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
121	.long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
122	.long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
123	.long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
124	.long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
125	.long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
126	.long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
127	.long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
128	.long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
129	.long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
130	.long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
131	.long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
132	.long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
133	.long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
134	.long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
135	.long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
136	.long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
137	.long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
138	.long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
139	.long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
140	.long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
141	.long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
142	.long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
143	.long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
144	.long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
145	.long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
146	.long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
147	.long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
148	.long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
149	.long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
150	.long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
151	.long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
152	.long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
153	.long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
154	.long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
155	.long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
156	.long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
157	.long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
158	.long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
159	.long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
160	.long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
161	.long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
162	.long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
163	.long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
164	.long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
165	.long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
166	.long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
167	.long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
168	.long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
169	.long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
170	.long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
171	.long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
172	.long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
173	.long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
174
175	.set	N,L_SCR1
176
177	.set	X,FP_SCR1
178	.set	XDCARE,X+2
179	.set	XFRAC,X+4
180
181	.set	ADJFACT,FP_SCR2
182
183	.set	FACT1,FP_SCR3
184	.set	FACT1HI,FACT1+4
185	.set	FACT1LOW,FACT1+8
186
187	.set	FACT2,FP_SCR4
188	.set	FACT2HI,FACT2+4
189	.set	FACT2LOW,FACT2+8
190
191	| xref	t_unfl
192	|xref	t_ovfl
193	|xref	t_frcinx
194
195	.global	stwotoxd
196stwotoxd:
197|--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
198
199	fmovel		%d1,%fpcr		| ...set user's rounding mode/precision
200	fmoves		#0x3F800000,%fp0  | ...RETURN 1 + X
201	movel		(%a0),%d0
202	orl		#0x00800001,%d0
203	fadds		%d0,%fp0
204	bra		t_frcinx
205
206	.global	stwotox
207stwotox:
208|--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
209	fmovemx	(%a0),%fp0-%fp0	| ...LOAD INPUT, do not set cc's
210
211	movel		(%a0),%d0
212	movew		4(%a0),%d0
213	fmovex		%fp0,X(%a6)
214	andil		#0x7FFFFFFF,%d0
215
216	cmpil		#0x3FB98000,%d0		| ...|X| >= 2**(-70)?
217	bges		TWOOK1
218	bra		EXPBORS
219
220TWOOK1:
221	cmpil		#0x400D80C0,%d0		| ...|X| > 16480?
222	bles		TWOMAIN
223	bra		EXPBORS
224
225
226TWOMAIN:
227|--USUAL CASE, 2^(-70) <= |X| <= 16480
228
229	fmovex		%fp0,%fp1
230	fmuls		#0x42800000,%fp1  | ...64 * X
231
232	fmovel		%fp1,N(%a6)		| ...N = ROUND-TO-INT(64 X)
233	movel		%d2,-(%sp)
234	lea		EXPTBL,%a1	| ...LOAD ADDRESS OF TABLE OF 2^(J/64)
235	fmovel		N(%a6),%fp1		| ...N --> FLOATING FMT
236	movel		N(%a6),%d0
237	movel		%d0,%d2
238	andil		#0x3F,%d0		| ...D0 IS J
239	asll		#4,%d0		| ...DISPLACEMENT FOR 2^(J/64)
240	addal		%d0,%a1		| ...ADDRESS FOR 2^(J/64)
241	asrl		#6,%d2		| ...d2 IS L, N = 64L + J
242	movel		%d2,%d0
243	asrl		#1,%d0		| ...D0 IS M
244	subl		%d0,%d2		| ...d2 IS M', N = 64(M+M') + J
245	addil		#0x3FFF,%d2
246	movew		%d2,ADJFACT(%a6)	| ...ADJFACT IS 2^(M')
247	movel		(%sp)+,%d2
248|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
249|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
250|--ADJFACT = 2^(M').
251|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
252
253	fmuls		#0x3C800000,%fp1  | ...(1/64)*N
254	movel		(%a1)+,FACT1(%a6)
255	movel		(%a1)+,FACT1HI(%a6)
256	movel		(%a1)+,FACT1LOW(%a6)
257	movew		(%a1)+,FACT2(%a6)
258	clrw		FACT2+2(%a6)
259
260	fsubx		%fp1,%fp0		| ...X - (1/64)*INT(64 X)
261
262	movew		(%a1)+,FACT2HI(%a6)
263	clrw		FACT2HI+2(%a6)
264	clrl		FACT2LOW(%a6)
265	addw		%d0,FACT1(%a6)
266
267	fmulx		LOG2,%fp0	| ...FP0 IS R
268	addw		%d0,FACT2(%a6)
269
270	bra		expr
271
272EXPBORS:
273|--FPCR, D0 SAVED
274	cmpil		#0x3FFF8000,%d0
275	bgts		EXPBIG
276
277EXPSM:
278|--|X| IS SMALL, RETURN 1 + X
279
280	fmovel		%d1,%FPCR		|restore users exceptions
281	fadds		#0x3F800000,%fp0  | ...RETURN 1 + X
282
283	bra		t_frcinx
284
285EXPBIG:
286|--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
287|--REGISTERS SAVE SO FAR ARE FPCR AND  D0
288	movel		X(%a6),%d0
289	cmpil		#0,%d0
290	blts		EXPNEG
291
292	bclrb		#7,(%a0)		|t_ovfl expects positive value
293	bra		t_ovfl
294
295EXPNEG:
296	bclrb		#7,(%a0)		|t_unfl expects positive value
297	bra		t_unfl
298
299	.global	stentoxd
300stentoxd:
301|--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
302
303	fmovel		%d1,%fpcr		| ...set user's rounding mode/precision
304	fmoves		#0x3F800000,%fp0  | ...RETURN 1 + X
305	movel		(%a0),%d0
306	orl		#0x00800001,%d0
307	fadds		%d0,%fp0
308	bra		t_frcinx
309
310	.global	stentox
311stentox:
312|--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
313	fmovemx	(%a0),%fp0-%fp0	| ...LOAD INPUT, do not set cc's
314
315	movel		(%a0),%d0
316	movew		4(%a0),%d0
317	fmovex		%fp0,X(%a6)
318	andil		#0x7FFFFFFF,%d0
319
320	cmpil		#0x3FB98000,%d0		| ...|X| >= 2**(-70)?
321	bges		TENOK1
322	bra		EXPBORS
323
324TENOK1:
325	cmpil		#0x400B9B07,%d0		| ...|X| <= 16480*log2/log10 ?
326	bles		TENMAIN
327	bra		EXPBORS
328
329TENMAIN:
330|--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
331
332	fmovex		%fp0,%fp1
333	fmuld		L2TEN64,%fp1	| ...X*64*LOG10/LOG2
334
335	fmovel		%fp1,N(%a6)		| ...N=INT(X*64*LOG10/LOG2)
336	movel		%d2,-(%sp)
337	lea		EXPTBL,%a1	| ...LOAD ADDRESS OF TABLE OF 2^(J/64)
338	fmovel		N(%a6),%fp1		| ...N --> FLOATING FMT
339	movel		N(%a6),%d0
340	movel		%d0,%d2
341	andil		#0x3F,%d0		| ...D0 IS J
342	asll		#4,%d0		| ...DISPLACEMENT FOR 2^(J/64)
343	addal		%d0,%a1		| ...ADDRESS FOR 2^(J/64)
344	asrl		#6,%d2		| ...d2 IS L, N = 64L + J
345	movel		%d2,%d0
346	asrl		#1,%d0		| ...D0 IS M
347	subl		%d0,%d2		| ...d2 IS M', N = 64(M+M') + J
348	addil		#0x3FFF,%d2
349	movew		%d2,ADJFACT(%a6)	| ...ADJFACT IS 2^(M')
350	movel		(%sp)+,%d2
351
352|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
353|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
354|--ADJFACT = 2^(M').
355|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
356
357	fmovex		%fp1,%fp2
358
359	fmuld		L10TWO1,%fp1	| ...N*(LOG2/64LOG10)_LEAD
360	movel		(%a1)+,FACT1(%a6)
361
362	fmulx		L10TWO2,%fp2	| ...N*(LOG2/64LOG10)_TRAIL
363
364	movel		(%a1)+,FACT1HI(%a6)
365	movel		(%a1)+,FACT1LOW(%a6)
366	fsubx		%fp1,%fp0		| ...X - N L_LEAD
367	movew		(%a1)+,FACT2(%a6)
368
369	fsubx		%fp2,%fp0		| ...X - N L_TRAIL
370
371	clrw		FACT2+2(%a6)
372	movew		(%a1)+,FACT2HI(%a6)
373	clrw		FACT2HI+2(%a6)
374	clrl		FACT2LOW(%a6)
375
376	fmulx		LOG10,%fp0	| ...FP0 IS R
377
378	addw		%d0,FACT1(%a6)
379	addw		%d0,FACT2(%a6)
380
381expr:
382|--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
383|--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
384|--FP0 IS R. THE FOLLOWING CODE COMPUTES
385|--	2**(M'+M) * 2**(J/64) * EXP(R)
386
387	fmovex		%fp0,%fp1
388	fmulx		%fp1,%fp1		| ...FP1 IS S = R*R
389
390	fmoved		EXPA5,%fp2	| ...FP2 IS A5
391	fmoved		EXPA4,%fp3	| ...FP3 IS A4
392
393	fmulx		%fp1,%fp2		| ...FP2 IS S*A5
394	fmulx		%fp1,%fp3		| ...FP3 IS S*A4
395
396	faddd		EXPA3,%fp2	| ...FP2 IS A3+S*A5
397	faddd		EXPA2,%fp3	| ...FP3 IS A2+S*A4
398
399	fmulx		%fp1,%fp2		| ...FP2 IS S*(A3+S*A5)
400	fmulx		%fp1,%fp3		| ...FP3 IS S*(A2+S*A4)
401
402	faddd		EXPA1,%fp2	| ...FP2 IS A1+S*(A3+S*A5)
403	fmulx		%fp0,%fp3		| ...FP3 IS R*S*(A2+S*A4)
404
405	fmulx		%fp1,%fp2		| ...FP2 IS S*(A1+S*(A3+S*A5))
406	faddx		%fp3,%fp0		| ...FP0 IS R+R*S*(A2+S*A4)
407
408	faddx		%fp2,%fp0		| ...FP0 IS EXP(R) - 1
409
410
411|--FINAL RECONSTRUCTION PROCESS
412|--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
413
414	fmulx		FACT1(%a6),%fp0
415	faddx		FACT2(%a6),%fp0
416	faddx		FACT1(%a6),%fp0
417
418	fmovel		%d1,%FPCR		|restore users exceptions
419	clrw		ADJFACT+2(%a6)
420	movel		#0x80000000,ADJFACT+4(%a6)
421	clrl		ADJFACT+8(%a6)
422	fmulx		ADJFACT(%a6),%fp0	| ...FINAL ADJUSTMENT
423
424	bra		t_frcinx
425
426	|end
427