xref: /illumos-gate/usr/src/lib/libm/amd64/src/powl.S (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29        .file "powl.s"
30
31/ Special cases:
32/
33/ x ** 0 is 1
34/ 1 ** y is 1				(C99)
35/ x ** NaN is NaN
36/ NaN ** y (except 0) is NaN
37/ x ** 1 is x
38/ +-(|x| > 1) **  +inf is +inf
39/ +-(|x| > 1) **  -inf is +0
40/ +-(|x| < 1) **  +inf is +0
41/ +-(|x| < 1) **  -inf is +inf
42/ (-1) ** +-inf is +1			(C99)
43/ +0 ** +y (except 0, NaN)              is +0
44/ -0 ** +y (except 0, NaN, odd int)     is +0
45/ +0 ** -y (except 0, NaN)              is +inf (z flag)
46/ -0 ** -y (except 0, NaN, odd int)     is +inf (z flag)
47/ -0 ** y (odd int)			is - (+0 ** x)
48/ +inf ** +y (except 0, NaN)    	is +inf
49/ +inf ** -y (except 0, NaN)    	is +0
50/ -inf ** +-y (except 0, NaN)   	is -0 ** -+y (NO z flag)
51/ x ** -1 is 1/x
52/ x ** 2 is x*x
53/ -x ** y (an integer) is (-1)**(y) * (+x)**(y)
54/ x ** y (x negative & y not integer) is NaN (i flag)
55
56#include "libm.h"
57LIBM_ANSI_PRAGMA_WEAK(powl,function)
58#include "xpg6.h"
59
60	.data
61	.align	16
62negzero:
63	.float	-0.0
64half:
65	.float	0.5
66one:
67	.float	1.0
68negone:
69	.float	-1.0
70two:
71	.float	2.0
72Snan:
73	.4byte	0x7f800001
74pinfinity:
75	.4byte	0x7f800000
76ninfinity:
77	.4byte	0xff800000
78
79
80	ENTRY(powl)
81	pushq	%rbp
82	movq	%rsp,%rbp
83	PIC_SETUP(1)
84
85	fldt	16(%rbp)		/ x
86	fxam				/ determine class of x
87	fnstsw	%ax			/ store status in %ax
88	movb	%ah,%dh			/ %dh <- condition code of x
89
90	fldt	32(%rbp)		/ y , x
91	fxam				/ determine class of y
92	fnstsw	%ax			/ store status in %ax
93	movb	%ah,%dl			/ %dl <- condition code of y
94
95	call	.pow_main		/// LOCAL
96	PIC_WRAPUP
97	leave
98	ret
99
100.pow_main:
101	/ x ** 0 is 1
102	movb	%dl,%cl
103	andb	$0x45,%cl
104	cmpb	$0x40,%cl		/ C3=1 C2=0 C1=? C0=0 when +-0
105	jne	1f
106	fstp	%st(0)			/ x
107	fstp	%st(0)			/ stack empty
108	fld1				/ 1
109	ret
110
1111:	/ y is not zero
112	PIC_G_LOAD(movzwq,__xpg6,rax)
113	andl	$_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
114	cmpl	$0,%eax
115	je	1f
116
117	/ C99: 1 ** anything is 1
118	fld1				/ 1, y, x
119	fucomip	%st(2),%st		/ y, x
120	jp	1f			/ so that pow(NaN1,NaN2) returns NaN2
121	jne	1f
122	fstp	%st(0)			/ x
123	ret
124
1251:
126	/ x ** NaN is NaN
127	movb	%dl,%cl
128	andb	$0x45,%cl
129	cmpb	$0x01,%cl		/ C3=0 C2=0 C1=? C0=1 when +-NaN
130	jne	1f
131	fstp	%st(1)			/ y
132	ret
133
1341:	/ y is not NaN
135	/ NaN ** y (except 0) is NaN
136	movb	%dh,%cl
137	andb	$0x45,%cl
138	cmpb	$0x01,%cl		/ C3=0 C2=0 C1=? C0=1 when +-NaN
139	jne	1f
140	fstp	%st(0)			/ x
141	ret
142
1431:	/ x is not NaN
144	/ x ** 1 is x
145	fld1				/ 1, y, x
146	fcomip	%st(1),%st		/ y, x
147	jne	1f
148	fstp	%st(0)			/ x
149	ret
150
1511:	/ y is not 1
152	/ +-(|x| > 1) **  +inf is +inf
153	/ +-(|x| > 1) **  -inf is +0
154	/ +-(|x| < 1) **  +inf is +0
155	/ +-(|x| < 1) **  -inf is +inf
156	/ +-(|x| = 1) ** +-inf is NaN
157	movb	%dl,%cl
158	andb	$0x47,%cl
159	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=0 C0=1 when +inf
160	je	.yispinf
161	cmpb	$0x07,%cl		/ C3=0 C2=1 C1=1 C0=1 when -inf
162	je	.yisninf
163
164	/ +0 ** +y (except 0, NaN)		is +0
165	/ -0 ** +y (except 0, NaN, odd int)	is +0
166	/ +0 ** -y (except 0, NaN)		is +inf (z flag)
167	/ -0 ** -y (except 0, NaN, odd int)	is +inf (z flag)
168	/ -0 ** y (odd int)			is - (+0 ** x)
169	movb	%dh,%cl
170	andb	$0x47,%cl
171	cmpb	$0x40,%cl		/ C3=1 C2=0 C1=0 C0=0 when +0
172	je	.xispzero
173	cmpb	$0x42,%cl		/ C3=1 C2=0 C1=1 C0=0 when -0
174	je	.xisnzero
175
176	/ +inf ** +y (except 0, NaN)	is +inf
177	/ +inf ** -y (except 0, NaN)	is +0
178	/ -inf ** +-y (except 0, NaN)	is -0 ** -+y (NO z flag)
179	movb	%dh,%cl
180	andb	$0x47,%cl
181	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=0 C0=1 when +inf
182	je	.xispinf
183	cmpb	$0x07,%cl		/ C3=0 C2=1 C1=1 C0=1 when -inf
184	je	.xisninf
185
186	/ x ** -1 is 1/x
187	flds	PIC_L(negone)		/ -1, y, x
188	fcomip	%st(1),%st		/ y, x
189	jne	1f
190	fld	%st(1)			/ x , y , x
191	fdivrs	PIC_L(one)		/ 1/x , y , x
192	jmp	.signok			/ check for over/underflow
193
1941:	/ y is not -1
195	/ x ** 2 is x*x
196	flds	PIC_L(two)		/ 2, y , x
197	fcomip	%st(1),%st		/ y, x
198	jne	1f
199	fld	%st(1)			/ x , y , x
200	fld	%st(0)			/ x , x , y , x
201	fmulp				/ x^2 , y , x
202	jmp	.signok			/ check for over/underflow
203
2041:	/ y is not 2
205	/ x ** 1/2 is sqrt(x)
206	flds	PIC_L(half)		/ 1/2, y , x
207	fcomip	%st(1),%st		/ y, x
208	jne	1f
209	fld	%st(1)			/ x , y , x
210	fsqrt				/ sqrt(x) , y , x
211	jmp	.signok			/ check for over/underflow
212
2131:	/ y is not 1/2
214	/ make copies of x & y
215	fld	%st(1)			/ x , y , x
216	fld	%st(1)			/ y , x , y , x
217
218	/ -x ** y (an integer) is (-1)**(y) * (+x)**(y)
219	/ x ** y (x negative & y not integer) is  NaN
220	movl	$0,%ecx			/ track whether to flip sign of result
221	fldz				/ 0 , y , x , y , x
222	fcomip	%st(2),%st		/ compare 0 with %st(2)
223	jb	.merge			/ 0 < x
224	/ x < 0
225	call	.y_is_int
226	cmpl	$0,%ecx
227	jne	1f
228	/ x < 0 & y != int so x**y = NaN (i flag)
229	fstp	%st(0)			/ x , y , x
230	fstp	%st(0)			/ y , x
231	fstp	%st(0)			/ x
232	fstp	%st(0)			/ stack empty
233	fldz
234	fdiv	%st,%st(0)		/ 0/0
235	ret
236
2371:	/ x < 0 & y = int
238	fxch				/ x , y , y , x
239	fchs				/ px = -x , y , y , x
240	fxch				/ y , px , y , x
241.merge:
242	/ px > 0
243	fxch				/ px , y , y , x
244
245	/ x**y   =   exp(y*ln(x))
246	fyl2x				/ t=y*log2(px) , y , x
247	fld	%st(0)			/ t , t , y , x
248	frndint				/ [t] , t , y , x
249	fxch				/ t , [t] , y , x
250	fucomi	%st(1),%st
251	je	1f			/ t is integral
252	fsub    %st(1),%st		/ t-[t] , [t] , y , x
253	f2xm1				/ 2**(t-[t])-1 , [t] , y , x
254	fadds	PIC_L(one)		/ 2**(t-[t]) , [t] , y , x
255	fscale				/ 2**t = px**y , [t] , y , x
256	jmp	2f
2571:
258	fstp    %st(0)                  / t=[t] , y , x
259	fld1                            / 1 , t , y , x
260	fscale                          / 1*2**t = x**y , t , y , x
2612:
262	fstp	%st(1)			/ x**y , y , x
263	cmpl	$1,%ecx
264	jne	.signok
265	fchs				/ change sign since x<0 & y=-int
266.signok:
267	fstp	%st(2)			/ y , x**y
268	fstp	%st(0)			/ x**y
269	ret
270
271/ ------------------------------------------------------------------------
272
273.xispinf:
274	fldz
275	fcomip	%st(1),%st		/ compare 0 with %st(1)
276	jb	.retpinf		/ 0 < y
277	jmp	.retpzero		/ y < 0
278
279.xisninf:
280	/ -inf ** +-y is -0 ** -+y
281	fchs				/ -y , x
282	flds	PIC_L(negzero)		/ -0 , -y , x
283	fstp	%st(2)			/ -y , -0
284	jmp	.xisnzero
285
286.yispinf:
287	fld	%st(1)			/ x , y , x
288	fabs				/ |x| , y , x
289	flds	PIC_L(one)		/ 1 , |x| , y , x
290	fcomip	%st(1),%st		/ |x| , y , x
291	fstp	%st(0)			/ y , x
292	je	.retponeorinvalid	/ x == -1	C99
293	jb	.retpinf		/ 1 < |x|
294	jmp	.retpzero		/ |x| < 1
295
296.yisninf:
297	fld	%st(1)			/ x , y , x
298	fabs				/ |x| , y , x
299	flds	PIC_L(one)		/ 1 , |x| , y , x
300	fcomip	%st(1),%st		/ |x| , y , x
301	fstp	%st(0)			/ y , x
302	je	.retponeorinvalid	/ x == -1	C99
303	jb	.retpzero		/ 1 < |x|
304	jmp	.retpinf		/ |x| < 1
305
306.xispzero:
307	/ y cannot be 0 or NaN ; stack has	y , x
308	fldz				/ 0 , y , x
309	fcomip	%st(1),%st		/ compare 0 with %st(1)
310	jb	.retpzero		/ 0 < y
311	/ x = +0 & y < 0 so x**y = +inf
312	jmp	.retpinfzflag		/ ret +inf & z flag
313
314.xisnzero:
315	/ y cannot be 0 or NaN ; stack has	y , x
316	call	.y_is_int
317	cmpl	$1,%ecx
318	jne	1f			/ y is not an odd integer
319	/ y is an odd integer
320	fldz
321	fcomip	%st(1),%st		/ compare 0 with %st(1)
322	jb	.retnzero		/ 0 < y
323	/ x = -0 & y < 0 (odd int)	return -inf (z flag)
324	/ x = -inf & y != 0 or NaN	return -inf (NO z flag)
325	movb	%dh,%cl
326	andb	$0x45,%cl
327	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=? C0=1 when +-inf
328	je	2f
329	fdiv	%st,%st(1)		/ y / x, x (raise z flag)
3302:
331	fstp	%st(0)			/ x
332	fstp	%st(0)			/ stack empty
333	flds	PIC_L(ninfinity)	/ -inf
334	ret
335
3361:	/ y is not an odd integer
337	fldz
338	fcomip	%st(1),%st		/ compare 0 with %st(1)
339	jb	.retpzero		/ 0 < y
340	/ x = -0 & y < 0 (not odd int)	return +inf (z flag)
341	/ x = -inf & y not 0 or NaN 	return +inf (NO z flag)
342	movb	%dh,%cl
343	andb	$0x45,%cl
344	cmpb	$0x05,%cl		/ C3=0 C2=1 C1=? C0=1 when +-inf
345	jne	.retpinfzflag		/ ret +inf & divide-by-0 flag
346	jmp	.retpinf		/ return +inf (NO z flag)
347
348.retpzero:
349	fstp	%st(0)			/ x
350	fstp	%st(0)			/ stack empty
351	fldz				/ +0
352	ret
353
354.retnzero:
355	fstp	%st(0)			/ x
356	fstp	%st(0)			/ stack empty
357	flds	PIC_L(negzero)		/ -0
358	ret
359
360.retponeorinvalid:
361	PIC_G_LOAD(movzwq,__xpg6,rax)
362	andl	$_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
363	cmpl	$0,%eax
364	je	1f
365	fstp	%st(0)			/ x
366	fstp	%st(0)			/ stack empty
367	fld1				/ 1
368	ret
369
3701:
371	fstp	%st(0)			/ x
372	fstp	%st(0)			/ stack empty
373	flds	PIC_L(Snan)		/ Q NaN (i flag)
374	fwait
375	ret
376
377.retpinf:
378	fstp	%st(0)			/ x
379	fstp	%st(0)			/ stack empty
380	flds	PIC_L(pinfinity)	/ +inf
381	ret
382
383.retpinfzflag:
384	fstp	%st(0)			/ x
385	fstp	%st(0)			/ stack empty
386	fldz
387	fdivrs	PIC_L(one)		/ 1/0
388	ret
389
390/ Set %ecx to 2 if y is an even integer, 1 if y is an odd integer,
391/ 0 otherwise.  Assume y is not zero.  Do not raise inexact or modify
392/ %edx.
393.y_is_int:
394	movl	40(%rbp),%eax
395	andl	$0x7fff,%eax		/ exponent of y
396	cmpl	$0x403f,%eax
397	jae	1f			/ |y| >= 2^64, an even int
398	cmpl	$0x3fff,%eax
399	jb	2f			/ |y| < 1, can't be an int
400	movl	%eax,%ecx
401	subl	$0x403e,%ecx
402	negl	%ecx			/ 63 - unbiased exponent of y
403	movq	32(%rbp),%rax
404	bsfq	%rax,%rax		/ index of least sig. 1 bit
405	cmpl	%ecx,%eax
406	jb	2f
407	ja	1f
408	movl	$1,%ecx
409	ret
4101:
411	movl	$2,%ecx
412	ret
4132:
414	xorl	%ecx,%ecx
415	ret
416	.align	16
417	SET_SIZE(powl)
418