xref: /titanic_44/usr/src/lib/libm/common/m9x/fenv_inlines.h (revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8)
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
11*25c28e83SPiotr Jasiukajtis 
/*
 * Copyright 2011, Richard Lowe
 */
15*25c28e83SPiotr Jasiukajtis 
16*25c28e83SPiotr Jasiukajtis #ifndef _FENV_INLINES_H
17*25c28e83SPiotr Jasiukajtis #define	_FENV_INLINES_H
18*25c28e83SPiotr Jasiukajtis 
19*25c28e83SPiotr Jasiukajtis #ifdef __GNUC__
20*25c28e83SPiotr Jasiukajtis 
21*25c28e83SPiotr Jasiukajtis #ifdef __cplusplus
22*25c28e83SPiotr Jasiukajtis extern "C" {
23*25c28e83SPiotr Jasiukajtis #endif
24*25c28e83SPiotr Jasiukajtis 
25*25c28e83SPiotr Jasiukajtis #include <sys/types.h>
26*25c28e83SPiotr Jasiukajtis 
27*25c28e83SPiotr Jasiukajtis #if defined(__x86)
28*25c28e83SPiotr Jasiukajtis 
/*
 * Floating point Control Word and Status Word, viewed either as one 32-bit
 * quantity or as two 16-bit halves.
 * Definition should actually be shared with x86
 * (much of this 'amd64' code can be, in fact.)
 *
 * NOTE(review): __fenv_getcwsw() stores the x87 *status* word (fstsw) into
 * the first 16-bit slot and the *control* word (fstcw) into the second, so
 * the member names below read inverted relative to what they hold.  Confirm
 * against callers before relying on the names.
 */
union fp_cwsw {
	uint32_t cwsw;		/* both words as a single 32-bit value */
	struct {
		uint16_t cw;	/* first 16 bits in memory */
		uint16_t sw;	/* following 16 bits in memory */
	} words;
};
41*25c28e83SPiotr Jasiukajtis 
/*
 * Read the x87 control and status words into *value.
 *
 * The result is packed as (control word << 16) | status word.  The two
 * words are captured into locals and combined with shifts, so *value is
 * only ever written through its own type rather than through a pointer
 * cast to a union.
 */
extern __inline__ void
__fenv_getcwsw(unsigned int *value)
{
	unsigned short sw, cw;

	__asm__ __volatile__(
	    "fstsw %0\n\t"
	    "fstcw %1\n\t"
	    : "=m" (sw), "=m" (cw));

	*value = ((unsigned int)cw << 16) | sw;
}
52*25c28e83SPiotr Jasiukajtis 
/*
 * Load the x87 control and status words from *value, which is packed as
 * (control word << 16) | status word.
 *
 * The current FPU environment is saved with fstenv, the control word slot
 * (fenv[0]) and status word slot (fenv[2]) of the saved image are
 * overwritten, and the image is reloaded with fldenv.
 */
extern __inline__ void
__fenv_setcwsw(const unsigned int *value)
{
	unsigned short cw = (unsigned short)(*value >> 16);
	unsigned short sw = (unsigned short)(*value & 0xffffu);
	short fenv[16];

	__asm__ __volatile__(
	    "fstenv %[env]\n\t"
	    "movw   %[cw],%[env_cw]\n\t"
	    "movw   %[sw],%[env_sw]\n\t"
	    "fldenv %[env]\n\t"
	    "fwait\n\t"
	    : [env] "=m" (fenv),
	      [env_cw] "=m" (fenv[0]),
	      [env_sw] "=m" (fenv[2])
	    : [sw] "r" (sw), [cw] "r" (cw)
	    /* For practical purposes, we clobber the whole FPU */
	    : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
	      "st(6)", "st(7)");
}
73*25c28e83SPiotr Jasiukajtis 
/*
 * Store the SSE control/status register (MXCSR) into *value.
 */
extern __inline__ void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__("stmxcsr %[dst]" : [dst] "=m" (*value));
}
79*25c28e83SPiotr Jasiukajtis 
/*
 * Load the SSE control/status register (MXCSR) from *value.
 */
extern __inline__ void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__("ldmxcsr %[src]" : : [src] "m" (*value));
}
85*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 F2XM1 instruction with x in st(0) and return what it leaves
 * there (2^x - 1 per the Intel SDM, defined for x in [-1, 1]).
 */
extern __inline__ long double
f2xm1(long double x)
{
	long double ret;

	__asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
	return (ret);
}
94*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FYL2X instruction with x in st(0) and y in st(1); returns
 * y * log2(x).  The instruction pops st(1), hence the "st(1)" clobber.
 */
extern __inline__ long double
fyl2x(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2x"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}
106*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FPTAN instruction on x and return the tangent.
 */
extern __inline__ long double
fptan(long double x)
{
	/*
	 * fptan replaces st(0) with the tangent and then pushes 1.0, so on
	 * completion st(0) holds 1.0 and st(1) holds the result.  Both slots
	 * are declared as outputs so the compiler pops the stack twice; the
	 * 1.0 lands in a dummy that is discarded.
	 */
	long double ret;
	long double dummy;

	__asm__ __volatile__("fptan"
	    : "=t" (dummy), "=u" (ret)
	    : "0" (x)
	    : "cc");
	return (ret);
}
123*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FPATAN instruction with y in st(0) and x in st(1); returns
 * the arctangent of st(1)/st(0), i.e. of x / y, with the quadrant taken
 * from the operand signs.  The instruction pops st(1), hence the clobber.
 */
extern __inline__ long double
fpatan(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fpatan"
	    : "=t" (ret)
	    : "0" (y), "u" (x)
	    : "st(1)", "cc");
	return (ret);
}
135*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FXTRACT instruction on x and return the significand.
 *
 * fxtract replaces st(0) with the exponent and then pushes the
 * significand, so it leaves two values on the FP stack.  Both slots are
 * declared as outputs so the compiler pops the exponent as well; declaring
 * only st(0) would leak one stack register on every call.
 */
extern __inline__ long double
fxtract(long double x)
{
	long double significand;
	long double exponent;	/* produced by the instruction, discarded */

	__asm__ __volatile__("fxtract"
	    : "=t" (significand), "=u" (exponent)
	    : "0" (x)
	    : "cc");
	return (significand);
}
142*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FPREM1 instruction (IEEE round-to-nearest partial remainder)
 * with div in st(0) and idend in st(1), returning the new st(0).
 *
 * NOTE(review): the instruction computes st(0) rem st(1), so the value
 * returned is "div rem idend"; the parameter names read inverted relative
 * to that.  Check callers before renaming anything.
 */
extern __inline__ long double
fprem1(long double idend, long double div)
{
	__asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}
149*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FPREM instruction (truncating partial remainder) with div in
 * st(0) and idend in st(1), returning the new st(0).
 *
 * NOTE(review): the instruction computes st(0) rem st(1), so the value
 * returned is "div rem idend"; the parameter names read inverted relative
 * to that.  Check callers before renaming anything.
 */
extern __inline__ long double
fprem(long double idend, long double div)
{
	__asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}
156*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FYL2XP1 instruction with x in st(0) and y in st(1); returns
 * y * log2(x + 1).  The instruction pops st(1), hence the "st(1)" clobber.
 * Per the Intel SDM the result is only defined for small |x|
 * (|x| < 1 - sqrt(2)/2).
 */
extern __inline__ long double
fyl2xp1(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2xp1"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}
168*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FSQRT instruction on x and return the square root left in
 * st(0).
 */
extern __inline__ long double
fsqrt(long double x)
{
	__asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
	return (x);
}
175*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FSINCOS instruction on x.
 *
 * fsincos replaces st(0) with the sine and then pushes the cosine, so on
 * completion st(0) holds cos(x) and st(1) holds sin(x).  This wrapper
 * returns the st(0) value (the cosine); the sine is popped into a dummy
 * and discarded.
 */
extern __inline__ long double
fsincos(long double x)
{
	long double dummy;

	__asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc");
	return (x);
}
184*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FRNDINT instruction on x: round to an integral value using
 * the rounding mode currently selected in the x87 control word.
 */
extern __inline__ long double
frndint(long double x)
{
	__asm__ __volatile__("frndint" : "+t" (x) : : "cc");
	return (x);
}
191*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FSCALE instruction with y in st(0) and x in st(1); returns
 * y scaled by two to the power of x truncated toward zero, i.e.
 * y * 2^trunc(x).  fscale does not pop, so st(1) is not clobbered.
 */
extern __inline__ long double
fscale(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
	return (ret);
}
200*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FSIN instruction on x and return the sine left in st(0).
 */
extern __inline__ long double
fsin(long double x)
{
	__asm__ __volatile__("fsin" : "+t" (x) : : "cc");
	return (x);
}
207*25c28e83SPiotr Jasiukajtis 
/*
 * Run the x87 FCOS instruction on x and return the cosine left in st(0).
 */
extern __inline__ long double
fcos(long double x)
{
	__asm__ __volatile__("fcos" : "+t" (x) : : "cc");
	return (x);
}
214*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision compare-equal (CMPEQSS) of *f1 against *f2.
 * The 32-bit result mask is stored to *i1: all ones (-1) when the operands
 * compare equal, zero otherwise.  The compare runs on a private copy of
 * *f1 so the caller's inputs are never written.
 */
extern __inline__ void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	float lhs = *f1;

	__asm__ __volatile__(
	    "cmpeqss %[rhs], %[lhs]\n\t"
	    "movss   %[lhs], %[mask]"
	    : [mask] "=m" (*i1), [lhs] "+x" (lhs)
	    : [rhs] "x" (*f2)
	    : "cc");
}
225*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision compare-less-than (CMPLTSS): tests *f1 < *f2.
 * The 32-bit result mask is stored to *i1: all ones (-1) when true, zero
 * otherwise.  The compare runs on a private copy of *f1 so the caller's
 * inputs are never written.
 */
extern __inline__ void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	float lhs = *f1;

	__asm__ __volatile__(
	    "cmpltss %[rhs], %[lhs]\n\t"
	    "movss   %[lhs], %[mask]"
	    : [mask] "=m" (*i1), [lhs] "+x" (lhs)
	    : [rhs] "x" (*f2)
	    : "cc");
}
236*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision compare-less-or-equal (CMPLESS): tests
 * *f1 <= *f2.  The 32-bit result mask is stored to *i1: all ones (-1) when
 * true, zero otherwise.  The compare runs on a private copy of *f1 so the
 * caller's inputs are never written.
 */
extern __inline__ void
sse_cmpless(float *f1, float *f2, int *i1)
{
	float lhs = *f1;

	__asm__ __volatile__(
	    "cmpless %[rhs], %[lhs]\n\t"
	    "movss   %[lhs], %[mask]"
	    : [mask] "=m" (*i1), [lhs] "+x" (lhs)
	    : [rhs] "x" (*f2)
	    : "cc");
}
247*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision compare-unordered (CMPUNORDSS): tests whether
 * either of *f1, *f2 is a NaN.  The 32-bit result mask is stored to *i1:
 * all ones (-1) when unordered, zero otherwise.  The compare runs on a
 * private copy of *f1 so the caller's inputs are never written.
 */
extern __inline__ void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	float lhs = *f1;

	__asm__ __volatile__(
	    "cmpunordss %[rhs], %[lhs]\n\t"
	    "movss      %[lhs], %[mask]"
	    : [mask] "=m" (*i1), [lhs] "+x" (lhs)
	    : [rhs] "x" (*f2)
	    : "cc");
}
258*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision minimum (MINSS) of *f1 and *f2, stored to *f3.
 * The operation runs on a private copy of *f1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_minss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "minss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
268*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision maximum (MAXSS) of *f1 and *f2, stored to *f3.
 * The operation runs on a private copy of *f1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_maxss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "maxss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
278*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision add (ADDSS): *f3 = *f1 + *f2.
 * The operation runs on a private copy of *f1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_addss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "addss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
288*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision subtract (SUBSS): *f3 = *f1 - *f2.
 * The operation runs on a private copy of *f1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_subss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "subss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
298*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision multiply (MULSS): *f3 = *f1 * *f2.
 * The operation runs on a private copy of *f1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_mulss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "mulss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
308*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision divide (DIVSS): *f3 = *f1 / *f2.
 * The operation runs on a private copy of *f1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_divss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "divss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
318*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar single-precision square root (SQRTSS): *f2 = sqrt(*f1).
 * The result is formed in a scratch XMM register and then stored, so *f1
 * is only read.
 */
extern __inline__ void
sse_sqrtss(float *f1, float *f2)
{
	float scratch;

	__asm__ __volatile__(
	    "sqrtss %[src], %[tmp]\n\t"
	    "movss  %[tmp], %[out]"
	    : [out] "=m" (*f2), [tmp] "=x" (scratch)
	    : [src] "m" (*f1));
}
330*25c28e83SPiotr Jasiukajtis 
/*
 * Execute an unordered scalar single-precision compare (UCOMISS) of *f1
 * with *f2.  Nothing is returned; the instruction is run only for its
 * effect on the processor state.  It writes EFLAGS, hence the "cc"
 * clobber.
 */
extern __inline__ void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__("ucomiss %[rhs], %[lhs]"
	    :
	    : [lhs] "x" (*f1), [rhs] "x" (*f2)
	    : "cc");
}
337*25c28e83SPiotr Jasiukajtis 
/*
 * Execute an ordered scalar single-precision compare (COMISS) of *f1 with
 * *f2.  Nothing is returned; the instruction is run only for its effect on
 * the processor state.  It writes EFLAGS, hence the "cc" clobber.
 */
extern __inline__ void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__("comiss %[rhs], %[lhs]"
	    :
	    : [lhs] "x" (*f1), [rhs] "x" (*f2)
	    : "cc");
}
343*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the single-precision value *f1 to double precision (CVTSS2SD)
 * and store it to *d1.
 */
extern __inline__ void
sse_cvtss2sd(float *f1, double *d1)
{
	double scratch;

	__asm__ __volatile__(
	    "cvtss2sd %[src], %[tmp]\n\t"
	    "movsd    %[tmp], %[out]"
	    : [out] "=m" (*d1), [tmp] "=x" (scratch)
	    : [src] "m" (*f1));
}
355*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the 32-bit integer *i1 to single precision (CVTSI2SS) and store
 * it to *f1.
 */
extern __inline__ void
sse_cvtsi2ss(int *i1, float *f1)
{
	float scratch;

	__asm__ __volatile__(
	    "cvtsi2ss %[src], %[tmp]\n\t"
	    "movss    %[tmp], %[out]"
	    : [out] "=m" (*f1), [tmp] "=x" (scratch)
	    : [src] "m" (*i1));
}
367*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the single-precision value *f1 to a 32-bit integer with
 * truncation (CVTTSS2SI) and store it to *i1.
 *
 * The scratch register is marked early-clobber: it is written by the first
 * instruction, before the store to *i1, so it must not share a register
 * with the address of that destination.
 */
extern __inline__ void
sse_cvttss2si(float *f1, int *i1)
{
	int scratch;

	__asm__ __volatile__(
	    "cvttss2si %[src], %[tmp]\n\t"
	    "movl      %[tmp], %[out]"
	    : [out] "=m" (*i1), [tmp] "=&r" (scratch)
	    : [src] "m" (*f1));
}
379*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the single-precision value *f1 to a 32-bit integer using the
 * current MXCSR rounding mode (CVTSS2SI) and store it to *i1.
 *
 * The scratch register is marked early-clobber: it is written by the first
 * instruction, before the store to *i1, so it must not share a register
 * with the address of that destination.
 */
extern __inline__ void
sse_cvtss2si(float *f1, int *i1)
{
	int scratch;

	__asm__ __volatile__(
	    "cvtss2si %[src], %[tmp]\n\t"
	    "movl     %[tmp], %[out]"
	    : [out] "=m" (*i1), [tmp] "=&r" (scratch)
	    : [src] "m" (*f1));
}
391*25c28e83SPiotr Jasiukajtis 
392*25c28e83SPiotr Jasiukajtis #if defined(__amd64)
/*
 * Convert the 64-bit integer *ll1 to single precision (CVTSI2SSQ) and
 * store it to *f1.
 */
extern __inline__ void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	float scratch;

	__asm__ __volatile__(
	    "cvtsi2ssq %[src], %[tmp]\n\t"
	    "movss     %[tmp], %[out]"
	    : [out] "=m" (*f1), [tmp] "=x" (scratch)
	    : [src] "m" (*ll1));
}
404*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the single-precision value *f1 to a 64-bit integer with
 * truncation (CVTTSS2SIQ) and store it to *ll1.
 *
 * The scratch register is marked early-clobber: it is written by the first
 * instruction, before the store to *ll1, so it must not share a register
 * with the address of that destination.
 */
extern __inline__ void
sse_cvttss2siq(float *f1, long long *ll1)
{
	long long scratch;

	__asm__ __volatile__(
	    "cvttss2siq %[src], %[tmp]\n\t"
	    "movq       %[tmp], %[out]"
	    : [out] "=m" (*ll1), [tmp] "=&r" (scratch)
	    : [src] "m" (*f1));
}
416*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the single-precision value *f1 to a 64-bit integer using the
 * current MXCSR rounding mode (CVTSS2SIQ) and store it to *ll1.
 *
 * The scratch register is marked early-clobber: it is written by the first
 * instruction, before the store to *ll1, so it must not share a register
 * with the address of that destination.
 */
extern __inline__ void
sse_cvtss2siq(float *f1, long long *ll1)
{
	long long scratch;

	__asm__ __volatile__(
	    "cvtss2siq %[src], %[tmp]\n\t"
	    "movq      %[tmp], %[out]"
	    : [out] "=m" (*ll1), [tmp] "=&r" (scratch)
	    : [src] "m" (*f1));
}
428*25c28e83SPiotr Jasiukajtis 
429*25c28e83SPiotr Jasiukajtis #endif
430*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision compare-equal (CMPEQSD) of *d1 against *d2.
 * The 64-bit result mask is stored to *ll1: all ones (-1) when the
 * operands compare equal, zero otherwise.  The compare runs on a private
 * copy of *d1 so the caller's inputs are never written.
 */
extern __inline__ void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	double lhs = *d1;

	__asm__ __volatile__(
	    "cmpeqsd %[rhs],%[lhs]\n\t"
	    "movsd   %[lhs],%[mask]"
	    : [mask] "=m" (*ll1), [lhs] "+x" (lhs)
	    : [rhs] "x" (*d2));
}
440*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision compare-less-than (CMPLTSD): tests *d1 < *d2.
 * The 64-bit result mask is stored to *ll1: all ones (-1) when true, zero
 * otherwise.  The compare runs on a private copy of *d1 so the caller's
 * inputs are never written.
 */
extern __inline__ void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	double lhs = *d1;

	__asm__ __volatile__(
	    "cmpltsd %[rhs],%[lhs]\n\t"
	    "movsd   %[lhs],%[mask]"
	    : [mask] "=m" (*ll1), [lhs] "+x" (lhs)
	    : [rhs] "x" (*d2));
}
450*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision compare-less-or-equal (CMPLESD): tests
 * *d1 <= *d2.  The 64-bit result mask is stored to *ll1: all ones (-1)
 * when true, zero otherwise.  The compare runs on a private copy of *d1 so
 * the caller's inputs are never written.
 */
extern __inline__ void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	double lhs = *d1;

	__asm__ __volatile__(
	    "cmplesd %[rhs],%[lhs]\n\t"
	    "movsd   %[lhs],%[mask]"
	    : [mask] "=m" (*ll1), [lhs] "+x" (lhs)
	    : [rhs] "x" (*d2));
}
460*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision compare-unordered (CMPUNORDSD): tests whether
 * either of *d1, *d2 is a NaN.  The 64-bit result mask is stored to *ll1:
 * all ones (-1) when unordered, zero otherwise.  The compare runs on a
 * private copy of *d1 so the caller's inputs are never written.
 */
extern __inline__ void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	double lhs = *d1;

	__asm__ __volatile__(
	    "cmpunordsd %[rhs],%[lhs]\n\t"
	    "movsd      %[lhs],%[mask]"
	    : [mask] "=m" (*ll1), [lhs] "+x" (lhs)
	    : [rhs] "x" (*d2));
}
470*25c28e83SPiotr Jasiukajtis 
471*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision minimum (MINSD) of *d1 and *d2, stored to *d3.
 * The operation runs on a private copy of *d1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_minsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "minsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
481*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision maximum (MAXSD) of *d1 and *d2, stored to *d3.
 * The operation runs on a private copy of *d1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_maxsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "maxsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
491*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision add (ADDSD): *d3 = *d1 + *d2.
 * The operation runs on a private copy of *d1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_addsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "addsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
501*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision subtract (SUBSD): *d3 = *d1 - *d2.
 * The operation runs on a private copy of *d1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_subsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "subsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
511*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision multiply (MULSD): *d3 = *d1 * *d2.
 * The operation runs on a private copy of *d1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_mulsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "mulsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
521*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision divide (DIVSD): *d3 = *d1 / *d2.
 * The operation runs on a private copy of *d1 so the caller's inputs are
 * never written.
 */
extern __inline__ void
sse_divsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "divsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
531*25c28e83SPiotr Jasiukajtis 
/*
 * Scalar double-precision square root (SQRTSD): *d2 = sqrt(*d1).
 * The result is formed in a scratch XMM register and then stored, so *d1
 * is only read.
 */
extern __inline__ void
sse_sqrtsd(double *d1, double *d2)
{
	double scratch;

	__asm__ __volatile__(
	    "sqrtsd %[src], %[tmp]\n\t"
	    "movsd %[tmp], %[out]"
	    : [out] "=m" (*d2), [tmp] "=x" (scratch)
	    : [src] "m" (*d1));
}
543*25c28e83SPiotr Jasiukajtis 
/*
 * Execute an unordered scalar double-precision compare (UCOMISD) of *d1
 * with *d2.  Nothing is returned; the instruction is run only for its
 * effect on the processor state.  It writes EFLAGS, hence the "cc"
 * clobber.
 */
extern __inline__ void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__("ucomisd %[rhs], %[lhs]"
	    :
	    : [lhs] "x" (*d1), [rhs] "x" (*d2)
	    : "cc");
}
549*25c28e83SPiotr Jasiukajtis 
/*
 * Execute an ordered scalar double-precision compare (COMISD) of *d1 with
 * *d2.  Nothing is returned; the instruction is run only for its effect on
 * the processor state.  It writes EFLAGS, hence the "cc" clobber.
 */
extern __inline__ void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__("comisd %[rhs], %[lhs]"
	    :
	    : [lhs] "x" (*d1), [rhs] "x" (*d2)
	    : "cc");
}
555*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the double-precision value *d1 to single precision (CVTSD2SS)
 * and store it to *f1.
 */
extern __inline__ void
sse_cvtsd2ss(double *d1, float *f1)
{
	float scratch;

	__asm__ __volatile__(
	    "cvtsd2ss %[src],%[tmp]\n\t"
	    "movss    %[tmp],%[out]"
	    : [out] "=m" (*f1), [tmp] "=x" (scratch)
	    : [src] "m" (*d1));
}
567*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the 32-bit integer *i1 to double precision (CVTSI2SD) and store
 * it to *d1.
 */
extern __inline__ void
sse_cvtsi2sd(int *i1, double *d1)
{
	double scratch;

	__asm__ __volatile__(
	    "cvtsi2sd %[src],%[tmp]\n\t"
	    "movsd    %[tmp],%[out]"
	    : [out] "=m" (*d1), [tmp] "=x" (scratch)
	    : [src] "m" (*i1));
}
578*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the double-precision value *d1 to a 32-bit integer with
 * truncation (CVTTSD2SI) and store it to *i1.
 *
 * The scratch register is marked early-clobber: it is written by the first
 * instruction, before the store to *i1, so it must not share a register
 * with the address of that destination.
 */
extern __inline__ void
sse_cvttsd2si(double *d1, int *i1)
{
	int scratch;

	__asm__ __volatile__(
	    "cvttsd2si %[src],%[tmp]\n\t"
	    "movl      %[tmp],%[out]"
	    : [out] "=m" (*i1), [tmp] "=&r" (scratch)
	    : [src] "m" (*d1));
}
590*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the double-precision value *d1 to a 32-bit integer using the
 * current MXCSR rounding mode (CVTSD2SI) and store it to *i1.
 *
 * The scratch register is marked early-clobber: it is written by the first
 * instruction, before the store to *i1, so it must not share a register
 * with the address of that destination.
 */
extern __inline__ void
sse_cvtsd2si(double *d1, int *i1)
{
	int scratch;

	__asm__ __volatile__(
	    "cvtsd2si %[src],%[tmp]\n\t"
	    "movl     %[tmp],%[out]"
	    : [out] "=m" (*i1), [tmp] "=&r" (scratch)
	    : [src] "m" (*d1));
}
602*25c28e83SPiotr Jasiukajtis 
603*25c28e83SPiotr Jasiukajtis #if defined(__amd64)
/*
 * Convert the 64-bit integer *ll1 to double precision (CVTSI2SDQ) and
 * store it to *d1.
 */
extern __inline__ void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	double scratch;

	__asm__ __volatile__(
	    "cvtsi2sdq %[src],%[tmp]\n\t"
	    "movsd     %[tmp],%[out]"
	    : [out] "=m" (*d1), [tmp] "=x" (scratch)
	    : [src] "m" (*ll1));
}
615*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the double at *d1 to a 64-bit integer with the cvttsd2siq
 * instruction (conversion with truncation) and store the result in *ll1.
 *
 * The scratch register is written by the first instruction, before the
 * store through the address of *ll1 has been issued, so it is marked
 * early-clobber ("=&r").  Without that the compiler is free to place the
 * scratch in the register that holds the address of *ll1, and the second
 * instruction would then store through a corrupted pointer.
 */
extern __inline__ void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttsd2siq %2,%1\n\t"
	    "movq       %1,%0"
	    : "=m" (*ll1), "=&r" (tmp)
	    : "m" (*d1));
}
627*25c28e83SPiotr Jasiukajtis 
/*
 * Convert the double at *d1 to a 64-bit integer with the cvtsd2siq
 * instruction (the non-truncating form, which rounds according to the
 * current MXCSR rounding mode) and store the result in *ll1.
 *
 * The scratch register is written by the first instruction, before the
 * store through the address of *ll1 has been issued, so it is marked
 * early-clobber ("=&r").  Without that the compiler is free to place the
 * scratch in the register that holds the address of *ll1, and the second
 * instruction would then store through a corrupted pointer.
 */
extern __inline__ void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtsd2siq %2,%1\n\t"
	    "movq      %1,%0"
	    : "=m" (*ll1), "=&r" (tmp)
	    : "m" (*d1));
}
639*25c28e83SPiotr Jasiukajtis #endif
640*25c28e83SPiotr Jasiukajtis 
641*25c28e83SPiotr Jasiukajtis #elif defined(__sparc)
/*
 * Store the contents of %fsr into *l.  The stx form of the store is used
 * when __sparcv9 is defined and the st form otherwise.
 */
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
#if defined(__sparcv9)
	__asm__ __volatile__("stx %%fsr,%0\n\t" : "=m" (*l));
#else
	__asm__ __volatile__("st  %%fsr,%0\n\t" : "=m" (*l));
#endif
}
653*25c28e83SPiotr Jasiukajtis 
/*
 * Load %fsr from *l.  The ldx form of the load is used when __sparcv9 is
 * defined and the ld form otherwise.  Condition codes are declared as
 * clobbered in both cases.
 */
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
#if defined(__sparcv9)
	__asm__ __volatile__("ldx %0,%%fsr\n\t" : : "m" (*l) : "cc");
#else
	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l) : "cc");
#endif
}
665*25c28e83SPiotr Jasiukajtis 
/*
 * Store %fsr into the unsigned int at *l, always using the st form of the
 * store regardless of whether __sparcv9 is defined.
 */
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
	__asm__ __volatile__(
	    "st %%fsr,%0\n\t"
	    : "=m" (*l));
}
671*25c28e83SPiotr Jasiukajtis 
/*
 * Load %fsr from the unsigned int at *l, always using the ld form of the
 * load regardless of whether __sparcv9 is defined.
 *
 * Condition codes are declared as clobbered so that this routine tells
 * the compiler the same thing as __fenv_setfsr(), which performs the same
 * kind of load into %fsr.
 */
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
	__asm__ __volatile__(
	    "ld %0,%%fsr\n\t"
	    : : "m" (*l) : "cc");
}
677*25c28e83SPiotr Jasiukajtis #else
678*25c28e83SPiotr Jasiukajtis #error "GCC FENV inlines not implemented for this platform"
679*25c28e83SPiotr Jasiukajtis #endif
680*25c28e83SPiotr Jasiukajtis 
681*25c28e83SPiotr Jasiukajtis #ifdef __cplusplus
682*25c28e83SPiotr Jasiukajtis }
683*25c28e83SPiotr Jasiukajtis #endif
684*25c28e83SPiotr Jasiukajtis 
685*25c28e83SPiotr Jasiukajtis #endif  /* __GNUC__ */
686*25c28e83SPiotr Jasiukajtis 
687*25c28e83SPiotr Jasiukajtis #endif /* _FENV_INLINES_H */
688