1*5b2ba9d3SPiotr Jasiukajtis /*
2*5b2ba9d3SPiotr Jasiukajtis * This file and its contents are supplied under the terms of the
3*5b2ba9d3SPiotr Jasiukajtis * Common Development and Distribution License ("CDDL"), version 1.0.
4*5b2ba9d3SPiotr Jasiukajtis * You may only use this file in accordance with the terms of version
5*5b2ba9d3SPiotr Jasiukajtis * 1.0 of the CDDL.
6*5b2ba9d3SPiotr Jasiukajtis *
7*5b2ba9d3SPiotr Jasiukajtis * A full copy of the text of the CDDL should have accompanied this
8*5b2ba9d3SPiotr Jasiukajtis * source. A copy of the CDDL is also available via the Internet at
9*5b2ba9d3SPiotr Jasiukajtis * http://www.illumos.org/license/CDDL.
10*5b2ba9d3SPiotr Jasiukajtis */
11*5b2ba9d3SPiotr Jasiukajtis
12*5b2ba9d3SPiotr Jasiukajtis /*
13*5b2ba9d3SPiotr Jasiukajtis * Copyright 2011, Richard Lowe
14*5b2ba9d3SPiotr Jasiukajtis */
15*5b2ba9d3SPiotr Jasiukajtis
16*5b2ba9d3SPiotr Jasiukajtis #ifndef _FENV_INLINES_H
17*5b2ba9d3SPiotr Jasiukajtis #define _FENV_INLINES_H
18*5b2ba9d3SPiotr Jasiukajtis
19*5b2ba9d3SPiotr Jasiukajtis #ifdef __GNUC__
20*5b2ba9d3SPiotr Jasiukajtis
21*5b2ba9d3SPiotr Jasiukajtis #ifdef __cplusplus
22*5b2ba9d3SPiotr Jasiukajtis extern "C" {
23*5b2ba9d3SPiotr Jasiukajtis #endif
24*5b2ba9d3SPiotr Jasiukajtis
25*5b2ba9d3SPiotr Jasiukajtis #include <sys/types.h>
26*5b2ba9d3SPiotr Jasiukajtis
27*5b2ba9d3SPiotr Jasiukajtis #if defined(__x86)
28*5b2ba9d3SPiotr Jasiukajtis
/*
 * x87 control word and status word packed into one 32-bit value.
 * `cwsw' is the packed form; `words' exposes its two 16-bit halves.
 *
 * NOTE: __fenv_getcwsw() and __fenv_setcwsw() in this file keep the status
 * word in `words.cw' and the control word in `words.sw', i.e. the member
 * names are the reverse of what they hold.
 *
 * This definition should really be shared between the 32-bit and 64-bit
 * x86 code (much of this 'amd64' code could be, in fact).
 */
union fp_cwsw {
	uint32_t	cwsw;
	struct {
		uint16_t	cw;
		uint16_t	sw;
	} words;
};
41*5b2ba9d3SPiotr Jasiukajtis
42*5b2ba9d3SPiotr Jasiukajtis extern __inline__ void
__fenv_getcwsw(unsigned int * value)43*5b2ba9d3SPiotr Jasiukajtis __fenv_getcwsw(unsigned int *value)
44*5b2ba9d3SPiotr Jasiukajtis {
45*5b2ba9d3SPiotr Jasiukajtis union fp_cwsw *u = (union fp_cwsw *)value;
46*5b2ba9d3SPiotr Jasiukajtis
47*5b2ba9d3SPiotr Jasiukajtis __asm__ __volatile__(
48*5b2ba9d3SPiotr Jasiukajtis "fstsw %0\n\t"
49*5b2ba9d3SPiotr Jasiukajtis "fstcw %1\n\t"
50*5b2ba9d3SPiotr Jasiukajtis : "=m" (u->words.cw), "=m" (u->words.sw));
51*5b2ba9d3SPiotr Jasiukajtis }
52*5b2ba9d3SPiotr Jasiukajtis
53*5b2ba9d3SPiotr Jasiukajtis extern __inline__ void
__fenv_setcwsw(const unsigned int * value)54*5b2ba9d3SPiotr Jasiukajtis __fenv_setcwsw(const unsigned int *value)
55*5b2ba9d3SPiotr Jasiukajtis {
56*5b2ba9d3SPiotr Jasiukajtis union fp_cwsw cwsw;
57*5b2ba9d3SPiotr Jasiukajtis short fenv[16];
58*5b2ba9d3SPiotr Jasiukajtis
59*5b2ba9d3SPiotr Jasiukajtis cwsw.cwsw = *value;
60*5b2ba9d3SPiotr Jasiukajtis
61*5b2ba9d3SPiotr Jasiukajtis __asm__ __volatile__(
62*5b2ba9d3SPiotr Jasiukajtis "fstenv %0\n\t"
63*5b2ba9d3SPiotr Jasiukajtis "movw %4,%1\n\t"
64*5b2ba9d3SPiotr Jasiukajtis "movw %3,%2\n\t"
65*5b2ba9d3SPiotr Jasiukajtis "fldenv %0\n\t"
66*5b2ba9d3SPiotr Jasiukajtis "fwait\n\t"
67*5b2ba9d3SPiotr Jasiukajtis : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
68*5b2ba9d3SPiotr Jasiukajtis : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
69*5b2ba9d3SPiotr Jasiukajtis /* For practical purposes, we clobber the whole FPU */
70*5b2ba9d3SPiotr Jasiukajtis : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
71*5b2ba9d3SPiotr Jasiukajtis "st(6)", "st(7)");
72*5b2ba9d3SPiotr Jasiukajtis }
73*5b2ba9d3SPiotr Jasiukajtis
/*
 * Store the SSE control/status register (MXCSR) into *value.
 */
extern __inline__ void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__(
	    "stmxcsr %[dst]"
	    : [dst] "=m" (*value));
}
79*5b2ba9d3SPiotr Jasiukajtis
/*
 * Load the SSE control/status register (MXCSR) from *value.
 */
extern __inline__ void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__(
	    "ldmxcsr %[src]"
	    :
	    : [src] "m" (*value));
}
85*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 f2xm1 instruction on x and return the value it leaves
 * on top of the FPU stack.
 */
extern __inline__ long double
f2xm1(long double x)
{
	/* x is both the st(0) input and the st(0) result */
	__asm__ __volatile__("f2xm1" : "+t" (x) : : "cc");
	return (x);
}
94*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fyl2x instruction with x in st(0) and y in st(1), and
 * return the value left on top of the stack.  The instruction pops st(1),
 * which is why that register is listed as clobbered.
 */
extern __inline__ long double
fyl2x(long double y, long double x)
{
	__asm__ __volatile__("fyl2x"
	    : "+t" (x)
	    : "u" (y)
	    : "st(1)", "cc");
	return (x);
}
106*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fptan instruction on x.
 *
 * fptan leaves two values on the FPU stack, so both st(0) and st(1) are
 * declared as outputs.  The value wanted is the one in st(1); whatever
 * ends up in st(0) is received into x and thrown away.
 */
extern __inline__ long double
fptan(long double x)
{
	long double tangent;

	__asm__ __volatile__("fptan"
	    : "+t" (x), "=u" (tangent)
	    :
	    : "cc");
	return (tangent);
}
123*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fpatan instruction with y in st(0) and x in st(1), and
 * return the value left on top of the stack.  The instruction pops st(1),
 * hence the clobber.
 */
extern __inline__ long double
fpatan(long double x, long double y)
{
	__asm__ __volatile__("fpatan"
	    : "+t" (y)
	    : "u" (x)
	    : "st(1)", "cc");
	return (y);
}
135*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fxtract instruction on x and return the value it leaves
 * on top of the FPU stack.
 *
 * fxtract leaves TWO values on the stack (one more than it consumed).
 * Both must be declared as outputs, as fsincos() and fptan() in this file
 * do, otherwise the compiler's picture of the x87 stack is off by one and
 * every call leaks a stack slot.  The st(1) value is not part of this
 * function's result and is discarded.
 */
extern __inline__ long double
fxtract(long double x)
{
	long double below;	/* receives st(1); intentionally unused */

	__asm__ __volatile__("fxtract"
	    : "+t" (x), "=u" (below)
	    :
	    : "cc");
	return (x);
}
142*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fprem1 instruction with div in st(0) and idend in st(1),
 * and return the value left in st(0).
 *
 * NOTE(review): with this operand placement the value being reduced is
 * `div' and the modulus is `idend', which looks reversed relative to the
 * parameter names -- confirm against the callers before changing.
 */
extern __inline__ long double
fprem1(long double idend, long double div)
{
	long double rem;

	__asm__ __volatile__("fprem1"
	    : "=t" (rem)
	    : "0" (div), "u" (idend)
	    : "cc");
	return (rem);
}
149*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fprem instruction with div in st(0) and idend in st(1),
 * and return the value left in st(0).
 *
 * NOTE(review): with this operand placement the value being reduced is
 * `div' and the modulus is `idend', which looks reversed relative to the
 * parameter names -- confirm against the callers before changing.
 */
extern __inline__ long double
fprem(long double idend, long double div)
{
	long double rem;

	__asm__ __volatile__("fprem"
	    : "=t" (rem)
	    : "0" (div), "u" (idend)
	    : "cc");
	return (rem);
}
156*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fyl2xp1 instruction with x in st(0) and y in st(1), and
 * return the value left on top of the stack.  The instruction pops st(1),
 * hence the clobber.
 */
extern __inline__ long double
fyl2xp1(long double y, long double x)
{
	__asm__ __volatile__("fyl2xp1"
	    : "+t" (x)
	    : "u" (y)
	    : "st(1)", "cc");
	return (x);
}
168*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fsqrt instruction on x and return the result.
 */
extern __inline__ long double
fsqrt(long double x)
{
	long double root;

	__asm__ __volatile__("fsqrt" : "=t" (root) : "0" (x) : "cc");
	return (root);
}
175*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fsincos instruction on x and return the value it leaves
 * in st(0).  The instruction also leaves a value in st(1); it is declared
 * as an output so the compiler pops it, and is otherwise ignored.
 */
extern __inline__ long double
fsincos(long double x)
{
	long double top;
	long double below;

	__asm__ __volatile__("fsincos"
	    : "=t" (top), "=u" (below)
	    : "0" (x)
	    : "cc");
	return (top);
}
184*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 frndint instruction on x and return the result.
 */
extern __inline__ long double
frndint(long double x)
{
	long double rounded;

	__asm__ __volatile__("frndint" : "=t" (rounded) : "0" (x) : "cc");
	return (rounded);
}
191*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fscale instruction with y in st(0) and x in st(1), and
 * return the value left in st(0).
 */
extern __inline__ long double
fscale(long double x, long double y)
{
	__asm__ __volatile__("fscale" : "+t" (y) : "u" (x) : "cc");
	return (y);
}
200*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fsin instruction on x and return the result.
 */
extern __inline__ long double
fsin(long double x)
{
	long double sine;

	__asm__ __volatile__("fsin" : "=t" (sine) : "0" (x) : "cc");
	return (sine);
}
207*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute the x87 fcos instruction on x and return the result.
 */
extern __inline__ long double
fcos(long double x)
{
	long double cosine;

	__asm__ __volatile__("fcos" : "=t" (cosine) : "0" (x) : "cc");
	return (cosine);
}
214*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cmpeqss with *f1 as destination operand and *f2 as source
 * operand, and store the 32-bit result of the instruction in *i1.
 *
 * The first operand is copied into a local before the asm.  Binding *f1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "cmpeqss %[rhs], %[acc]\n\t"
	    "movss   %[acc], %[out]"
	    : [out] "=m" (*i1), [acc] "+x" (acc)
	    : [rhs] "x" (*f2)
	    : "cc");
}
225*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cmpltss with *f1 as destination operand and *f2 as source
 * operand, and store the 32-bit result of the instruction in *i1.
 *
 * The first operand is copied into a local before the asm.  Binding *f1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "cmpltss %[rhs], %[acc]\n\t"
	    "movss   %[acc], %[out]"
	    : [out] "=m" (*i1), [acc] "+x" (acc)
	    : [rhs] "x" (*f2)
	    : "cc");
}
236*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cmpless with *f1 as destination operand and *f2 as source
 * operand, and store the 32-bit result of the instruction in *i1.
 *
 * The first operand is copied into a local before the asm.  Binding *f1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_cmpless(float *f1, float *f2, int *i1)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "cmpless %[rhs], %[acc]\n\t"
	    "movss   %[acc], %[out]"
	    : [out] "=m" (*i1), [acc] "+x" (acc)
	    : [rhs] "x" (*f2)
	    : "cc");
}
247*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cmpunordss with *f1 as destination operand and *f2 as source
 * operand, and store the 32-bit result of the instruction in *i1.
 *
 * The first operand is copied into a local before the asm.  Binding *f1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "cmpunordss %[rhs], %[acc]\n\t"
	    "movss   %[acc], %[out]"
	    : [out] "=m" (*i1), [acc] "+x" (acc)
	    : [rhs] "x" (*f2)
	    : "cc");
}
258*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute minss with *f1 as destination operand and *f2 as source operand,
 * and store the result in *f3.
 *
 * The first operand is copied into a local before the asm.  Binding *f1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_minss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "minss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
268*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute maxss with *f1 as destination operand and *f2 as source operand,
 * and store the result in *f3.
 *
 * The first operand is copied into a local before the asm.  Binding *f1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_maxss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "maxss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
278*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute addss with *f1 as destination operand and *f2 as source operand,
 * and store the result in *f3.
 *
 * The first operand is copied into a local before the asm.  Binding *f1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_addss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "addss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
288*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute subss with *f1 as destination operand and *f2 as source operand,
 * and store the result in *f3.
 *
 * The first operand is copied into a local before the asm.  Binding *f1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_subss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "subss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
298*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute mulss with *f1 as destination operand and *f2 as source operand,
 * and store the result in *f3.
 *
 * The first operand is copied into a local before the asm.  Binding *f1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_mulss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "mulss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
308*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute divss with *f1 as destination operand and *f2 as source operand,
 * and store the result in *f3.
 *
 * The first operand is copied into a local before the asm.  Binding *f1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_divss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "divss %[rhs], %[acc]\n\t"
	    "movss %[acc], %[out]"
	    : [out] "=m" (*f3), [acc] "+x" (acc)
	    : [rhs] "x" (*f2));
}
318*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute sqrtss on *f1 and store the result in *f2.  The instruction
 * result is produced in a scratch XMM register and then moved to memory.
 */
extern __inline__ void
sse_sqrtss(float *f1, float *f2)
{
	double scratch;

	__asm__ __volatile__(
	    "sqrtss %[src], %[reg]\n\t"
	    "movss  %[reg], %[dst]"
	    : [dst] "=m" (*f2), [reg] "=x" (scratch)
	    : [src] "m" (*f1));
}
330*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute ucomiss on *f1 and *f2.  Nothing is returned; the call exists
 * for the instruction's side effects on processor state.
 *
 * The instruction writes the condition flags, so "cc" is listed as
 * clobbered, matching the other inlines in this file.
 */
extern __inline__ void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__(
	    "ucomiss %[rhs], %[lhs]"
	    :
	    : [lhs] "x" (*f1), [rhs] "x" (*f2)
	    : "cc");
}
337*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute comiss on *f1 and *f2.  Nothing is returned; the call exists
 * for the instruction's side effects on processor state.
 *
 * The instruction writes the condition flags, so "cc" is listed as
 * clobbered, matching the other inlines in this file.
 */
extern __inline__ void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__(
	    "comiss %[rhs], %[lhs]"
	    :
	    : [lhs] "x" (*f1), [rhs] "x" (*f2)
	    : "cc");
}
343*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvtss2sd on *f1 and store the double result in *d1, going
 * through a scratch XMM register.
 */
extern __inline__ void
sse_cvtss2sd(float *f1, double *d1)
{
	double scratch;

	__asm__ __volatile__(
	    "cvtss2sd %[src], %[reg]\n\t"
	    "movsd    %[reg], %[dst]"
	    : [dst] "=m" (*d1), [reg] "=x" (scratch)
	    : [src] "m" (*f1));
}
355*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvtsi2ss on the int at *i1 and store the float result in *f1,
 * going through a scratch XMM register.
 */
extern __inline__ void
sse_cvtsi2ss(int *i1, float *f1)
{
	double scratch;

	__asm__ __volatile__(
	    "cvtsi2ss %[src], %[reg]\n\t"
	    "movss    %[reg], %[dst]"
	    : [dst] "=m" (*f1), [reg] "=x" (scratch)
	    : [src] "m" (*i1));
}
367*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvttss2si on *f1 and store the int result in *i1.
 *
 * The scratch register is written by the first instruction while the
 * address of *i1 is still needed by the second, so it is marked
 * earlyclobber ("=&r").  Without that the compiler is free to place it in
 * the register holding i1, and the store would go to a bogus address.
 */
extern __inline__ void
sse_cvttss2si(float *f1, int *i1)
{
	int conv;

	__asm__ __volatile__(
	    "cvttss2si %[src], %[reg]\n\t"
	    "movl      %[reg], %[dst]"
	    : [dst] "=m" (*i1), [reg] "=&r" (conv)
	    : [src] "m" (*f1));
}
379*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvtss2si on *f1 and store the int result in *i1.
 *
 * The scratch register is written by the first instruction while the
 * address of *i1 is still needed by the second, so it is marked
 * earlyclobber ("=&r").  Without that the compiler is free to place it in
 * the register holding i1, and the store would go to a bogus address.
 */
extern __inline__ void
sse_cvtss2si(float *f1, int *i1)
{
	int conv;

	__asm__ __volatile__(
	    "cvtss2si %[src], %[reg]\n\t"
	    "movl     %[reg], %[dst]"
	    : [dst] "=m" (*i1), [reg] "=&r" (conv)
	    : [src] "m" (*f1));
}
391*5b2ba9d3SPiotr Jasiukajtis
392*5b2ba9d3SPiotr Jasiukajtis #if defined(__amd64)
/*
 * Execute cvtsi2ssq on the 64-bit integer at *ll1 and store the float
 * result in *f1, going through a scratch XMM register.
 */
extern __inline__ void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	double scratch;

	__asm__ __volatile__(
	    "cvtsi2ssq %[src], %[reg]\n\t"
	    "movss     %[reg], %[dst]"
	    : [dst] "=m" (*f1), [reg] "=x" (scratch)
	    : [src] "m" (*ll1));
}
404*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvttss2siq on *f1 and store the 64-bit result in *ll1.
 *
 * The scratch register is written by the first instruction while the
 * address of *ll1 is still needed by the second, so it is marked
 * earlyclobber ("=&r").  Without that the compiler is free to place it in
 * the register holding ll1, and the store would go to a bogus address.
 */
extern __inline__ void
sse_cvttss2siq(float *f1, long long *ll1)
{
	uint64_t conv;

	__asm__ __volatile__(
	    "cvttss2siq %[src], %[reg]\n\t"
	    "movq       %[reg], %[dst]"
	    : [dst] "=m" (*ll1), [reg] "=&r" (conv)
	    : [src] "m" (*f1));
}
416*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvtss2siq on *f1 and store the 64-bit result in *ll1.
 *
 * The scratch register is written by the first instruction while the
 * address of *ll1 is still needed by the second, so it is marked
 * earlyclobber ("=&r").  Without that the compiler is free to place it in
 * the register holding ll1, and the store would go to a bogus address.
 */
extern __inline__ void
sse_cvtss2siq(float *f1, long long *ll1)
{
	uint64_t conv;

	__asm__ __volatile__(
	    "cvtss2siq %[src], %[reg]\n\t"
	    "movq      %[reg], %[dst]"
	    : [dst] "=m" (*ll1), [reg] "=&r" (conv)
	    : [src] "m" (*f1));
}
428*5b2ba9d3SPiotr Jasiukajtis
429*5b2ba9d3SPiotr Jasiukajtis #endif
430*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cmpeqsd with *d1 as destination operand and *d2 as source
 * operand, and store the 64-bit result of the instruction in *ll1.
 *
 * The first operand is copied into a local before the asm.  Binding *d1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "cmpeqsd %[rhs],%[acc]\n\t"
	    "movsd   %[acc],%[out]"
	    : [out] "=m" (*ll1), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
440*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cmpltsd with *d1 as destination operand and *d2 as source
 * operand, and store the 64-bit result of the instruction in *ll1.
 *
 * The first operand is copied into a local before the asm.  Binding *d1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "cmpltsd %[rhs],%[acc]\n\t"
	    "movsd   %[acc],%[out]"
	    : [out] "=m" (*ll1), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
450*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cmplesd with *d1 as destination operand and *d2 as source
 * operand, and store the 64-bit result of the instruction in *ll1.
 *
 * The first operand is copied into a local before the asm.  Binding *d1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "cmplesd %[rhs],%[acc]\n\t"
	    "movsd   %[acc],%[out]"
	    : [out] "=m" (*ll1), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
460*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cmpunordsd with *d1 as destination operand and *d2 as source
 * operand, and store the 64-bit result of the instruction in *ll1.
 *
 * The first operand is copied into a local before the asm.  Binding *d1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "cmpunordsd %[rhs],%[acc]\n\t"
	    "movsd      %[acc],%[out]"
	    : [out] "=m" (*ll1), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
470*5b2ba9d3SPiotr Jasiukajtis
471*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute minsd with *d1 as destination operand and *d2 as source operand,
 * and store the result in *d3.
 *
 * The first operand is copied into a local before the asm.  Binding *d1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_minsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "minsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
481*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute maxsd with *d1 as destination operand and *d2 as source operand,
 * and store the result in *d3.
 *
 * The first operand is copied into a local before the asm.  Binding *d1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_maxsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "maxsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
491*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute addsd with *d1 as destination operand and *d2 as source operand,
 * and store the result in *d3.
 *
 * The first operand is copied into a local before the asm.  Binding *d1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_addsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "addsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
501*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute subsd with *d1 as destination operand and *d2 as source operand,
 * and store the result in *d3.
 *
 * The first operand is copied into a local before the asm.  Binding *d1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_subsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "subsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
511*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute mulsd with *d1 as destination operand and *d2 as source operand,
 * and store the result in *d3.
 *
 * The first operand is copied into a local before the asm.  Binding *d1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_mulsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "mulsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
521*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute divsd with *d1 as destination operand and *d2 as source operand,
 * and store the result in *d3.
 *
 * The first operand is copied into a local before the asm.  Binding *d1
 * itself to the read/write register operand made the compiler write the
 * result back over the caller's input.
 */
extern __inline__ void
sse_divsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "divsd %[rhs],%[acc]\n\t"
	    "movsd %[acc],%[out]"
	    : [out] "=m" (*d3), [acc] "+x" (acc)
	    : [rhs] "x" (*d2));
}
531*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute sqrtsd on *d1 and store the result in *d2.  The instruction
 * result is produced in a scratch XMM register and then moved to memory.
 */
extern __inline__ void
sse_sqrtsd(double *d1, double *d2)
{
	double scratch;

	__asm__ __volatile__(
	    "sqrtsd %[src], %[reg]\n\t"
	    "movsd  %[reg], %[dst]"
	    : [dst] "=m" (*d2), [reg] "=x" (scratch)
	    : [src] "m" (*d1));
}
543*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute ucomisd on *d1 and *d2.  Nothing is returned; the call exists
 * for the instruction's side effects on processor state.
 *
 * The instruction writes the condition flags, so "cc" is listed as
 * clobbered, matching the other inlines in this file.
 */
extern __inline__ void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__(
	    "ucomisd %[rhs], %[lhs]"
	    :
	    : [lhs] "x" (*d1), [rhs] "x" (*d2)
	    : "cc");
}
549*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute comisd on *d1 and *d2.  Nothing is returned; the call exists
 * for the instruction's side effects on processor state.
 *
 * The instruction writes the condition flags, so "cc" is listed as
 * clobbered, matching the other inlines in this file.
 */
extern __inline__ void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__(
	    "comisd %[rhs], %[lhs]"
	    :
	    : [lhs] "x" (*d1), [rhs] "x" (*d2)
	    : "cc");
}
555*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvtsd2ss on *d1 and store the float result in *f1, going
 * through a scratch XMM register.
 */
extern __inline__ void
sse_cvtsd2ss(double *d1, float *f1)
{
	double scratch;

	__asm__ __volatile__(
	    "cvtsd2ss %[src],%[reg]\n\t"
	    "movss    %[reg],%[dst]"
	    : [dst] "=m" (*f1), [reg] "=x" (scratch)
	    : [src] "m" (*d1));
}
567*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvtsi2sd on the int at *i1 and store the double result in *d1,
 * going through a scratch XMM register.
 */
extern __inline__ void
sse_cvtsi2sd(int *i1, double *d1)
{
	double scratch;

	__asm__ __volatile__(
	    "cvtsi2sd %[src],%[reg]\n\t"
	    "movsd    %[reg],%[dst]"
	    : [dst] "=m" (*d1), [reg] "=x" (scratch)
	    : [src] "m" (*i1));
}
578*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvttsd2si on *d1 and store the int result in *i1.
 *
 * The scratch register is written by the first instruction while the
 * address of *i1 is still needed by the second, so it is marked
 * earlyclobber ("=&r").  Without that the compiler is free to place it in
 * the register holding i1, and the store would go to a bogus address.
 */
extern __inline__ void
sse_cvttsd2si(double *d1, int *i1)
{
	int conv;

	__asm__ __volatile__(
	    "cvttsd2si %[src],%[reg]\n\t"
	    "movl      %[reg],%[dst]"
	    : [dst] "=m" (*i1), [reg] "=&r" (conv)
	    : [src] "m" (*d1));
}
590*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvtsd2si on *d1 and store the int result in *i1.
 *
 * The scratch register is written by the first instruction while the
 * address of *i1 is still needed by the second, so it is marked
 * earlyclobber ("=&r").  Without that the compiler is free to place it in
 * the register holding i1, and the store would go to a bogus address.
 */
extern __inline__ void
sse_cvtsd2si(double *d1, int *i1)
{
	int conv;

	__asm__ __volatile__(
	    "cvtsd2si %[src],%[reg]\n\t"
	    "movl     %[reg],%[dst]"
	    : [dst] "=m" (*i1), [reg] "=&r" (conv)
	    : [src] "m" (*d1));
}
602*5b2ba9d3SPiotr Jasiukajtis
603*5b2ba9d3SPiotr Jasiukajtis #if defined(__amd64)
/*
 * Execute cvtsi2sdq on the 64-bit integer at *ll1 and store the double
 * result in *d1, going through a scratch XMM register.
 */
extern __inline__ void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	double scratch;

	__asm__ __volatile__(
	    "cvtsi2sdq %[src],%[reg]\n\t"
	    "movsd     %[reg],%[dst]"
	    : [dst] "=m" (*d1), [reg] "=x" (scratch)
	    : [src] "m" (*ll1));
}
615*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvttsd2siq on *d1 and store the 64-bit result in *ll1.
 *
 * The scratch register is written by the first instruction while the
 * address of *ll1 is still needed by the second, so it is marked
 * earlyclobber ("=&r").  Without that the compiler is free to place it in
 * the register holding ll1, and the store would go to a bogus address.
 */
extern __inline__ void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	uint64_t conv;

	__asm__ __volatile__(
	    "cvttsd2siq %[src],%[reg]\n\t"
	    "movq       %[reg],%[dst]"
	    : [dst] "=m" (*ll1), [reg] "=&r" (conv)
	    : [src] "m" (*d1));
}
627*5b2ba9d3SPiotr Jasiukajtis
/*
 * Execute cvtsd2siq on *d1 and store the 64-bit result in *ll1.
 *
 * The scratch register is written by the first instruction while the
 * address of *ll1 is still needed by the second, so it is marked
 * earlyclobber ("=&r").  Without that the compiler is free to place it in
 * the register holding ll1, and the store would go to a bogus address.
 */
extern __inline__ void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	uint64_t conv;

	__asm__ __volatile__(
	    "cvtsd2siq %[src],%[reg]\n\t"
	    "movq      %[reg],%[dst]"
	    : [dst] "=m" (*ll1), [reg] "=&r" (conv)
	    : [src] "m" (*d1));
}
639*5b2ba9d3SPiotr Jasiukajtis #endif
640*5b2ba9d3SPiotr Jasiukajtis
641*5b2ba9d3SPiotr Jasiukajtis #elif defined(__sparc)
/*
 * Store the SPARC %fsr register into *l.  The 64-bit store (stx) is used
 * when building for sparcv9 and the 32-bit store (st) otherwise.
 */
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
#if defined(__sparcv9)
	__asm__ __volatile__("stx %%fsr,%0\n\t" : "=m" (*l));
#else
	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
#endif
}
653*5b2ba9d3SPiotr Jasiukajtis
/*
 * Load the SPARC %fsr register from *l.  The 64-bit load (ldx) is used
 * when building for sparcv9 and the 32-bit load (ld) otherwise.
 */
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
#if defined(__sparcv9)
	__asm__ __volatile__("ldx %0,%%fsr\n\t" : : "m" (*l) : "cc");
#else
	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l) : "cc");
#endif
}
665*5b2ba9d3SPiotr Jasiukajtis
/*
 * Store the SPARC %fsr register into *l using the 32-bit store (st),
 * regardless of the target word size.
 */
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
	__asm__ __volatile__(
	    "st %%fsr,%0\n\t"
	    : "=m" (*l));
}
671*5b2ba9d3SPiotr Jasiukajtis
/*
 * Load the SPARC %fsr register from *l using the 32-bit load (ld),
 * regardless of the target word size.
 */
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
	__asm__ __volatile__(
	    "ld %0,%%fsr\n\t"
	    :
	    : "m" (*l));
}
677*5b2ba9d3SPiotr Jasiukajtis #else
678*5b2ba9d3SPiotr Jasiukajtis #error "GCC FENV inlines not implemented for this platform"
679*5b2ba9d3SPiotr Jasiukajtis #endif
680*5b2ba9d3SPiotr Jasiukajtis
681*5b2ba9d3SPiotr Jasiukajtis #ifdef __cplusplus
682*5b2ba9d3SPiotr Jasiukajtis }
683*5b2ba9d3SPiotr Jasiukajtis #endif
684*5b2ba9d3SPiotr Jasiukajtis
685*5b2ba9d3SPiotr Jasiukajtis #endif /* __GNUC__ */
686*5b2ba9d3SPiotr Jasiukajtis
687*5b2ba9d3SPiotr Jasiukajtis #endif /* _FENV_INLINES_H */
688