/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright 2011, Richard Lowe */ #ifndef _FENV_INLINES_H #define _FENV_INLINES_H #ifdef __GNUC__ #ifdef __cplusplus extern "C" { #endif #include #if defined(__x86) /* * Floating point Control Word and Status Word * Definition should actually be shared with x86 * (much of this 'amd64' code can be, in fact.) */ union fp_cwsw { uint32_t cwsw; struct { uint16_t cw; uint16_t sw; } words; }; extern __inline__ void __fenv_getcwsw(unsigned int *value) { union fp_cwsw *u = (union fp_cwsw *)value; __asm__ __volatile__( "fstsw %0\n\t" "fstcw %1\n\t" : "=m" (u->words.cw), "=m" (u->words.sw)); } extern __inline__ void __fenv_setcwsw(const unsigned int *value) { union fp_cwsw cwsw; short fenv[16]; cwsw.cwsw = *value; __asm__ __volatile__( "fstenv %0\n\t" "movw %4,%1\n\t" "movw %3,%2\n\t" "fldenv %0\n\t" "fwait\n\t" : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2]) : "r" (cwsw.words.cw), "r" (cwsw.words.sw) /* For practical purposes, we clobber the whole FPU */ : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); } extern __inline__ void __fenv_getmxcsr(unsigned int *value) { __asm__ __volatile__("stmxcsr %0" : "=m" (*value)); } extern __inline__ void __fenv_setmxcsr(const unsigned int *value) { __asm__ __volatile__("ldmxcsr %0" : : "m" (*value)); } extern __inline__ long double f2xm1(long double x) { long double ret; __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc"); return (ret); } extern __inline__ long double fyl2x(long double y, long double x) { long double ret; __asm__ __volatile__("fyl2x" : "=t" (ret) : "0" (x), "u" (y) : "st(1)", "cc"); return (ret); } extern __inline__ long double fptan(long double x) { /* * fptan pushes 1.0 then the result on completion, so we want to pop * the FP stack twice, so we need a dummy value into which to pop it. */ long double ret; long double dummy; __asm__ __volatile__("fptan" : "=t" (dummy), "=u" (ret) : "0" (x) : "cc"); return (ret); } extern __inline__ long double fpatan(long double x, long double y) { long double ret; __asm__ __volatile__("fpatan" : "=t" (ret) : "0" (y), "u" (x) : "st(1)", "cc"); return (ret); } extern __inline__ long double fxtract(long double x) { __asm__ __volatile__("fxtract" : "+t" (x) : : "cc"); return (x); } extern __inline__ long double fprem1(long double idend, long double div) { __asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc"); return (div); } extern __inline__ long double fprem(long double idend, long double div) { __asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc"); return (div); } extern __inline__ long double fyl2xp1(long double y, long double x) { long double ret; __asm__ __volatile__("fyl2xp1" : "=t" (ret) : "0" (x), "u" (y) : "st(1)", "cc"); return (ret); } extern __inline__ long double fsqrt(long double x) { __asm__ __volatile__("fsqrt" : "+t" (x) : : "cc"); return (x); } extern __inline__ long double fsincos(long double x) { long double dummy; __asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc"); return (x); } extern __inline__ long double frndint(long double x) { __asm__ __volatile__("frndint" : "+t" (x) : : "cc"); return (x); } extern __inline__ long double fscale(long double x, long double y) { long double ret; __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc"); return (ret); } extern __inline__ long double fsin(long double x) { __asm__ __volatile__("fsin" : "+t" (x) : : "cc"); return (x); } extern __inline__ long double fcos(long double x) { __asm__ __volatile__("fcos" : "+t" (x) : : "cc"); return (x); } extern __inline__ void sse_cmpeqss(float *f1, float *f2, int *i1) { __asm__ __volatile__( "cmpeqss %2, %1\n\t" "movss %1, %0" : "=m" (*i1), "+x" (*f1) : "x" (*f2) : "cc"); } extern __inline__ void sse_cmpltss(float *f1, float *f2, int *i1) { __asm__ __volatile__( "cmpltss %2, %1\n\t" "movss %1, %0" : "=m" (*i1), "+x" (*f1) : "x" (*f2) : "cc"); } extern __inline__ void sse_cmpless(float *f1, float *f2, int *i1) { __asm__ __volatile__( "cmpless %2, %1\n\t" "movss %1, %0" : "=m" (*i1), "+x" (*f1) : "x" (*f2) : "cc"); } extern __inline__ void sse_cmpunordss(float *f1, float *f2, int *i1) { __asm__ __volatile__( "cmpunordss %2, %1\n\t" "movss %1, %0" : "=m" (*i1), "+x" (*f1) : "x" (*f2) : "cc"); } extern __inline__ void sse_minss(float *f1, float *f2, float *f3) { __asm__ __volatile__( "minss %2, %1\n\t" "movss %1, %0" : "=m" (*f3), "+x" (*f1) : "x" (*f2)); } extern __inline__ void sse_maxss(float *f1, float *f2, float *f3) { __asm__ __volatile__( "maxss %2, %1\n\t" "movss %1, %0" : "=m" (*f3), "+x" (*f1) : "x" (*f2)); } extern __inline__ void sse_addss(float *f1, float *f2, float *f3) { __asm__ __volatile__( "addss %2, %1\n\t" "movss %1, %0" : "=m" (*f3), "+x" (*f1) : "x" (*f2)); } extern __inline__ void sse_subss(float *f1, float *f2, float *f3) { __asm__ __volatile__( "subss %2, %1\n\t" "movss %1, %0" : "=m" (*f3), "+x" (*f1) : "x" (*f2)); } extern __inline__ void sse_mulss(float *f1, float *f2, float *f3) { __asm__ __volatile__( "mulss %2, %1\n\t" "movss %1, %0" : "=m" (*f3), "+x" (*f1) : "x" (*f2)); } extern __inline__ void sse_divss(float *f1, float *f2, float *f3) { __asm__ __volatile__( "divss %2, %1\n\t" "movss %1, %0" : "=m" (*f3), "+x" (*f1) : "x" (*f2)); } extern __inline__ void sse_sqrtss(float *f1, float *f2) { double tmp; __asm__ __volatile__( "sqrtss %2, %1\n\t" "movss %1, %0" : "=m" (*f2), "=x" (tmp) : "m" (*f1)); } extern __inline__ void sse_ucomiss(float *f1, float *f2) { __asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2)); } extern __inline__ void sse_comiss(float *f1, float *f2) { __asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2)); } extern __inline__ void sse_cvtss2sd(float *f1, double *d1) { double tmp; __asm__ __volatile__( "cvtss2sd %2, %1\n\t" "movsd %1, %0" : "=m" (*d1), "=x" (tmp) : "m" (*f1)); } extern __inline__ void sse_cvtsi2ss(int *i1, float *f1) { double tmp; __asm__ __volatile__( "cvtsi2ss %2, %1\n\t" "movss %1, %0" : "=m" (*f1), "=x" (tmp) : "m" (*i1)); } extern __inline__ void sse_cvttss2si(float *f1, int *i1) { int tmp; __asm__ __volatile__( "cvttss2si %2, %1\n\t" "movl %1, %0" : "=m" (*i1), "=r" (tmp) : "m" (*f1)); } extern __inline__ void sse_cvtss2si(float *f1, int *i1) { int tmp; __asm__ __volatile__( "cvtss2si %2, %1\n\t" "movl %1, %0" : "=m" (*i1), "=r" (tmp) : "m" (*f1)); } #if defined(__amd64) extern __inline__ void sse_cvtsi2ssq(long long *ll1, float *f1) { double tmp; __asm__ __volatile__( "cvtsi2ssq %2, %1\n\t" "movss %1, %0" : "=m" (*f1), "=x" (tmp) : "m" (*ll1)); } extern __inline__ void sse_cvttss2siq(float *f1, long long *ll1) { uint64_t tmp; __asm__ __volatile__( "cvttss2siq %2, %1\n\t" "movq %1, %0" : "=m" (*ll1), "=r" (tmp) : "m" (*f1)); } extern __inline__ void sse_cvtss2siq(float *f1, long long *ll1) { uint64_t tmp; __asm__ __volatile__( "cvtss2siq %2, %1\n\t" "movq %1, %0" : "=m" (*ll1), "=r" (tmp) : "m" (*f1)); } #endif extern __inline__ void sse_cmpeqsd(double *d1, double *d2, long long *ll1) { __asm__ __volatile__( "cmpeqsd %2,%1\n\t" "movsd %1,%0" : "=m" (*ll1), "+x" (*d1) : "x" (*d2)); } extern __inline__ void sse_cmpltsd(double *d1, double *d2, long long *ll1) { __asm__ __volatile__( "cmpltsd %2,%1\n\t" "movsd %1,%0" : "=m" (*ll1), "+x" (*d1) : "x" (*d2)); } extern __inline__ void sse_cmplesd(double *d1, double *d2, long long *ll1) { __asm__ __volatile__( "cmplesd %2,%1\n\t" "movsd %1,%0" : "=m" (*ll1), "+x" (*d1) : "x" (*d2)); } extern __inline__ void sse_cmpunordsd(double *d1, double *d2, long long *ll1) { __asm__ __volatile__( "cmpunordsd %2,%1\n\t" "movsd %1,%0" : "=m" (*ll1), "+x" (*d1) : "x" (*d2)); } extern __inline__ void sse_minsd(double *d1, double *d2, double *d3) { __asm__ __volatile__( "minsd %2,%1\n\t" "movsd %1,%0" : "=m" (*d3), "+x" (*d1) : "x" (*d2)); } extern __inline__ void sse_maxsd(double *d1, double *d2, double *d3) { __asm__ __volatile__( "maxsd %2,%1\n\t" "movsd %1,%0" : "=m" (*d3), "+x" (*d1) : "x" (*d2)); } extern __inline__ void sse_addsd(double *d1, double *d2, double *d3) { __asm__ __volatile__( "addsd %2,%1\n\t" "movsd %1,%0" : "=m" (*d3), "+x" (*d1) : "x" (*d2)); } extern __inline__ void sse_subsd(double *d1, double *d2, double *d3) { __asm__ __volatile__( "subsd %2,%1\n\t" "movsd %1,%0" : "=m" (*d3), "+x" (*d1) : "x" (*d2)); } extern __inline__ void sse_mulsd(double *d1, double *d2, double *d3) { __asm__ __volatile__( "mulsd %2,%1\n\t" "movsd %1,%0" : "=m" (*d3), "+x" (*d1) : "x" (*d2)); } extern __inline__ void sse_divsd(double *d1, double *d2, double *d3) { __asm__ __volatile__( "divsd %2,%1\n\t" "movsd %1,%0" : "=m" (*d3), "+x" (*d1) : "x" (*d2)); } extern __inline__ void sse_sqrtsd(double *d1, double *d2) { double tmp; __asm__ __volatile__( "sqrtsd %2, %1\n\t" "movsd %1, %0" : "=m" (*d2), "=x" (tmp) : "m" (*d1)); } extern __inline__ void sse_ucomisd(double *d1, double *d2) { __asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2)); } extern __inline__ void sse_comisd(double *d1, double *d2) { __asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2)); } extern __inline__ void sse_cvtsd2ss(double *d1, float *f1) { double tmp; __asm__ __volatile__( "cvtsd2ss %2,%1\n\t" "movss %1,%0" : "=m" (*f1), "=x" (tmp) : "m" (*d1)); } extern __inline__ void sse_cvtsi2sd(int *i1, double *d1) { double tmp; __asm__ __volatile__( "cvtsi2sd %2,%1\n\t" "movsd %1,%0" : "=m" (*d1), "=x" (tmp) : "m" (*i1)); } extern __inline__ void sse_cvttsd2si(double *d1, int *i1) { int tmp; __asm__ __volatile__( "cvttsd2si %2,%1\n\t" "movl %1,%0" : "=m" (*i1), "=r" (tmp) : "m" (*d1)); } extern __inline__ void sse_cvtsd2si(double *d1, int *i1) { int tmp; __asm__ __volatile__( "cvtsd2si %2,%1\n\t" "movl %1,%0" : "=m" (*i1), "=r" (tmp) : "m" (*d1)); } #if defined(__amd64) extern __inline__ void sse_cvtsi2sdq(long long *ll1, double *d1) { double tmp; __asm__ __volatile__( "cvtsi2sdq %2,%1\n\t" "movsd %1,%0" : "=m" (*d1), "=x" (tmp) : "m" (*ll1)); } extern __inline__ void sse_cvttsd2siq(double *d1, long long *ll1) { uint64_t tmp; __asm__ __volatile__( "cvttsd2siq %2,%1\n\t" "movq %1,%0" : "=m" (*ll1), "=r" (tmp) : "m" (*d1)); } extern __inline__ void sse_cvtsd2siq(double *d1, long long *ll1) { uint64_t tmp; __asm__ __volatile__( "cvtsd2siq %2,%1\n\t" "movq %1,%0" : "=m" (*ll1), "=r" (tmp) : "m" (*d1)); } #endif #elif defined(__sparc) extern __inline__ void __fenv_getfsr(unsigned long *l) { __asm__ __volatile__( #if defined(__sparcv9) "stx %%fsr,%0\n\t" #else "st %%fsr,%0\n\t" #endif : "=m" (*l)); } extern __inline__ void __fenv_setfsr(const unsigned long *l) { __asm__ __volatile__( #if defined(__sparcv9) "ldx %0,%%fsr\n\t" #else "ld %0,%%fsr\n\t" #endif : : "m" (*l) : "cc"); } extern __inline__ void __fenv_getfsr32(unsigned int *l) { __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l)); } extern __inline__ void __fenv_setfsr32(const unsigned int *l) { __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l)); } #else #error "GCC FENV inlines not implemented for this platform" #endif #ifdef __cplusplus } #endif #endif /* __GNUC__ */ #endif /* _FENV_INLINES_H */