1*0b57cec5SDimitry Andric /*===---- fmaintrin.h - FMA intrinsics -------------------------------------=== 2*0b57cec5SDimitry Andric * 3*0b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric * 7*0b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 8*0b57cec5SDimitry Andric */ 9*0b57cec5SDimitry Andric 10*0b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 11*0b57cec5SDimitry Andric #error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." 12*0b57cec5SDimitry Andric #endif 13*0b57cec5SDimitry Andric 14*0b57cec5SDimitry Andric #ifndef __FMAINTRIN_H 15*0b57cec5SDimitry Andric #define __FMAINTRIN_H 16*0b57cec5SDimitry Andric 17*0b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 18*0b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) 19*0b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) 20*0b57cec5SDimitry Andric 21*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 22*0b57cec5SDimitry Andric _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) 23*0b57cec5SDimitry Andric { 24*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 25*0b57cec5SDimitry Andric } 26*0b57cec5SDimitry Andric 27*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 28*0b57cec5SDimitry Andric _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) 29*0b57cec5SDimitry Andric { 30*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 31*0b57cec5SDimitry Andric } 32*0b57cec5SDimitry Andric 33*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 34*0b57cec5SDimitry Andric _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) 35*0b57cec5SDimitry Andric { 36*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 37*0b57cec5SDimitry Andric } 38*0b57cec5SDimitry Andric 39*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 40*0b57cec5SDimitry Andric _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) 41*0b57cec5SDimitry Andric { 42*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); 43*0b57cec5SDimitry Andric } 44*0b57cec5SDimitry Andric 45*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 46*0b57cec5SDimitry Andric _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) 47*0b57cec5SDimitry Andric { 48*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 49*0b57cec5SDimitry Andric } 50*0b57cec5SDimitry Andric 51*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 52*0b57cec5SDimitry Andric _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) 53*0b57cec5SDimitry Andric { 54*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); 55*0b57cec5SDimitry Andric } 56*0b57cec5SDimitry Andric 57*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 58*0b57cec5SDimitry Andric _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) 59*0b57cec5SDimitry Andric { 60*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 61*0b57cec5SDimitry Andric } 62*0b57cec5SDimitry Andric 63*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 64*0b57cec5SDimitry Andric _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) 65*0b57cec5SDimitry Andric { 66*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); 67*0b57cec5SDimitry Andric } 68*0b57cec5SDimitry Andric 69*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 70*0b57cec5SDimitry Andric _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) 71*0b57cec5SDimitry Andric { 72*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 73*0b57cec5SDimitry Andric } 74*0b57cec5SDimitry Andric 75*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 76*0b57cec5SDimitry Andric _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) 77*0b57cec5SDimitry Andric { 78*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); 79*0b57cec5SDimitry Andric } 80*0b57cec5SDimitry Andric 81*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 82*0b57cec5SDimitry Andric _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) 83*0b57cec5SDimitry Andric { 84*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); 85*0b57cec5SDimitry Andric } 86*0b57cec5SDimitry Andric 87*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 88*0b57cec5SDimitry Andric _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) 89*0b57cec5SDimitry Andric { 90*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); 91*0b57cec5SDimitry Andric } 92*0b57cec5SDimitry Andric 93*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 94*0b57cec5SDimitry Andric _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) 95*0b57cec5SDimitry Andric { 96*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 97*0b57cec5SDimitry Andric } 98*0b57cec5SDimitry Andric 99*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 100*0b57cec5SDimitry Andric _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) 101*0b57cec5SDimitry Andric { 102*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); 103*0b57cec5SDimitry Andric } 104*0b57cec5SDimitry Andric 105*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 106*0b57cec5SDimitry Andric _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) 107*0b57cec5SDimitry Andric { 108*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); 109*0b57cec5SDimitry Andric } 110*0b57cec5SDimitry Andric 111*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 112*0b57cec5SDimitry Andric _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) 113*0b57cec5SDimitry Andric { 114*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); 115*0b57cec5SDimitry Andric } 116*0b57cec5SDimitry Andric 117*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 118*0b57cec5SDimitry Andric _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) 119*0b57cec5SDimitry Andric { 120*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 121*0b57cec5SDimitry Andric } 122*0b57cec5SDimitry Andric 123*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 124*0b57cec5SDimitry Andric _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) 125*0b57cec5SDimitry Andric { 126*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 127*0b57cec5SDimitry Andric } 128*0b57cec5SDimitry Andric 129*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 130*0b57cec5SDimitry Andric _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) 131*0b57cec5SDimitry Andric { 132*0b57cec5SDimitry Andric return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); 133*0b57cec5SDimitry Andric } 134*0b57cec5SDimitry Andric 135*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 136*0b57cec5SDimitry Andric _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) 137*0b57cec5SDimitry Andric { 138*0b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); 139*0b57cec5SDimitry Andric } 140*0b57cec5SDimitry Andric 141*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 142*0b57cec5SDimitry Andric _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) 143*0b57cec5SDimitry Andric { 144*0b57cec5SDimitry Andric return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 145*0b57cec5SDimitry Andric } 146*0b57cec5SDimitry Andric 147*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 148*0b57cec5SDimitry Andric _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) 149*0b57cec5SDimitry Andric { 150*0b57cec5SDimitry Andric return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 151*0b57cec5SDimitry Andric } 152*0b57cec5SDimitry Andric 153*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 154*0b57cec5SDimitry Andric _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) 155*0b57cec5SDimitry Andric { 156*0b57cec5SDimitry Andric return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); 157*0b57cec5SDimitry Andric } 158*0b57cec5SDimitry Andric 159*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 160*0b57cec5SDimitry Andric _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) 161*0b57cec5SDimitry Andric { 162*0b57cec5SDimitry Andric return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); 163*0b57cec5SDimitry Andric } 164*0b57cec5SDimitry Andric 165*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 166*0b57cec5SDimitry Andric _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) 167*0b57cec5SDimitry Andric { 168*0b57cec5SDimitry Andric return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 169*0b57cec5SDimitry Andric } 170*0b57cec5SDimitry Andric 171*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 172*0b57cec5SDimitry Andric _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) 173*0b57cec5SDimitry Andric { 174*0b57cec5SDimitry Andric return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); 175*0b57cec5SDimitry Andric } 176*0b57cec5SDimitry Andric 177*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 178*0b57cec5SDimitry Andric _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) 179*0b57cec5SDimitry Andric { 180*0b57cec5SDimitry Andric return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); 181*0b57cec5SDimitry Andric } 182*0b57cec5SDimitry Andric 183*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 184*0b57cec5SDimitry Andric _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) 185*0b57cec5SDimitry Andric { 186*0b57cec5SDimitry Andric return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); 187*0b57cec5SDimitry Andric } 188*0b57cec5SDimitry Andric 189*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 190*0b57cec5SDimitry Andric _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) 191*0b57cec5SDimitry Andric { 192*0b57cec5SDimitry Andric return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 193*0b57cec5SDimitry Andric } 194*0b57cec5SDimitry Andric 195*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 196*0b57cec5SDimitry Andric _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) 197*0b57cec5SDimitry Andric { 198*0b57cec5SDimitry Andric return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 199*0b57cec5SDimitry Andric } 200*0b57cec5SDimitry Andric 201*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 202*0b57cec5SDimitry Andric _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) 203*0b57cec5SDimitry Andric { 204*0b57cec5SDimitry Andric return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); 205*0b57cec5SDimitry Andric } 206*0b57cec5SDimitry Andric 207*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 208*0b57cec5SDimitry Andric _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) 209*0b57cec5SDimitry Andric { 210*0b57cec5SDimitry Andric return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); 211*0b57cec5SDimitry Andric } 212*0b57cec5SDimitry Andric 213*0b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 214*0b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256 215*0b57cec5SDimitry Andric 216*0b57cec5SDimitry Andric #endif /* __FMAINTRIN_H */ 217