xref: /freebsd/contrib/llvm-project/clang/lib/Headers/fmaintrin.h (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric /*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
2*0b57cec5SDimitry Andric  *
3*0b57cec5SDimitry Andric  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric  * See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric  *
7*0b57cec5SDimitry Andric  *===-----------------------------------------------------------------------===
8*0b57cec5SDimitry Andric  */
9*0b57cec5SDimitry Andric 
10*0b57cec5SDimitry Andric #ifndef __IMMINTRIN_H
11*0b57cec5SDimitry Andric #error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
12*0b57cec5SDimitry Andric #endif
13*0b57cec5SDimitry Andric 
14*0b57cec5SDimitry Andric #ifndef __FMAINTRIN_H
15*0b57cec5SDimitry Andric #define __FMAINTRIN_H
16*0b57cec5SDimitry Andric 
/* Attribute sets shared by every intrinsic defined in this header. Each
 * function is forced inline, marked __nodebug__, compiled with the "fma"
 * target feature enabled, and tagged with a minimum vector width: 128 bits
 * for the _mm_* wrappers and 256 bits for the _mm256_* wrappers.
 */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("fma"),            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("fma"),            \
                 __min_vector_width__(256)))
20*0b57cec5SDimitry Andric 
21*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
22*0b57cec5SDimitry Andric _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
23*0b57cec5SDimitry Andric {
24*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
25*0b57cec5SDimitry Andric }
26*0b57cec5SDimitry Andric 
27*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
28*0b57cec5SDimitry Andric _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
29*0b57cec5SDimitry Andric {
30*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
31*0b57cec5SDimitry Andric }
32*0b57cec5SDimitry Andric 
33*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
34*0b57cec5SDimitry Andric _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
35*0b57cec5SDimitry Andric {
36*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
37*0b57cec5SDimitry Andric }
38*0b57cec5SDimitry Andric 
39*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
40*0b57cec5SDimitry Andric _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
41*0b57cec5SDimitry Andric {
42*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
43*0b57cec5SDimitry Andric }
44*0b57cec5SDimitry Andric 
45*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
46*0b57cec5SDimitry Andric _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
47*0b57cec5SDimitry Andric {
48*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
49*0b57cec5SDimitry Andric }
50*0b57cec5SDimitry Andric 
51*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
52*0b57cec5SDimitry Andric _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
53*0b57cec5SDimitry Andric {
54*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
55*0b57cec5SDimitry Andric }
56*0b57cec5SDimitry Andric 
57*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
58*0b57cec5SDimitry Andric _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
59*0b57cec5SDimitry Andric {
60*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
61*0b57cec5SDimitry Andric }
62*0b57cec5SDimitry Andric 
63*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
64*0b57cec5SDimitry Andric _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
65*0b57cec5SDimitry Andric {
66*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
67*0b57cec5SDimitry Andric }
68*0b57cec5SDimitry Andric 
69*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
70*0b57cec5SDimitry Andric _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
71*0b57cec5SDimitry Andric {
72*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
73*0b57cec5SDimitry Andric }
74*0b57cec5SDimitry Andric 
75*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
76*0b57cec5SDimitry Andric _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
77*0b57cec5SDimitry Andric {
78*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
79*0b57cec5SDimitry Andric }
80*0b57cec5SDimitry Andric 
81*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
82*0b57cec5SDimitry Andric _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
83*0b57cec5SDimitry Andric {
84*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
85*0b57cec5SDimitry Andric }
86*0b57cec5SDimitry Andric 
87*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
88*0b57cec5SDimitry Andric _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
89*0b57cec5SDimitry Andric {
90*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
91*0b57cec5SDimitry Andric }
92*0b57cec5SDimitry Andric 
93*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
94*0b57cec5SDimitry Andric _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
95*0b57cec5SDimitry Andric {
96*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
97*0b57cec5SDimitry Andric }
98*0b57cec5SDimitry Andric 
99*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
100*0b57cec5SDimitry Andric _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
101*0b57cec5SDimitry Andric {
102*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
103*0b57cec5SDimitry Andric }
104*0b57cec5SDimitry Andric 
105*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
106*0b57cec5SDimitry Andric _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
107*0b57cec5SDimitry Andric {
108*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
109*0b57cec5SDimitry Andric }
110*0b57cec5SDimitry Andric 
111*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
112*0b57cec5SDimitry Andric _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
113*0b57cec5SDimitry Andric {
114*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
115*0b57cec5SDimitry Andric }
116*0b57cec5SDimitry Andric 
117*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
118*0b57cec5SDimitry Andric _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
119*0b57cec5SDimitry Andric {
120*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
121*0b57cec5SDimitry Andric }
122*0b57cec5SDimitry Andric 
123*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
124*0b57cec5SDimitry Andric _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
125*0b57cec5SDimitry Andric {
126*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
127*0b57cec5SDimitry Andric }
128*0b57cec5SDimitry Andric 
129*0b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
130*0b57cec5SDimitry Andric _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
131*0b57cec5SDimitry Andric {
132*0b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
133*0b57cec5SDimitry Andric }
134*0b57cec5SDimitry Andric 
135*0b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
136*0b57cec5SDimitry Andric _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
137*0b57cec5SDimitry Andric {
138*0b57cec5SDimitry Andric   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
139*0b57cec5SDimitry Andric }
140*0b57cec5SDimitry Andric 
141*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256
142*0b57cec5SDimitry Andric _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
143*0b57cec5SDimitry Andric {
144*0b57cec5SDimitry Andric   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
145*0b57cec5SDimitry Andric }
146*0b57cec5SDimitry Andric 
147*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256
148*0b57cec5SDimitry Andric _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
149*0b57cec5SDimitry Andric {
150*0b57cec5SDimitry Andric   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
151*0b57cec5SDimitry Andric }
152*0b57cec5SDimitry Andric 
153*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256
154*0b57cec5SDimitry Andric _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
155*0b57cec5SDimitry Andric {
156*0b57cec5SDimitry Andric   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
157*0b57cec5SDimitry Andric }
158*0b57cec5SDimitry Andric 
159*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256
160*0b57cec5SDimitry Andric _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
161*0b57cec5SDimitry Andric {
162*0b57cec5SDimitry Andric   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
163*0b57cec5SDimitry Andric }
164*0b57cec5SDimitry Andric 
165*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256
166*0b57cec5SDimitry Andric _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
167*0b57cec5SDimitry Andric {
168*0b57cec5SDimitry Andric   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
169*0b57cec5SDimitry Andric }
170*0b57cec5SDimitry Andric 
171*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256
172*0b57cec5SDimitry Andric _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
173*0b57cec5SDimitry Andric {
174*0b57cec5SDimitry Andric   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
175*0b57cec5SDimitry Andric }
176*0b57cec5SDimitry Andric 
177*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256
178*0b57cec5SDimitry Andric _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
179*0b57cec5SDimitry Andric {
180*0b57cec5SDimitry Andric   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
181*0b57cec5SDimitry Andric }
182*0b57cec5SDimitry Andric 
183*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256
184*0b57cec5SDimitry Andric _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
185*0b57cec5SDimitry Andric {
186*0b57cec5SDimitry Andric   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
187*0b57cec5SDimitry Andric }
188*0b57cec5SDimitry Andric 
189*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256
190*0b57cec5SDimitry Andric _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
191*0b57cec5SDimitry Andric {
192*0b57cec5SDimitry Andric   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
193*0b57cec5SDimitry Andric }
194*0b57cec5SDimitry Andric 
195*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256
196*0b57cec5SDimitry Andric _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
197*0b57cec5SDimitry Andric {
198*0b57cec5SDimitry Andric   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
199*0b57cec5SDimitry Andric }
200*0b57cec5SDimitry Andric 
201*0b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256
202*0b57cec5SDimitry Andric _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
203*0b57cec5SDimitry Andric {
204*0b57cec5SDimitry Andric   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
205*0b57cec5SDimitry Andric }
206*0b57cec5SDimitry Andric 
207*0b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256
208*0b57cec5SDimitry Andric _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
209*0b57cec5SDimitry Andric {
210*0b57cec5SDimitry Andric   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
211*0b57cec5SDimitry Andric }
212*0b57cec5SDimitry Andric 
213*0b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128
214*0b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256
215*0b57cec5SDimitry Andric 
216*0b57cec5SDimitry Andric #endif /* __FMAINTRIN_H */
217