xref: /freebsd/contrib/llvm-project/clang/lib/Headers/avx512vlfp16intrin.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1349cc55cSDimitry Andric /*===---------- avx512vlfp16intrin.h - AVX512-FP16 intrinsics --------------===
2349cc55cSDimitry Andric  *
3349cc55cSDimitry Andric  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric  * See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6349cc55cSDimitry Andric  *
7349cc55cSDimitry Andric  *===-----------------------------------------------------------------------===
8349cc55cSDimitry Andric  */
9349cc55cSDimitry Andric #ifndef __IMMINTRIN_H
10349cc55cSDimitry Andric #error                                                                         \
11349cc55cSDimitry Andric     "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead."
12349cc55cSDimitry Andric #endif
13349cc55cSDimitry Andric 
14bdd1243dSDimitry Andric #ifdef __SSE2__
15bdd1243dSDimitry Andric 
16349cc55cSDimitry Andric #ifndef __AVX512VLFP16INTRIN_H
17349cc55cSDimitry Andric #define __AVX512VLFP16INTRIN_H
18349cc55cSDimitry Andric 
/*
 * Attribute sets shared by the intrinsics defined in this header.  Both
 * request __always_inline__ and __nodebug__ and target
 * "avx512fp16,avx512vl,no-evex512"; they differ only in the
 * __min_vector_width__ they declare (256 vs. 128).
 */
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __min_vector_width__(256),    \
                 __target__("avx512fp16,avx512vl,no-evex512")))
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __min_vector_width__(128),    \
                 __target__("avx512fp16,avx512vl,no-evex512")))
28349cc55cSDimitry Andric 
29349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {
30349cc55cSDimitry Andric   return __a[0];
31349cc55cSDimitry Andric }
32349cc55cSDimitry Andric 
33349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) {
34349cc55cSDimitry Andric   return __a[0];
35349cc55cSDimitry Andric }
36349cc55cSDimitry Andric 
37349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) {
38349cc55cSDimitry Andric   return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
39349cc55cSDimitry Andric }
40349cc55cSDimitry Andric 
41349cc55cSDimitry Andric static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h) {
42349cc55cSDimitry Andric   return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h};
43349cc55cSDimitry Andric }
44349cc55cSDimitry Andric 
45349cc55cSDimitry Andric static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h) {
46349cc55cSDimitry Andric   return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h,
47349cc55cSDimitry Andric                             __h, __h, __h, __h, __h, __h, __h, __h};
48349cc55cSDimitry Andric }
49349cc55cSDimitry Andric 
50349cc55cSDimitry Andric static __inline __m128h __DEFAULT_FN_ATTRS128
51349cc55cSDimitry Andric _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
52349cc55cSDimitry Andric            _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
53349cc55cSDimitry Andric   return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
54349cc55cSDimitry Andric }
55349cc55cSDimitry Andric 
56349cc55cSDimitry Andric static __inline __m256h __DEFAULT_FN_ATTRS256
57349cc55cSDimitry Andric _mm256_set1_pch(_Float16 _Complex h) {
58349cc55cSDimitry Andric   return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h));
59349cc55cSDimitry Andric }
60349cc55cSDimitry Andric 
61349cc55cSDimitry Andric static __inline __m128h __DEFAULT_FN_ATTRS128
62349cc55cSDimitry Andric _mm_set1_pch(_Float16 _Complex h) {
63349cc55cSDimitry Andric   return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h));
64349cc55cSDimitry Andric }
65349cc55cSDimitry Andric 
66349cc55cSDimitry Andric static __inline __m256h __DEFAULT_FN_ATTRS256
67349cc55cSDimitry Andric _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
68349cc55cSDimitry Andric               _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
69349cc55cSDimitry Andric               _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
70349cc55cSDimitry Andric               _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) {
71349cc55cSDimitry Andric   return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
72349cc55cSDimitry Andric                             __h10, __h9,  __h8,  __h7,  __h6,  __h5,
73349cc55cSDimitry Andric                             __h4,  __h3,  __h2,  __h1};
74349cc55cSDimitry Andric }
75349cc55cSDimitry Andric 
/* Reversed-order variant of _mm_set_ph: the first argument (e0) ends up as
 * the last argument of _mm_set_ph, and so on. */
#define _mm_setr_ph(e0, e1, e2, e3, e4, e5, e6, e7)                            \
  _mm_set_ph((e7), (e6), (e5), (e4), (e3), (e2), (e1), (e0))
78349cc55cSDimitry Andric 
/* Reversed-order variant of _mm256_set_ph: the first argument (e0) ends up as
 * the last argument of _mm256_set_ph, and so on. */
#define _mm256_setr_ph(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12,  \
                       e13, e14, e15)                                          \
  _mm256_set_ph((e15), (e14), (e13), (e12), (e11), (e10), (e9), (e8), (e7),    \
                (e6), (e5), (e4), (e3), (e2), (e1), (e0))
83349cc55cSDimitry Andric 
84349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A,
85349cc55cSDimitry Andric                                                               __m256h __B) {
86349cc55cSDimitry Andric   return (__m256h)((__v16hf)__A + (__v16hf)__B);
87349cc55cSDimitry Andric }
88349cc55cSDimitry Andric 
89349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
90349cc55cSDimitry Andric _mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
91349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
92349cc55cSDimitry Andric       __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W);
93349cc55cSDimitry Andric }
94349cc55cSDimitry Andric 
95349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
96349cc55cSDimitry Andric _mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) {
97349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
98349cc55cSDimitry Andric       __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
99349cc55cSDimitry Andric }
100349cc55cSDimitry Andric 
101349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A,
102349cc55cSDimitry Andric                                                            __m128h __B) {
103349cc55cSDimitry Andric   return (__m128h)((__v8hf)__A + (__v8hf)__B);
104349cc55cSDimitry Andric }
105349cc55cSDimitry Andric 
106349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W,
107349cc55cSDimitry Andric                                                                 __mmask8 __U,
108349cc55cSDimitry Andric                                                                 __m128h __A,
109349cc55cSDimitry Andric                                                                 __m128h __B) {
110349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
111349cc55cSDimitry Andric                                               (__v8hf)__W);
112349cc55cSDimitry Andric }
113349cc55cSDimitry Andric 
114349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U,
115349cc55cSDimitry Andric                                                                  __m128h __A,
116349cc55cSDimitry Andric                                                                  __m128h __B) {
117349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
118349cc55cSDimitry Andric                                               (__v8hf)_mm_setzero_ph());
119349cc55cSDimitry Andric }
120349cc55cSDimitry Andric 
121349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A,
122349cc55cSDimitry Andric                                                               __m256h __B) {
123349cc55cSDimitry Andric   return (__m256h)((__v16hf)__A - (__v16hf)__B);
124349cc55cSDimitry Andric }
125349cc55cSDimitry Andric 
126349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
127349cc55cSDimitry Andric _mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
128349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
129349cc55cSDimitry Andric       __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W);
130349cc55cSDimitry Andric }
131349cc55cSDimitry Andric 
132349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
133349cc55cSDimitry Andric _mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) {
134349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
135349cc55cSDimitry Andric       __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
136349cc55cSDimitry Andric }
137349cc55cSDimitry Andric 
138349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A,
139349cc55cSDimitry Andric                                                            __m128h __B) {
140349cc55cSDimitry Andric   return (__m128h)((__v8hf)__A - (__v8hf)__B);
141349cc55cSDimitry Andric }
142349cc55cSDimitry Andric 
143349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W,
144349cc55cSDimitry Andric                                                                 __mmask8 __U,
145349cc55cSDimitry Andric                                                                 __m128h __A,
146349cc55cSDimitry Andric                                                                 __m128h __B) {
147349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
148349cc55cSDimitry Andric                                               (__v8hf)__W);
149349cc55cSDimitry Andric }
150349cc55cSDimitry Andric 
151349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U,
152349cc55cSDimitry Andric                                                                  __m128h __A,
153349cc55cSDimitry Andric                                                                  __m128h __B) {
154349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
155349cc55cSDimitry Andric                                               (__v8hf)_mm_setzero_ph());
156349cc55cSDimitry Andric }
157349cc55cSDimitry Andric 
158349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A,
159349cc55cSDimitry Andric                                                               __m256h __B) {
160349cc55cSDimitry Andric   return (__m256h)((__v16hf)__A * (__v16hf)__B);
161349cc55cSDimitry Andric }
162349cc55cSDimitry Andric 
163349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
164349cc55cSDimitry Andric _mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
165349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
166349cc55cSDimitry Andric       __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W);
167349cc55cSDimitry Andric }
168349cc55cSDimitry Andric 
169349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
170349cc55cSDimitry Andric _mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) {
171349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
172349cc55cSDimitry Andric       __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
173349cc55cSDimitry Andric }
174349cc55cSDimitry Andric 
175349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A,
176349cc55cSDimitry Andric                                                            __m128h __B) {
177349cc55cSDimitry Andric   return (__m128h)((__v8hf)__A * (__v8hf)__B);
178349cc55cSDimitry Andric }
179349cc55cSDimitry Andric 
180349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W,
181349cc55cSDimitry Andric                                                                 __mmask8 __U,
182349cc55cSDimitry Andric                                                                 __m128h __A,
183349cc55cSDimitry Andric                                                                 __m128h __B) {
184349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
185349cc55cSDimitry Andric                                               (__v8hf)__W);
186349cc55cSDimitry Andric }
187349cc55cSDimitry Andric 
188349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U,
189349cc55cSDimitry Andric                                                                  __m128h __A,
190349cc55cSDimitry Andric                                                                  __m128h __B) {
191349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
192349cc55cSDimitry Andric                                               (__v8hf)_mm_setzero_ph());
193349cc55cSDimitry Andric }
194349cc55cSDimitry Andric 
195349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A,
196349cc55cSDimitry Andric                                                               __m256h __B) {
197349cc55cSDimitry Andric   return (__m256h)((__v16hf)__A / (__v16hf)__B);
198349cc55cSDimitry Andric }
199349cc55cSDimitry Andric 
200349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
201349cc55cSDimitry Andric _mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
202349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
203349cc55cSDimitry Andric       __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W);
204349cc55cSDimitry Andric }
205349cc55cSDimitry Andric 
206349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
207349cc55cSDimitry Andric _mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) {
208349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
209349cc55cSDimitry Andric       __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
210349cc55cSDimitry Andric }
211349cc55cSDimitry Andric 
212349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A,
213349cc55cSDimitry Andric                                                            __m128h __B) {
214349cc55cSDimitry Andric   return (__m128h)((__v8hf)__A / (__v8hf)__B);
215349cc55cSDimitry Andric }
216349cc55cSDimitry Andric 
217349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W,
218349cc55cSDimitry Andric                                                                 __mmask8 __U,
219349cc55cSDimitry Andric                                                                 __m128h __A,
220349cc55cSDimitry Andric                                                                 __m128h __B) {
221349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
222349cc55cSDimitry Andric                                               (__v8hf)__W);
223349cc55cSDimitry Andric }
224349cc55cSDimitry Andric 
225349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U,
226349cc55cSDimitry Andric                                                                  __m128h __A,
227349cc55cSDimitry Andric                                                                  __m128h __B) {
228349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
229349cc55cSDimitry Andric                                               (__v8hf)_mm_setzero_ph());
230349cc55cSDimitry Andric }
231349cc55cSDimitry Andric 
232349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A,
233349cc55cSDimitry Andric                                                               __m256h __B) {
234349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B);
235349cc55cSDimitry Andric }
236349cc55cSDimitry Andric 
237349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
238349cc55cSDimitry Andric _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
239349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
240349cc55cSDimitry Andric       (__mmask16)__U,
241349cc55cSDimitry Andric       (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
242349cc55cSDimitry Andric       (__v16hf)__W);
243349cc55cSDimitry Andric }
244349cc55cSDimitry Andric 
245349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
246349cc55cSDimitry Andric _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) {
247349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
248349cc55cSDimitry Andric       (__mmask16)__U,
249349cc55cSDimitry Andric       (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
250349cc55cSDimitry Andric       (__v16hf)_mm256_setzero_ph());
251349cc55cSDimitry Andric }
252349cc55cSDimitry Andric 
253349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A,
254349cc55cSDimitry Andric                                                            __m128h __B) {
255349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B);
256349cc55cSDimitry Andric }
257349cc55cSDimitry Andric 
258349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W,
259349cc55cSDimitry Andric                                                                 __mmask8 __U,
260349cc55cSDimitry Andric                                                                 __m128h __A,
261349cc55cSDimitry Andric                                                                 __m128h __B) {
262349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
263349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
264349cc55cSDimitry Andric       (__v8hf)__W);
265349cc55cSDimitry Andric }
266349cc55cSDimitry Andric 
267349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U,
268349cc55cSDimitry Andric                                                                  __m128h __A,
269349cc55cSDimitry Andric                                                                  __m128h __B) {
270349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
271349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
272349cc55cSDimitry Andric       (__v8hf)_mm_setzero_ph());
273349cc55cSDimitry Andric }
274349cc55cSDimitry Andric 
275349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A,
276349cc55cSDimitry Andric                                                               __m256h __B) {
277349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B);
278349cc55cSDimitry Andric }
279349cc55cSDimitry Andric 
280349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
281349cc55cSDimitry Andric _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
282349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
283349cc55cSDimitry Andric       (__mmask16)__U,
284349cc55cSDimitry Andric       (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
285349cc55cSDimitry Andric       (__v16hf)__W);
286349cc55cSDimitry Andric }
287349cc55cSDimitry Andric 
288349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
289349cc55cSDimitry Andric _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) {
290349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
291349cc55cSDimitry Andric       (__mmask16)__U,
292349cc55cSDimitry Andric       (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
293349cc55cSDimitry Andric       (__v16hf)_mm256_setzero_ph());
294349cc55cSDimitry Andric }
295349cc55cSDimitry Andric 
296349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A,
297349cc55cSDimitry Andric                                                            __m128h __B) {
298349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B);
299349cc55cSDimitry Andric }
300349cc55cSDimitry Andric 
301349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W,
302349cc55cSDimitry Andric                                                                 __mmask8 __U,
303349cc55cSDimitry Andric                                                                 __m128h __A,
304349cc55cSDimitry Andric                                                                 __m128h __B) {
305349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
306349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
307349cc55cSDimitry Andric       (__v8hf)__W);
308349cc55cSDimitry Andric }
309349cc55cSDimitry Andric 
310349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U,
311349cc55cSDimitry Andric                                                                  __m128h __A,
312349cc55cSDimitry Andric                                                                  __m128h __B) {
313349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
314349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
315349cc55cSDimitry Andric       (__v8hf)_mm_setzero_ph());
316349cc55cSDimitry Andric }
317349cc55cSDimitry Andric 
318349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) {
319349cc55cSDimitry Andric   return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A);
320349cc55cSDimitry Andric }
321349cc55cSDimitry Andric 
322349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) {
323349cc55cSDimitry Andric   return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A);
324349cc55cSDimitry Andric }
325349cc55cSDimitry Andric 
326349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A) {
327349cc55cSDimitry Andric   return (__m256h)_mm256_xor_ps((__m256)__A, _mm256_set1_ps(-0.0f));
328349cc55cSDimitry Andric }
329349cc55cSDimitry Andric 
330349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
331349cc55cSDimitry Andric _mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) {
332349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectps_256(
333349cc55cSDimitry Andric       (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W);
334349cc55cSDimitry Andric }
335349cc55cSDimitry Andric 
336349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
337349cc55cSDimitry Andric _mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) {
338349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectps_256(
339349cc55cSDimitry Andric       (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)_mm256_setzero_ps());
340349cc55cSDimitry Andric }
341349cc55cSDimitry Andric 
342349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A) {
343349cc55cSDimitry Andric   return (__m128h)_mm_xor_ps((__m128)__A, _mm_set1_ps(-0.0f));
344349cc55cSDimitry Andric }
345349cc55cSDimitry Andric 
346349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_conj_pch(__m128h __W,
347349cc55cSDimitry Andric                                                                   __mmask8 __U,
348349cc55cSDimitry Andric                                                                   __m128h __A) {
349349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectps_128(
350349cc55cSDimitry Andric       (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W);
351349cc55cSDimitry Andric }
352349cc55cSDimitry Andric 
353349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
354349cc55cSDimitry Andric _mm_maskz_conj_pch(__mmask8 __U, __m128h __A) {
355349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectps_128(
356349cc55cSDimitry Andric       (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)_mm_setzero_ps());
357349cc55cSDimitry Andric }
358349cc55cSDimitry Andric 
/* Compares the _Float16 lanes of two 256-bit vectors using predicate `pred`
 * through __builtin_ia32_cmpph256_mask with an all-ones input mask; the
 * result is a __mmask16. */
#define _mm256_cmp_ph_mask(x, y, pred)                                         \
  ((__mmask16)__builtin_ia32_cmpph256_mask((__v16hf)(__m256h)(x),              \
                                           (__v16hf)(__m256h)(y), (int)(pred), \
                                           (__mmask16)-1))
362349cc55cSDimitry Andric 
/* Same comparison as _mm256_cmp_ph_mask, but passes the caller-supplied mask
 * `k` as the builtin's input mask instead of all ones. */
#define _mm256_mask_cmp_ph_mask(k, x, y, pred)                                 \
  ((__mmask16)__builtin_ia32_cmpph256_mask((__v16hf)(__m256h)(x),              \
                                           (__v16hf)(__m256h)(y), (int)(pred), \
                                           (__mmask16)(k)))
366349cc55cSDimitry Andric 
/* Compares the _Float16 lanes of two 128-bit vectors using predicate `pred`
 * through __builtin_ia32_cmpph128_mask with an all-ones input mask; the
 * result is a __mmask8. */
#define _mm_cmp_ph_mask(x, y, pred)                                            \
  ((__mmask8)__builtin_ia32_cmpph128_mask((__v8hf)(__m128h)(x),                \
                                          (__v8hf)(__m128h)(y), (int)(pred),   \
                                          (__mmask8)-1))
370349cc55cSDimitry Andric 
/* Same comparison as _mm_cmp_ph_mask, but passes the caller-supplied mask `k`
 * as the builtin's input mask instead of all ones. */
#define _mm_mask_cmp_ph_mask(k, x, y, pred)                                    \
  ((__mmask8)__builtin_ia32_cmpph128_mask((__v8hf)(__m128h)(x),                \
                                          (__v8hf)(__m128h)(y), (int)(pred),   \
                                          (__mmask8)(k)))
374349cc55cSDimitry Andric 
375349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) {
376349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_rcpph256_mask(
377349cc55cSDimitry Andric       (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
378349cc55cSDimitry Andric }
379349cc55cSDimitry Andric 
380349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
381349cc55cSDimitry Andric _mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
382349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W,
383349cc55cSDimitry Andric                                                (__mmask16)__U);
384349cc55cSDimitry Andric }
385349cc55cSDimitry Andric 
386349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
387349cc55cSDimitry Andric _mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) {
388349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_rcpph256_mask(
389349cc55cSDimitry Andric       (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
390349cc55cSDimitry Andric }
391349cc55cSDimitry Andric 
392349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) {
393349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_rcpph128_mask(
394349cc55cSDimitry Andric       (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
395349cc55cSDimitry Andric }
396349cc55cSDimitry Andric 
397349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W,
398349cc55cSDimitry Andric                                                                 __mmask8 __U,
399349cc55cSDimitry Andric                                                                 __m128h __A) {
400349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W,
401349cc55cSDimitry Andric                                                (__mmask8)__U);
402349cc55cSDimitry Andric }
403349cc55cSDimitry Andric 
404349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U,
405349cc55cSDimitry Andric                                                                  __m128h __A) {
406349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_rcpph128_mask(
407349cc55cSDimitry Andric       (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
408349cc55cSDimitry Andric }
409349cc55cSDimitry Andric 
410349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) {
411349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_rsqrtph256_mask(
412349cc55cSDimitry Andric       (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
413349cc55cSDimitry Andric }
414349cc55cSDimitry Andric 
415349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
416349cc55cSDimitry Andric _mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
417349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W,
418349cc55cSDimitry Andric                                                  (__mmask16)__U);
419349cc55cSDimitry Andric }
420349cc55cSDimitry Andric 
421349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
422349cc55cSDimitry Andric _mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) {
423349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_rsqrtph256_mask(
424349cc55cSDimitry Andric       (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
425349cc55cSDimitry Andric }
426349cc55cSDimitry Andric 
427349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) {
428349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_rsqrtph128_mask(
429349cc55cSDimitry Andric       (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
430349cc55cSDimitry Andric }
431349cc55cSDimitry Andric 
432349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W,
433349cc55cSDimitry Andric                                                                   __mmask8 __U,
434349cc55cSDimitry Andric                                                                   __m128h __A) {
435349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W,
436349cc55cSDimitry Andric                                                  (__mmask8)__U);
437349cc55cSDimitry Andric }
438349cc55cSDimitry Andric 
439349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
440349cc55cSDimitry Andric _mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) {
441349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_rsqrtph128_mask(
442349cc55cSDimitry Andric       (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
443349cc55cSDimitry Andric }
444349cc55cSDimitry Andric 
445349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) {
446349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_getexpph128_mask(
447349cc55cSDimitry Andric       (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
448349cc55cSDimitry Andric }
449349cc55cSDimitry Andric 
450349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
451349cc55cSDimitry Andric _mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) {
452349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W,
453349cc55cSDimitry Andric                                                   (__mmask8)__U);
454349cc55cSDimitry Andric }
455349cc55cSDimitry Andric 
456349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
457349cc55cSDimitry Andric _mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) {
458349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_getexpph128_mask(
459349cc55cSDimitry Andric       (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
460349cc55cSDimitry Andric }
461349cc55cSDimitry Andric 
462349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) {
463349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_getexpph256_mask(
464349cc55cSDimitry Andric       (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
465349cc55cSDimitry Andric }
466349cc55cSDimitry Andric 
467349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
468349cc55cSDimitry Andric _mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
469349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W,
470349cc55cSDimitry Andric                                                   (__mmask16)__U);
471349cc55cSDimitry Andric }
472349cc55cSDimitry Andric 
473349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
474349cc55cSDimitry Andric _mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) {
475349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_getexpph256_mask(
476349cc55cSDimitry Andric       (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
477349cc55cSDimitry Andric }
478349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_getmantph128_mask applied to A (cast to __v8hf)
 * with the immediate (int)(((C) << 2) | (B)), a zero vector from
 * _mm_setzero_ph() and an all-ones (__mmask8)-1 mask; the result is cast to
 * __m128h. B fills the low bits of the immediate and C is shifted left by 2. */
479349cc55cSDimitry Andric #define _mm_getmant_ph(A, B, C)                                                \
480349cc55cSDimitry Andric   ((__m128h)__builtin_ia32_getmantph128_mask(                                  \
481349cc55cSDimitry Andric       (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
482349cc55cSDimitry Andric       (__mmask8)-1))
483349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_getmantph128_mask applied to A (cast to __v8hf)
 * with the immediate (int)(((C) << 2) | (B)), W as the vector operand that
 * follows it and (__mmask8)(U) as the mask; the result is cast to __m128h. */
484349cc55cSDimitry Andric #define _mm_mask_getmant_ph(W, U, A, B, C)                                     \
485349cc55cSDimitry Andric   ((__m128h)__builtin_ia32_getmantph128_mask(                                  \
486349cc55cSDimitry Andric       (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)(__m128h)(W),     \
487349cc55cSDimitry Andric       (__mmask8)(U)))
488349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_getmantph128_mask applied to A (cast to __v8hf)
 * with the immediate (int)(((C) << 2) | (B)), a zero vector from
 * _mm_setzero_ph() and (__mmask8)(U) as the mask; the result is cast to
 * __m128h. */
489349cc55cSDimitry Andric #define _mm_maskz_getmant_ph(U, A, B, C)                                       \
490349cc55cSDimitry Andric   ((__m128h)__builtin_ia32_getmantph128_mask(                                  \
491349cc55cSDimitry Andric       (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
492349cc55cSDimitry Andric       (__mmask8)(U)))
493349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_getmantph256_mask applied to A (cast to __v16hf)
 * with the immediate (int)(((C) << 2) | (B)), a zero vector from
 * _mm256_setzero_ph() and an all-ones (__mmask16)-1 mask; the result is cast
 * to __m256h. */
494349cc55cSDimitry Andric #define _mm256_getmant_ph(A, B, C)                                             \
495349cc55cSDimitry Andric   ((__m256h)__builtin_ia32_getmantph256_mask(                                  \
496349cc55cSDimitry Andric       (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)),                          \
497349cc55cSDimitry Andric       (__v16hf)_mm256_setzero_ph(), (__mmask16)-1))
498349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_getmantph256_mask applied to A (cast to __v16hf)
 * with the immediate (int)(((C) << 2) | (B)), W as the vector operand that
 * follows it and (__mmask16)(U) as the mask; the result is cast to __m256h. */
499349cc55cSDimitry Andric #define _mm256_mask_getmant_ph(W, U, A, B, C)                                  \
500349cc55cSDimitry Andric   ((__m256h)__builtin_ia32_getmantph256_mask(                                  \
501349cc55cSDimitry Andric       (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W),   \
502349cc55cSDimitry Andric       (__mmask16)(U)))
503349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_getmantph256_mask applied to A (cast to __v16hf)
 * with the immediate (int)(((C) << 2) | (B)), a zero vector from
 * _mm256_setzero_ph() and (__mmask16)(U) as the mask; the result is cast to
 * __m256h. */
504349cc55cSDimitry Andric #define _mm256_maskz_getmant_ph(U, A, B, C)                                    \
505349cc55cSDimitry Andric   ((__m256h)__builtin_ia32_getmantph256_mask(                                  \
506349cc55cSDimitry Andric       (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)),                          \
507349cc55cSDimitry Andric       (__v16hf)_mm256_setzero_ph(), (__mmask16)(U)))
508349cc55cSDimitry Andric 
509349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A,
510349cc55cSDimitry Andric                                                               __m128h __B) {
511349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_scalefph128_mask(
512349cc55cSDimitry Andric       (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
513349cc55cSDimitry Andric }
514349cc55cSDimitry Andric 
515349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
516349cc55cSDimitry Andric _mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
517349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B,
518349cc55cSDimitry Andric                                                   (__v8hf)__W, (__mmask8)__U);
519349cc55cSDimitry Andric }
520349cc55cSDimitry Andric 
521349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
522349cc55cSDimitry Andric _mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) {
523349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_scalefph128_mask(
524349cc55cSDimitry Andric       (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
525349cc55cSDimitry Andric }
526349cc55cSDimitry Andric 
527349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A,
528349cc55cSDimitry Andric                                                                  __m256h __B) {
529349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_scalefph256_mask(
530349cc55cSDimitry Andric       (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
531349cc55cSDimitry Andric }
532349cc55cSDimitry Andric 
533349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
534349cc55cSDimitry Andric _mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
535349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B,
536349cc55cSDimitry Andric                                                   (__v16hf)__W, (__mmask16)__U);
537349cc55cSDimitry Andric }
538349cc55cSDimitry Andric 
539349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
540349cc55cSDimitry Andric _mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) {
541349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_scalefph256_mask(
542349cc55cSDimitry Andric       (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
543349cc55cSDimitry Andric }
544349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_rndscaleph_128_mask applied to A (cast to
 * __v8hf) with the immediate (int)(imm), a zero vector from _mm_setzero_ph()
 * and an all-ones (__mmask8)-1 mask; the result is cast to __m128h. */
545349cc55cSDimitry Andric #define _mm_roundscale_ph(A, imm)                                              \
546349cc55cSDimitry Andric   ((__m128h)__builtin_ia32_rndscaleph_128_mask(                                \
547349cc55cSDimitry Andric       (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(),              \
548349cc55cSDimitry Andric       (__mmask8)-1))
549349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_rndscaleph_128_mask applied to A (cast to
 * __v8hf) with the immediate (int)(imm), W as the vector operand that follows
 * it and (__mmask8)(U) as the mask; the result is cast to __m128h. */
550349cc55cSDimitry Andric #define _mm_mask_roundscale_ph(W, U, A, imm)                                   \
551349cc55cSDimitry Andric   ((__m128h)__builtin_ia32_rndscaleph_128_mask(                                \
552349cc55cSDimitry Andric       (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
553349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_rndscaleph_128_mask applied to A (cast to
 * __v8hf) with the immediate (int)(imm), a zero vector from _mm_setzero_ph()
 * and (__mmask8)(U) as the mask; the result is cast to __m128h. */
554349cc55cSDimitry Andric #define _mm_maskz_roundscale_ph(U, A, imm)                                     \
555349cc55cSDimitry Andric   ((__m128h)__builtin_ia32_rndscaleph_128_mask(                                \
556349cc55cSDimitry Andric       (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(),              \
557349cc55cSDimitry Andric       (__mmask8)(U)))
558349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_rndscaleph_256_mask applied to A (cast to
 * __v16hf) with the immediate (int)(imm), a zero vector from
 * _mm256_setzero_ph() and an all-ones (__mmask16)-1 mask; the result is cast
 * to __m256h. */
559349cc55cSDimitry Andric #define _mm256_roundscale_ph(A, imm)                                           \
560349cc55cSDimitry Andric   ((__m256h)__builtin_ia32_rndscaleph_256_mask(                                \
561349cc55cSDimitry Andric       (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(),         \
562349cc55cSDimitry Andric       (__mmask16)-1))
563349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_rndscaleph_256_mask applied to A (cast to
 * __v16hf) with the immediate (int)(imm), W as the vector operand that
 * follows it and (__mmask16)(U) as the mask; the result is cast to __m256h. */
564349cc55cSDimitry Andric #define _mm256_mask_roundscale_ph(W, U, A, imm)                                \
565349cc55cSDimitry Andric   ((__m256h)__builtin_ia32_rndscaleph_256_mask(                                \
566349cc55cSDimitry Andric       (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W),                \
567349cc55cSDimitry Andric       (__mmask16)(U)))
568349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_rndscaleph_256_mask applied to A (cast to
 * __v16hf) with the immediate (int)(imm), a zero vector from
 * _mm256_setzero_ph() and (__mmask16)(U) as the mask; the result is cast to
 * __m256h. */
569349cc55cSDimitry Andric #define _mm256_maskz_roundscale_ph(U, A, imm)                                  \
570349cc55cSDimitry Andric   ((__m256h)__builtin_ia32_rndscaleph_256_mask(                                \
571349cc55cSDimitry Andric       (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(),         \
572349cc55cSDimitry Andric       (__mmask16)(U)))
573349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_reduceph128_mask applied to A (cast to __v8hf)
 * with the immediate (int)(imm), a zero vector from _mm_setzero_ph() and an
 * all-ones (__mmask8)-1 mask; the result is cast to __m128h. */
574349cc55cSDimitry Andric #define _mm_reduce_ph(A, imm)                                                  \
575349cc55cSDimitry Andric   ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm),  \
576349cc55cSDimitry Andric                                             (__v8hf)_mm_setzero_ph(),          \
577349cc55cSDimitry Andric                                             (__mmask8)-1))
578349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_reduceph128_mask applied to A (cast to __v8hf)
 * with the immediate (int)(imm), W as the vector operand that follows it and
 * (__mmask8)(U) as the mask; the result is cast to __m128h. */
579349cc55cSDimitry Andric #define _mm_mask_reduce_ph(W, U, A, imm)                                       \
580349cc55cSDimitry Andric   ((__m128h)__builtin_ia32_reduceph128_mask(                                   \
581349cc55cSDimitry Andric       (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
582349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_reduceph128_mask applied to A (cast to __v8hf)
 * with the immediate (int)(imm), a zero vector from _mm_setzero_ph() and
 * (__mmask8)(U) as the mask; the result is cast to __m128h. */
583349cc55cSDimitry Andric #define _mm_maskz_reduce_ph(U, A, imm)                                         \
584349cc55cSDimitry Andric   ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm),  \
585349cc55cSDimitry Andric                                             (__v8hf)_mm_setzero_ph(),          \
586349cc55cSDimitry Andric                                             (__mmask8)(U)))
587349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_reduceph256_mask applied to A (cast to __v16hf)
 * with the immediate (int)(imm), a zero vector from _mm256_setzero_ph() and
 * an all-ones (__mmask16)-1 mask; the result is cast to __m256h. */
588349cc55cSDimitry Andric #define _mm256_reduce_ph(A, imm)                                               \
589349cc55cSDimitry Andric   ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
590349cc55cSDimitry Andric                                             (__v16hf)_mm256_setzero_ph(),      \
591349cc55cSDimitry Andric                                             (__mmask16)-1))
592349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_reduceph256_mask applied to A (cast to __v16hf)
 * with the immediate (int)(imm), W as the vector operand that follows it and
 * (__mmask16)(U) as the mask; the result is cast to __m256h. */
593349cc55cSDimitry Andric #define _mm256_mask_reduce_ph(W, U, A, imm)                                    \
594349cc55cSDimitry Andric   ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
595349cc55cSDimitry Andric                                             (__v16hf)(__m256h)(W),             \
596349cc55cSDimitry Andric                                             (__mmask16)(U)))
597349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_reduceph256_mask applied to A (cast to __v16hf)
 * with the immediate (int)(imm), a zero vector from _mm256_setzero_ph() and
 * (__mmask16)(U) as the mask; the result is cast to __m256h. */
598349cc55cSDimitry Andric #define _mm256_maskz_reduce_ph(U, A, imm)                                      \
599349cc55cSDimitry Andric   ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
600349cc55cSDimitry Andric                                             (__v16hf)_mm256_setzero_ph(),      \
601349cc55cSDimitry Andric                                             (__mmask16)(U)))
602349cc55cSDimitry Andric 
603349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) {
604349cc55cSDimitry Andric   return __builtin_ia32_sqrtph((__v8hf)__a);
605349cc55cSDimitry Andric }
606349cc55cSDimitry Andric 
607349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W,
608349cc55cSDimitry Andric                                                                  __mmask8 __U,
609349cc55cSDimitry Andric                                                                  __m128h __A) {
610349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
611349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W);
612349cc55cSDimitry Andric }
613349cc55cSDimitry Andric 
614349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U,
615349cc55cSDimitry Andric                                                                   __m128h __A) {
616349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
617349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph());
618349cc55cSDimitry Andric }
619349cc55cSDimitry Andric 
620349cc55cSDimitry Andric static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) {
621349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a);
622349cc55cSDimitry Andric }
623349cc55cSDimitry Andric 
624349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
625349cc55cSDimitry Andric _mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
626349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
627349cc55cSDimitry Andric       (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W);
628349cc55cSDimitry Andric }
629349cc55cSDimitry Andric 
630349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
631349cc55cSDimitry Andric _mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) {
632349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
633349cc55cSDimitry Andric                                               (__v16hf)_mm256_sqrt_ph(__A),
634349cc55cSDimitry Andric                                               (__v16hf)_mm256_setzero_ph());
635349cc55cSDimitry Andric }
636349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_fpclassph128_mask applied to A (cast to __v8hf)
 * with the immediate (int)(imm) and (__mmask8)(U) as the mask argument; the
 * result is cast to __mmask8. */
637349cc55cSDimitry Andric #define _mm_mask_fpclass_ph_mask(U, A, imm)                                    \
638349cc55cSDimitry Andric   ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A),            \
639349cc55cSDimitry Andric                                               (int)(imm), (__mmask8)(U)))
640349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_fpclassph128_mask applied to A (cast to __v8hf)
 * with the immediate (int)(imm) and an all-ones (__mmask8)-1 mask argument;
 * the result is cast to __mmask8. */
641349cc55cSDimitry Andric #define _mm_fpclass_ph_mask(A, imm)                                            \
642349cc55cSDimitry Andric   ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A),            \
643349cc55cSDimitry Andric                                               (int)(imm), (__mmask8)-1))
644349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_fpclassph256_mask applied to A (cast to __v16hf)
 * with the immediate (int)(imm) and (__mmask16)(U) as the mask argument; the
 * result is cast to __mmask16. */
645349cc55cSDimitry Andric #define _mm256_mask_fpclass_ph_mask(U, A, imm)                                 \
646349cc55cSDimitry Andric   ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A),          \
647349cc55cSDimitry Andric                                                (int)(imm), (__mmask16)(U)))
648349cc55cSDimitry Andric 
/* Expands to __builtin_ia32_fpclassph256_mask applied to A (cast to __v16hf)
 * with the immediate (int)(imm) and an all-ones (__mmask16)-1 mask argument;
 * the result is cast to __mmask16. */
649349cc55cSDimitry Andric #define _mm256_fpclass_ph_mask(A, imm)                                         \
650349cc55cSDimitry Andric   ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A),          \
651349cc55cSDimitry Andric                                                (int)(imm), (__mmask16)-1))
652349cc55cSDimitry Andric 
653349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) {
654349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
655349cc55cSDimitry Andric       (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
656349cc55cSDimitry Andric }
657349cc55cSDimitry Andric 
658349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W,
659349cc55cSDimitry Andric                                                                   __mmask8 __U,
660349cc55cSDimitry Andric                                                                   __m128d __A) {
661349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
662349cc55cSDimitry Andric                                                    (__mmask8)__U);
663349cc55cSDimitry Andric }
664349cc55cSDimitry Andric 
665349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
666349cc55cSDimitry Andric _mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
667349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
668349cc55cSDimitry Andric       (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
669349cc55cSDimitry Andric }
670349cc55cSDimitry Andric 
671349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) {
672349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
673349cc55cSDimitry Andric       (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
674349cc55cSDimitry Andric }
675349cc55cSDimitry Andric 
676349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
677349cc55cSDimitry Andric _mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
678349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
679349cc55cSDimitry Andric                                                    (__mmask8)__U);
680349cc55cSDimitry Andric }
681349cc55cSDimitry Andric 
682349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
683349cc55cSDimitry Andric _mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
684349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
685349cc55cSDimitry Andric       (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
686349cc55cSDimitry Andric }
687349cc55cSDimitry Andric 
688349cc55cSDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) {
689349cc55cSDimitry Andric   return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
690349cc55cSDimitry Andric       (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1);
691349cc55cSDimitry Andric }
692349cc55cSDimitry Andric 
693349cc55cSDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W,
694349cc55cSDimitry Andric                                                                   __mmask8 __U,
695349cc55cSDimitry Andric                                                                   __m128h __A) {
696349cc55cSDimitry Andric   return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
697349cc55cSDimitry Andric                                                    (__mmask8)__U);
698349cc55cSDimitry Andric }
699349cc55cSDimitry Andric 
700349cc55cSDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128
701349cc55cSDimitry Andric _mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
702349cc55cSDimitry Andric   return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
703349cc55cSDimitry Andric       (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U);
704349cc55cSDimitry Andric }
705349cc55cSDimitry Andric 
706349cc55cSDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) {
707349cc55cSDimitry Andric   return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
708349cc55cSDimitry Andric       (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1);
709349cc55cSDimitry Andric }
710349cc55cSDimitry Andric 
711349cc55cSDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256
712349cc55cSDimitry Andric _mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
713349cc55cSDimitry Andric   return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
714349cc55cSDimitry Andric                                                    (__mmask8)__U);
715349cc55cSDimitry Andric }
716349cc55cSDimitry Andric 
717349cc55cSDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256
718349cc55cSDimitry Andric _mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
719349cc55cSDimitry Andric   return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
720349cc55cSDimitry Andric       (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U);
721349cc55cSDimitry Andric }
722349cc55cSDimitry Andric 
723349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) {
724349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2w128_mask(
725349cc55cSDimitry Andric       (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
726349cc55cSDimitry Andric }
727349cc55cSDimitry Andric 
728349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
729349cc55cSDimitry Andric _mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
730349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
731349cc55cSDimitry Andric                                                   (__mmask8)__U);
732349cc55cSDimitry Andric }
733349cc55cSDimitry Andric 
734349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
735349cc55cSDimitry Andric _mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
736349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2w128_mask(
737349cc55cSDimitry Andric       (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
738349cc55cSDimitry Andric }
739349cc55cSDimitry Andric 
740349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
741349cc55cSDimitry Andric _mm256_cvtph_epi16(__m256h __A) {
742349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2w256_mask(
743349cc55cSDimitry Andric       (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
744349cc55cSDimitry Andric }
745349cc55cSDimitry Andric 
746349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
747349cc55cSDimitry Andric _mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
748349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
749349cc55cSDimitry Andric                                                   (__mmask16)__U);
750349cc55cSDimitry Andric }
751349cc55cSDimitry Andric 
752349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
753349cc55cSDimitry Andric _mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) {
754349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2w256_mask(
755349cc55cSDimitry Andric       (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
756349cc55cSDimitry Andric }
757349cc55cSDimitry Andric 
758349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) {
759349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2w128_mask(
760349cc55cSDimitry Andric       (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
761349cc55cSDimitry Andric }
762349cc55cSDimitry Andric 
763349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
764349cc55cSDimitry Andric _mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
765349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
766349cc55cSDimitry Andric                                                    (__mmask8)__U);
767349cc55cSDimitry Andric }
768349cc55cSDimitry Andric 
769349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
770349cc55cSDimitry Andric _mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
771349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2w128_mask(
772349cc55cSDimitry Andric       (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
773349cc55cSDimitry Andric }
774349cc55cSDimitry Andric 
775349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
776349cc55cSDimitry Andric _mm256_cvttph_epi16(__m256h __A) {
777349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2w256_mask(
778349cc55cSDimitry Andric       (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
779349cc55cSDimitry Andric }
780349cc55cSDimitry Andric 
781349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
782349cc55cSDimitry Andric _mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
783349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
784349cc55cSDimitry Andric                                                    (__mmask16)__U);
785349cc55cSDimitry Andric }
786349cc55cSDimitry Andric 
787349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
788349cc55cSDimitry Andric _mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
789349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2w256_mask(
790349cc55cSDimitry Andric       (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
791349cc55cSDimitry Andric }
792349cc55cSDimitry Andric 
793349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) {
794349cc55cSDimitry Andric   return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
795349cc55cSDimitry Andric }
796349cc55cSDimitry Andric 
797349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
798349cc55cSDimitry Andric _mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
799349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
800349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
801349cc55cSDimitry Andric }
802349cc55cSDimitry Andric 
803349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
804349cc55cSDimitry Andric _mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
805349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
806349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
807349cc55cSDimitry Andric }
808349cc55cSDimitry Andric 
809349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
810349cc55cSDimitry Andric _mm256_cvtepi16_ph(__m256i __A) {
811349cc55cSDimitry Andric   return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
812349cc55cSDimitry Andric }
813349cc55cSDimitry Andric 
814349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
815349cc55cSDimitry Andric _mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
816349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
817349cc55cSDimitry Andric       (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
818349cc55cSDimitry Andric }
819349cc55cSDimitry Andric 
820349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
821349cc55cSDimitry Andric _mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
822349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
823349cc55cSDimitry Andric                                               (__v16hf)_mm256_cvtepi16_ph(__A),
824349cc55cSDimitry Andric                                               (__v16hf)_mm256_setzero_ph());
825349cc55cSDimitry Andric }
826349cc55cSDimitry Andric 
827349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) {
828349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
829349cc55cSDimitry Andric       (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
830349cc55cSDimitry Andric }
831349cc55cSDimitry Andric 
832349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
833349cc55cSDimitry Andric _mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
834349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
835349cc55cSDimitry Andric                                                    (__mmask8)__U);
836349cc55cSDimitry Andric }
837349cc55cSDimitry Andric 
838349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
839349cc55cSDimitry Andric _mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
840349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
841349cc55cSDimitry Andric       (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
842349cc55cSDimitry Andric }
843349cc55cSDimitry Andric 
844349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
845349cc55cSDimitry Andric _mm256_cvtph_epu16(__m256h __A) {
846349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
847349cc55cSDimitry Andric       (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
848349cc55cSDimitry Andric }
849349cc55cSDimitry Andric 
850349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
851349cc55cSDimitry Andric _mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
852349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
853349cc55cSDimitry Andric                                                    (__mmask16)__U);
854349cc55cSDimitry Andric }
855349cc55cSDimitry Andric 
856349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
857349cc55cSDimitry Andric _mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) {
858349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
859349cc55cSDimitry Andric       (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
860349cc55cSDimitry Andric }
861349cc55cSDimitry Andric 
862349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) {
863349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
864349cc55cSDimitry Andric       (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
865349cc55cSDimitry Andric }
866349cc55cSDimitry Andric 
867349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
868349cc55cSDimitry Andric _mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
869349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
870349cc55cSDimitry Andric                                                     (__mmask8)__U);
871349cc55cSDimitry Andric }
872349cc55cSDimitry Andric 
873349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
874349cc55cSDimitry Andric _mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
875349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
876349cc55cSDimitry Andric       (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
877349cc55cSDimitry Andric }
878349cc55cSDimitry Andric 
879349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
880349cc55cSDimitry Andric _mm256_cvttph_epu16(__m256h __A) {
881349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
882349cc55cSDimitry Andric       (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
883349cc55cSDimitry Andric }
884349cc55cSDimitry Andric 
885349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
886349cc55cSDimitry Andric _mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
887349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
888349cc55cSDimitry Andric                                                     (__mmask16)__U);
889349cc55cSDimitry Andric }
890349cc55cSDimitry Andric 
891349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
892349cc55cSDimitry Andric _mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
893349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
894349cc55cSDimitry Andric       (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
895349cc55cSDimitry Andric }
896349cc55cSDimitry Andric 
897349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) {
898349cc55cSDimitry Andric   return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
899349cc55cSDimitry Andric }
900349cc55cSDimitry Andric 
901349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
902349cc55cSDimitry Andric _mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
903349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
904349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
905349cc55cSDimitry Andric }
906349cc55cSDimitry Andric 
907349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
908349cc55cSDimitry Andric _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
909349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
910349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
911349cc55cSDimitry Andric }
912349cc55cSDimitry Andric 
913349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
914349cc55cSDimitry Andric _mm256_cvtepu16_ph(__m256i __A) {
915349cc55cSDimitry Andric   return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
916349cc55cSDimitry Andric }
917349cc55cSDimitry Andric 
918349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
919349cc55cSDimitry Andric _mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
920349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
921349cc55cSDimitry Andric       (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
922349cc55cSDimitry Andric }
923349cc55cSDimitry Andric 
924349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
925349cc55cSDimitry Andric _mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
926349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
927349cc55cSDimitry Andric                                               (__v16hf)_mm256_cvtepu16_ph(__A),
928349cc55cSDimitry Andric                                               (__v16hf)_mm256_setzero_ph());
929349cc55cSDimitry Andric }
930349cc55cSDimitry Andric 
931349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) {
932349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
933349cc55cSDimitry Andric       (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
934349cc55cSDimitry Andric }
935349cc55cSDimitry Andric 
936349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
937349cc55cSDimitry Andric _mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
938349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
939349cc55cSDimitry Andric                                                    (__mmask8)__U);
940349cc55cSDimitry Andric }
941349cc55cSDimitry Andric 
942349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
943349cc55cSDimitry Andric _mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
944349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
945349cc55cSDimitry Andric       (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
946349cc55cSDimitry Andric }
947349cc55cSDimitry Andric 
948349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
949349cc55cSDimitry Andric _mm256_cvtph_epi32(__m128h __A) {
950349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
951349cc55cSDimitry Andric       (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
952349cc55cSDimitry Andric }
953349cc55cSDimitry Andric 
954349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
955349cc55cSDimitry Andric _mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
956349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
957349cc55cSDimitry Andric                                                    (__mmask8)__U);
958349cc55cSDimitry Andric }
959349cc55cSDimitry Andric 
960349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
961349cc55cSDimitry Andric _mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
962349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
963349cc55cSDimitry Andric       (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
964349cc55cSDimitry Andric }
965349cc55cSDimitry Andric 
966349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) {
967349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
968349cc55cSDimitry Andric       (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
969349cc55cSDimitry Andric }
970349cc55cSDimitry Andric 
971349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
972349cc55cSDimitry Andric _mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
973349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
974349cc55cSDimitry Andric                                                     (__mmask8)__U);
975349cc55cSDimitry Andric }
976349cc55cSDimitry Andric 
977349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
978349cc55cSDimitry Andric _mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
979349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
980349cc55cSDimitry Andric       (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
981349cc55cSDimitry Andric }
982349cc55cSDimitry Andric 
983349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
984349cc55cSDimitry Andric _mm256_cvtph_epu32(__m128h __A) {
985349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
986349cc55cSDimitry Andric       (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
987349cc55cSDimitry Andric }
988349cc55cSDimitry Andric 
989349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
990349cc55cSDimitry Andric _mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
991349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
992349cc55cSDimitry Andric                                                     (__mmask8)__U);
993349cc55cSDimitry Andric }
994349cc55cSDimitry Andric 
995349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
996349cc55cSDimitry Andric _mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
997349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
998349cc55cSDimitry Andric       (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
999349cc55cSDimitry Andric }
1000349cc55cSDimitry Andric 
1001349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) {
1002349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1003349cc55cSDimitry Andric       (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1004349cc55cSDimitry Andric }
1005349cc55cSDimitry Andric 
1006349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1007349cc55cSDimitry Andric _mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1008349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
1009349cc55cSDimitry Andric                                                    (__mmask8)__U);
1010349cc55cSDimitry Andric }
1011349cc55cSDimitry Andric 
1012349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1013349cc55cSDimitry Andric _mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
1014349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
1015349cc55cSDimitry Andric       (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1016349cc55cSDimitry Andric }
1017349cc55cSDimitry Andric 
1018349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1019349cc55cSDimitry Andric _mm256_cvtepi32_ph(__m256i __A) {
1020349cc55cSDimitry Andric   return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
1021349cc55cSDimitry Andric }
1022349cc55cSDimitry Andric 
1023349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1024349cc55cSDimitry Andric _mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1025349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1026349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
1027349cc55cSDimitry Andric }
1028349cc55cSDimitry Andric 
1029349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1030349cc55cSDimitry Andric _mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
1031349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1032349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
1033349cc55cSDimitry Andric }
1034349cc55cSDimitry Andric 
1035349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) {
1036349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1037349cc55cSDimitry Andric       (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1038349cc55cSDimitry Andric }
1039349cc55cSDimitry Andric 
1040349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1041349cc55cSDimitry Andric _mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1042349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
1043349cc55cSDimitry Andric                                                     (__mmask8)__U);
1044349cc55cSDimitry Andric }
1045349cc55cSDimitry Andric 
1046349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1047349cc55cSDimitry Andric _mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
1048349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
1049349cc55cSDimitry Andric       (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1050349cc55cSDimitry Andric }
1051349cc55cSDimitry Andric 
1052349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1053349cc55cSDimitry Andric _mm256_cvtepu32_ph(__m256i __A) {
1054349cc55cSDimitry Andric   return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
1055349cc55cSDimitry Andric }
1056349cc55cSDimitry Andric 
1057349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1058349cc55cSDimitry Andric _mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1059349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1060349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
1061349cc55cSDimitry Andric }
1062349cc55cSDimitry Andric 
1063349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1064349cc55cSDimitry Andric _mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
1065349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1066349cc55cSDimitry Andric       (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
1067349cc55cSDimitry Andric }
1068349cc55cSDimitry Andric 
1069349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) {
1070349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1071349cc55cSDimitry Andric       (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
1072349cc55cSDimitry Andric }
1073349cc55cSDimitry Andric 
1074349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1075349cc55cSDimitry Andric _mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
1076349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
1077349cc55cSDimitry Andric                                                     (__mmask8)__U);
1078349cc55cSDimitry Andric }
1079349cc55cSDimitry Andric 
1080349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1081349cc55cSDimitry Andric _mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
1082349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
1083349cc55cSDimitry Andric       (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
1084349cc55cSDimitry Andric }
1085349cc55cSDimitry Andric 
1086349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1087349cc55cSDimitry Andric _mm256_cvttph_epi32(__m128h __A) {
1088349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1089349cc55cSDimitry Andric       (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
1090349cc55cSDimitry Andric }
1091349cc55cSDimitry Andric 
1092349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1093349cc55cSDimitry Andric _mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
1094349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
1095349cc55cSDimitry Andric                                                     (__mmask8)__U);
1096349cc55cSDimitry Andric }
1097349cc55cSDimitry Andric 
1098349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1099349cc55cSDimitry Andric _mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
1100349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
1101349cc55cSDimitry Andric       (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
1102349cc55cSDimitry Andric }
1103349cc55cSDimitry Andric 
1104349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) {
1105349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1106349cc55cSDimitry Andric       (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
1107349cc55cSDimitry Andric }
1108349cc55cSDimitry Andric 
1109349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1110349cc55cSDimitry Andric _mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
1111349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
1112349cc55cSDimitry Andric                                                      (__mmask8)__U);
1113349cc55cSDimitry Andric }
1114349cc55cSDimitry Andric 
1115349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1116349cc55cSDimitry Andric _mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
1117349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
1118349cc55cSDimitry Andric       (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
1119349cc55cSDimitry Andric }
1120349cc55cSDimitry Andric 
1121349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1122349cc55cSDimitry Andric _mm256_cvttph_epu32(__m128h __A) {
1123349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1124349cc55cSDimitry Andric       (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
1125349cc55cSDimitry Andric }
1126349cc55cSDimitry Andric 
1127349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1128349cc55cSDimitry Andric _mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
1129349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
1130349cc55cSDimitry Andric                                                      (__mmask8)__U);
1131349cc55cSDimitry Andric }
1132349cc55cSDimitry Andric 
1133349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1134349cc55cSDimitry Andric _mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
1135349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
1136349cc55cSDimitry Andric       (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
1137349cc55cSDimitry Andric }
1138349cc55cSDimitry Andric 
1139349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) {
1140349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1141349cc55cSDimitry Andric       (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1142349cc55cSDimitry Andric }
1143349cc55cSDimitry Andric 
1144349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1145349cc55cSDimitry Andric _mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1146349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
1147349cc55cSDimitry Andric                                                    (__mmask8)__U);
1148349cc55cSDimitry Andric }
1149349cc55cSDimitry Andric 
1150349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1151349cc55cSDimitry Andric _mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
1152349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
1153349cc55cSDimitry Andric       (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1154349cc55cSDimitry Andric }
1155349cc55cSDimitry Andric 
1156349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1157349cc55cSDimitry Andric _mm256_cvtepi64_ph(__m256i __A) {
1158349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1159349cc55cSDimitry Andric       (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1160349cc55cSDimitry Andric }
1161349cc55cSDimitry Andric 
1162349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1163349cc55cSDimitry Andric _mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1164349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
1165349cc55cSDimitry Andric                                                    (__mmask8)__U);
1166349cc55cSDimitry Andric }
1167349cc55cSDimitry Andric 
1168349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1169349cc55cSDimitry Andric _mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) {
1170349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
1171349cc55cSDimitry Andric       (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1172349cc55cSDimitry Andric }
1173349cc55cSDimitry Andric 
1174349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) {
1175349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1176349cc55cSDimitry Andric       (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1177349cc55cSDimitry Andric }
1178349cc55cSDimitry Andric 
1179349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1180349cc55cSDimitry Andric _mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1181349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
1182349cc55cSDimitry Andric                                                    (__mmask8)__U);
1183349cc55cSDimitry Andric }
1184349cc55cSDimitry Andric 
1185349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1186349cc55cSDimitry Andric _mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1187349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
1188349cc55cSDimitry Andric       (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1189349cc55cSDimitry Andric }
1190349cc55cSDimitry Andric 
1191349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1192349cc55cSDimitry Andric _mm256_cvtph_epi64(__m128h __A) {
1193349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1194349cc55cSDimitry Andric       (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1195349cc55cSDimitry Andric }
1196349cc55cSDimitry Andric 
1197349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1198349cc55cSDimitry Andric _mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1199349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
1200349cc55cSDimitry Andric                                                    (__mmask8)__U);
1201349cc55cSDimitry Andric }
1202349cc55cSDimitry Andric 
1203349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1204349cc55cSDimitry Andric _mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
1205349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
1206349cc55cSDimitry Andric       (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1207349cc55cSDimitry Andric }
1208349cc55cSDimitry Andric 
1209349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) {
1210349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1211349cc55cSDimitry Andric       (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1212349cc55cSDimitry Andric }
1213349cc55cSDimitry Andric 
1214349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1215349cc55cSDimitry Andric _mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
1216349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
1217349cc55cSDimitry Andric                                                     (__mmask8)__U);
1218349cc55cSDimitry Andric }
1219349cc55cSDimitry Andric 
1220349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1221349cc55cSDimitry Andric _mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
1222349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
1223349cc55cSDimitry Andric       (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1224349cc55cSDimitry Andric }
1225349cc55cSDimitry Andric 
1226349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1227349cc55cSDimitry Andric _mm256_cvtepu64_ph(__m256i __A) {
1228349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1229349cc55cSDimitry Andric       (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1230349cc55cSDimitry Andric }
1231349cc55cSDimitry Andric 
1232349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1233349cc55cSDimitry Andric _mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
1234349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
1235349cc55cSDimitry Andric                                                     (__mmask8)__U);
1236349cc55cSDimitry Andric }
1237349cc55cSDimitry Andric 
1238349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1239349cc55cSDimitry Andric _mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) {
1240349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
1241349cc55cSDimitry Andric       (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1242349cc55cSDimitry Andric }
1243349cc55cSDimitry Andric 
1244349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) {
1245349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1246349cc55cSDimitry Andric       (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1247349cc55cSDimitry Andric }
1248349cc55cSDimitry Andric 
1249349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1250349cc55cSDimitry Andric _mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1251349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1252349cc55cSDimitry Andric                                                     (__mmask8)__U);
1253349cc55cSDimitry Andric }
1254349cc55cSDimitry Andric 
1255349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1256349cc55cSDimitry Andric _mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1257349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
1258349cc55cSDimitry Andric       (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1259349cc55cSDimitry Andric }
1260349cc55cSDimitry Andric 
1261349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1262349cc55cSDimitry Andric _mm256_cvtph_epu64(__m128h __A) {
1263349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1264349cc55cSDimitry Andric       (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1265349cc55cSDimitry Andric }
1266349cc55cSDimitry Andric 
1267349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1268349cc55cSDimitry Andric _mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1269349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1270349cc55cSDimitry Andric                                                     (__mmask8)__U);
1271349cc55cSDimitry Andric }
1272349cc55cSDimitry Andric 
1273349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1274349cc55cSDimitry Andric _mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
1275349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
1276349cc55cSDimitry Andric       (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1277349cc55cSDimitry Andric }
1278349cc55cSDimitry Andric 
1279349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) {
1280349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1281349cc55cSDimitry Andric       (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
1282349cc55cSDimitry Andric }
1283349cc55cSDimitry Andric 
1284349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1285349cc55cSDimitry Andric _mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
1286349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
1287349cc55cSDimitry Andric                                                     (__mmask8)__U);
1288349cc55cSDimitry Andric }
1289349cc55cSDimitry Andric 
1290349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1291349cc55cSDimitry Andric _mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
1292349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
1293349cc55cSDimitry Andric       (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
1294349cc55cSDimitry Andric }
1295349cc55cSDimitry Andric 
1296349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1297349cc55cSDimitry Andric _mm256_cvttph_epi64(__m128h __A) {
1298349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1299349cc55cSDimitry Andric       (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
1300349cc55cSDimitry Andric }
1301349cc55cSDimitry Andric 
1302349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1303349cc55cSDimitry Andric _mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
1304349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
1305349cc55cSDimitry Andric                                                     (__mmask8)__U);
1306349cc55cSDimitry Andric }
1307349cc55cSDimitry Andric 
1308349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1309349cc55cSDimitry Andric _mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
1310349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
1311349cc55cSDimitry Andric       (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
1312349cc55cSDimitry Andric }
1313349cc55cSDimitry Andric 
1314349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) {
1315349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1316349cc55cSDimitry Andric       (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
1317349cc55cSDimitry Andric }
1318349cc55cSDimitry Andric 
1319349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1320349cc55cSDimitry Andric _mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
1321349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
1322349cc55cSDimitry Andric                                                      (__mmask8)__U);
1323349cc55cSDimitry Andric }
1324349cc55cSDimitry Andric 
1325349cc55cSDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1326349cc55cSDimitry Andric _mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
1327349cc55cSDimitry Andric   return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
1328349cc55cSDimitry Andric       (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
1329349cc55cSDimitry Andric }
1330349cc55cSDimitry Andric 
1331349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1332349cc55cSDimitry Andric _mm256_cvttph_epu64(__m128h __A) {
1333349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1334349cc55cSDimitry Andric       (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
1335349cc55cSDimitry Andric }
1336349cc55cSDimitry Andric 
1337349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1338349cc55cSDimitry Andric _mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
1339349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
1340349cc55cSDimitry Andric                                                      (__mmask8)__U);
1341349cc55cSDimitry Andric }
1342349cc55cSDimitry Andric 
1343349cc55cSDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1344349cc55cSDimitry Andric _mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
1345349cc55cSDimitry Andric   return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
1346349cc55cSDimitry Andric       (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
1347349cc55cSDimitry Andric }
1348349cc55cSDimitry Andric 
1349349cc55cSDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) {
1350349cc55cSDimitry Andric   return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1351349cc55cSDimitry Andric       (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1);
1352349cc55cSDimitry Andric }
1353349cc55cSDimitry Andric 
1354349cc55cSDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W,
1355349cc55cSDimitry Andric                                                                   __mmask8 __U,
1356349cc55cSDimitry Andric                                                                   __m128h __A) {
1357349cc55cSDimitry Andric   return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
1358349cc55cSDimitry Andric                                                    (__mmask8)__U);
1359349cc55cSDimitry Andric }
1360349cc55cSDimitry Andric 
1361349cc55cSDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128
1362349cc55cSDimitry Andric _mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1363349cc55cSDimitry Andric   return (__m128)__builtin_ia32_vcvtph2psx128_mask(
1364349cc55cSDimitry Andric       (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
1365349cc55cSDimitry Andric }
1366349cc55cSDimitry Andric 
1367349cc55cSDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) {
1368349cc55cSDimitry Andric   return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1369349cc55cSDimitry Andric       (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1);
1370349cc55cSDimitry Andric }
1371349cc55cSDimitry Andric 
1372349cc55cSDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256
1373349cc55cSDimitry Andric _mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
1374349cc55cSDimitry Andric   return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
1375349cc55cSDimitry Andric                                                    (__mmask8)__U);
1376349cc55cSDimitry Andric }
1377349cc55cSDimitry Andric 
1378349cc55cSDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256
1379349cc55cSDimitry Andric _mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
1380349cc55cSDimitry Andric   return (__m256)__builtin_ia32_vcvtph2psx256_mask(
1381349cc55cSDimitry Andric       (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U);
1382349cc55cSDimitry Andric }
1383349cc55cSDimitry Andric 
1384349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) {
1385349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1386349cc55cSDimitry Andric       (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1387349cc55cSDimitry Andric }
1388349cc55cSDimitry Andric 
1389349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W,
1390349cc55cSDimitry Andric                                                                    __mmask8 __U,
1391349cc55cSDimitry Andric                                                                    __m128 __A) {
1392349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
1393349cc55cSDimitry Andric                                                     (__mmask8)__U);
1394349cc55cSDimitry Andric }
1395349cc55cSDimitry Andric 
1396349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1397349cc55cSDimitry Andric _mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
1398349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
1399349cc55cSDimitry Andric       (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1400349cc55cSDimitry Andric }
1401349cc55cSDimitry Andric 
1402349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) {
1403349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1404349cc55cSDimitry Andric       (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
1405349cc55cSDimitry Andric }
1406349cc55cSDimitry Andric 
1407349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1408349cc55cSDimitry Andric _mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
1409349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
1410349cc55cSDimitry Andric                                                     (__mmask8)__U);
1411349cc55cSDimitry Andric }
1412349cc55cSDimitry Andric 
1413349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS256
1414349cc55cSDimitry Andric _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
1415349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
1416349cc55cSDimitry Andric       (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
1417349cc55cSDimitry Andric }
1418349cc55cSDimitry Andric 
1419349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A,
1420349cc55cSDimitry Andric                                                              __m128h __B,
1421349cc55cSDimitry Andric                                                              __m128h __C) {
1422349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
1423349cc55cSDimitry Andric                                           (__v8hf)__C);
1424349cc55cSDimitry Andric }
1425349cc55cSDimitry Andric 
1426349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A,
1427349cc55cSDimitry Andric                                                                   __mmask8 __U,
1428349cc55cSDimitry Andric                                                                   __m128h __B,
1429349cc55cSDimitry Andric                                                                   __m128h __C) {
1430349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1431349cc55cSDimitry Andric       (__mmask8)__U,
1432349cc55cSDimitry Andric       __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1433349cc55cSDimitry Andric       (__v8hf)__A);
1434349cc55cSDimitry Andric }
1435349cc55cSDimitry Andric 
1436349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1437349cc55cSDimitry Andric _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1438349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1439349cc55cSDimitry Andric       (__mmask8)__U,
1440349cc55cSDimitry Andric       __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1441349cc55cSDimitry Andric       (__v8hf)__C);
1442349cc55cSDimitry Andric }
1443349cc55cSDimitry Andric 
1444349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1445349cc55cSDimitry Andric _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1446349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1447349cc55cSDimitry Andric       (__mmask8)__U,
1448349cc55cSDimitry Andric       __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1449349cc55cSDimitry Andric       (__v8hf)_mm_setzero_ph());
1450349cc55cSDimitry Andric }
1451349cc55cSDimitry Andric 
1452349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A,
1453349cc55cSDimitry Andric                                                              __m128h __B,
1454349cc55cSDimitry Andric                                                              __m128h __C) {
1455349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
1456349cc55cSDimitry Andric                                           -(__v8hf)__C);
1457349cc55cSDimitry Andric }
1458349cc55cSDimitry Andric 
1459349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A,
1460349cc55cSDimitry Andric                                                                   __mmask8 __U,
1461349cc55cSDimitry Andric                                                                   __m128h __B,
1462349cc55cSDimitry Andric                                                                   __m128h __C) {
1463349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1464349cc55cSDimitry Andric       (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1465349cc55cSDimitry Andric       (__v8hf)__A);
1466349cc55cSDimitry Andric }
1467349cc55cSDimitry Andric 
1468349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1469349cc55cSDimitry Andric _mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1470349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1471349cc55cSDimitry Andric       (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1472349cc55cSDimitry Andric       (__v8hf)_mm_setzero_ph());
1473349cc55cSDimitry Andric }
1474349cc55cSDimitry Andric 
1475349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1476349cc55cSDimitry Andric _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1477349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1478349cc55cSDimitry Andric       (__mmask8)__U,
1479349cc55cSDimitry Andric       __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1480349cc55cSDimitry Andric       (__v8hf)__C);
1481349cc55cSDimitry Andric }
1482349cc55cSDimitry Andric 
1483349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1484349cc55cSDimitry Andric _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1485349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1486349cc55cSDimitry Andric       (__mmask8)__U,
1487349cc55cSDimitry Andric       __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1488349cc55cSDimitry Andric       (__v8hf)_mm_setzero_ph());
1489349cc55cSDimitry Andric }
1490349cc55cSDimitry Andric 
1491349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1492349cc55cSDimitry Andric _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1493349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1494349cc55cSDimitry Andric       (__mmask8)__U,
1495349cc55cSDimitry Andric       __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1496349cc55cSDimitry Andric       (__v8hf)_mm_setzero_ph());
1497349cc55cSDimitry Andric }
1498349cc55cSDimitry Andric 
1499349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A,
1500349cc55cSDimitry Andric                                                                 __m256h __B,
1501349cc55cSDimitry Andric                                                                 __m256h __C) {
1502349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
1503349cc55cSDimitry Andric                                              (__v16hf)__C);
1504349cc55cSDimitry Andric }
1505349cc55cSDimitry Andric 
1506349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1507349cc55cSDimitry Andric _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1508349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1509349cc55cSDimitry Andric       (__mmask16)__U,
1510349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1511349cc55cSDimitry Andric       (__v16hf)__A);
1512349cc55cSDimitry Andric }
1513349cc55cSDimitry Andric 
1514349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1515349cc55cSDimitry Andric _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1516349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1517349cc55cSDimitry Andric       (__mmask16)__U,
1518349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1519349cc55cSDimitry Andric       (__v16hf)__C);
1520349cc55cSDimitry Andric }
1521349cc55cSDimitry Andric 
1522349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1523349cc55cSDimitry Andric _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1524349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1525349cc55cSDimitry Andric       (__mmask16)__U,
1526349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1527349cc55cSDimitry Andric       (__v16hf)_mm256_setzero_ph());
1528349cc55cSDimitry Andric }
1529349cc55cSDimitry Andric 
1530349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A,
1531349cc55cSDimitry Andric                                                                 __m256h __B,
1532349cc55cSDimitry Andric                                                                 __m256h __C) {
1533349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
1534349cc55cSDimitry Andric                                              -(__v16hf)__C);
1535349cc55cSDimitry Andric }
1536349cc55cSDimitry Andric 
1537349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1538349cc55cSDimitry Andric _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1539349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1540349cc55cSDimitry Andric       (__mmask16)__U,
1541349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1542349cc55cSDimitry Andric       (__v16hf)__A);
1543349cc55cSDimitry Andric }
1544349cc55cSDimitry Andric 
1545349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1546349cc55cSDimitry Andric _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1547349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1548349cc55cSDimitry Andric       (__mmask16)__U,
1549349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1550349cc55cSDimitry Andric       (__v16hf)_mm256_setzero_ph());
1551349cc55cSDimitry Andric }
1552349cc55cSDimitry Andric 
1553349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1554349cc55cSDimitry Andric _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1555349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1556349cc55cSDimitry Andric       (__mmask16)__U,
1557349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1558349cc55cSDimitry Andric       (__v16hf)__C);
1559349cc55cSDimitry Andric }
1560349cc55cSDimitry Andric 
1561349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1562349cc55cSDimitry Andric _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1563349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1564349cc55cSDimitry Andric       (__mmask16)__U,
1565349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1566349cc55cSDimitry Andric       (__v16hf)_mm256_setzero_ph());
1567349cc55cSDimitry Andric }
1568349cc55cSDimitry Andric 
1569349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1570349cc55cSDimitry Andric _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1571349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1572349cc55cSDimitry Andric       (__mmask16)__U,
1573349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1574349cc55cSDimitry Andric       (__v16hf)_mm256_setzero_ph());
1575349cc55cSDimitry Andric }
1576349cc55cSDimitry Andric 
1577349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A,
1578349cc55cSDimitry Andric                                                                 __m128h __B,
1579349cc55cSDimitry Andric                                                                 __m128h __C) {
1580349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1581349cc55cSDimitry Andric                                              (__v8hf)__C);
1582349cc55cSDimitry Andric }
1583349cc55cSDimitry Andric 
1584349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1585349cc55cSDimitry Andric _mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1586349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1587349cc55cSDimitry Andric       (__mmask8)__U,
1588349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1589349cc55cSDimitry Andric       (__v8hf)__A);
1590349cc55cSDimitry Andric }
1591349cc55cSDimitry Andric 
1592349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1593349cc55cSDimitry Andric _mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1594349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1595349cc55cSDimitry Andric       (__mmask8)__U,
1596349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1597349cc55cSDimitry Andric       (__v8hf)__C);
1598349cc55cSDimitry Andric }
1599349cc55cSDimitry Andric 
1600349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1601349cc55cSDimitry Andric _mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1602349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1603349cc55cSDimitry Andric       (__mmask8)__U,
1604349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
1605349cc55cSDimitry Andric       (__v8hf)_mm_setzero_ph());
1606349cc55cSDimitry Andric }
1607349cc55cSDimitry Andric 
1608349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A,
1609349cc55cSDimitry Andric                                                                 __m128h __B,
1610349cc55cSDimitry Andric                                                                 __m128h __C) {
1611349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
1612349cc55cSDimitry Andric                                              -(__v8hf)__C);
1613349cc55cSDimitry Andric }
1614349cc55cSDimitry Andric 
1615349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1616349cc55cSDimitry Andric _mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1617349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1618349cc55cSDimitry Andric       (__mmask8)__U,
1619349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1620349cc55cSDimitry Andric       (__v8hf)__A);
1621349cc55cSDimitry Andric }
1622349cc55cSDimitry Andric 
1623349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1624349cc55cSDimitry Andric _mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1625349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1626349cc55cSDimitry Andric       (__mmask8)__U,
1627349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1628349cc55cSDimitry Andric       (__v8hf)_mm_setzero_ph());
1629349cc55cSDimitry Andric }
1630349cc55cSDimitry Andric 
1631349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1632349cc55cSDimitry Andric _mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
1633349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1634349cc55cSDimitry Andric                                                 (__v16hf)__C);
1635349cc55cSDimitry Andric }
1636349cc55cSDimitry Andric 
1637349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1638349cc55cSDimitry Andric _mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1639349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1640349cc55cSDimitry Andric       (__mmask16)__U,
1641349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1642349cc55cSDimitry Andric       (__v16hf)__A);
1643349cc55cSDimitry Andric }
1644349cc55cSDimitry Andric 
1645349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1646349cc55cSDimitry Andric _mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1647349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1648349cc55cSDimitry Andric       (__mmask16)__U,
1649349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1650349cc55cSDimitry Andric       (__v16hf)__C);
1651349cc55cSDimitry Andric }
1652349cc55cSDimitry Andric 
1653349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1654349cc55cSDimitry Andric _mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1655349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1656349cc55cSDimitry Andric       (__mmask16)__U,
1657349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
1658349cc55cSDimitry Andric       (__v16hf)_mm256_setzero_ph());
1659349cc55cSDimitry Andric }
1660349cc55cSDimitry Andric 
1661349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1662349cc55cSDimitry Andric _mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
1663349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
1664349cc55cSDimitry Andric                                                 -(__v16hf)__C);
1665349cc55cSDimitry Andric }
1666349cc55cSDimitry Andric 
1667349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1668349cc55cSDimitry Andric _mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1669349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1670349cc55cSDimitry Andric       (__mmask16)__U,
1671349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1672349cc55cSDimitry Andric       (__v16hf)__A);
1673349cc55cSDimitry Andric }
1674349cc55cSDimitry Andric 
1675349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1676349cc55cSDimitry Andric _mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
1677349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1678349cc55cSDimitry Andric       (__mmask16)__U,
1679349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1680349cc55cSDimitry Andric       (__v16hf)_mm256_setzero_ph());
1681349cc55cSDimitry Andric }
1682349cc55cSDimitry Andric 
1683349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1684349cc55cSDimitry Andric _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1685349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1686349cc55cSDimitry Andric       (__mmask8)__U,
1687349cc55cSDimitry Andric       __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1688349cc55cSDimitry Andric       (__v8hf)__C);
1689349cc55cSDimitry Andric }
1690349cc55cSDimitry Andric 
1691349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1692349cc55cSDimitry Andric _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1693349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1694349cc55cSDimitry Andric       (__mmask16)__U,
1695349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1696349cc55cSDimitry Andric       (__v16hf)__C);
1697349cc55cSDimitry Andric }
1698349cc55cSDimitry Andric 
1699349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1700349cc55cSDimitry Andric _mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1701349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1702349cc55cSDimitry Andric       (__mmask8)__U,
1703349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
1704349cc55cSDimitry Andric       (__v8hf)__C);
1705349cc55cSDimitry Andric }
1706349cc55cSDimitry Andric 
1707349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1708349cc55cSDimitry Andric _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1709349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1710349cc55cSDimitry Andric       (__mmask16)__U,
1711349cc55cSDimitry Andric       __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
1712349cc55cSDimitry Andric       (__v16hf)__C);
1713349cc55cSDimitry Andric }
1714349cc55cSDimitry Andric 
1715349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A,
1716349cc55cSDimitry Andric                                                               __m128h __B,
1717349cc55cSDimitry Andric                                                               __m128h __C) {
1718349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
1719349cc55cSDimitry Andric                                           (__v8hf)__C);
1720349cc55cSDimitry Andric }
1721349cc55cSDimitry Andric 
1722349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1723349cc55cSDimitry Andric _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1724349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1725349cc55cSDimitry Andric       (__mmask8)__U,
1726349cc55cSDimitry Andric       __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
1727349cc55cSDimitry Andric       (__v8hf)__A);
1728349cc55cSDimitry Andric }
1729349cc55cSDimitry Andric 
1730349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A,
1731349cc55cSDimitry Andric                                                                  __m256h __B,
1732349cc55cSDimitry Andric                                                                  __m256h __C) {
1733349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
1734349cc55cSDimitry Andric                                              (__v16hf)__C);
1735349cc55cSDimitry Andric }
1736349cc55cSDimitry Andric 
1737349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1738349cc55cSDimitry Andric _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1739349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1740349cc55cSDimitry Andric       (__mmask16)__U,
1741349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
1742349cc55cSDimitry Andric       (__v16hf)__A);
1743349cc55cSDimitry Andric }
1744349cc55cSDimitry Andric 
1745349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A,
1746349cc55cSDimitry Andric                                                               __m128h __B,
1747349cc55cSDimitry Andric                                                               __m128h __C) {
1748349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
1749349cc55cSDimitry Andric                                           -(__v8hf)__C);
1750349cc55cSDimitry Andric }
1751349cc55cSDimitry Andric 
1752349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1753349cc55cSDimitry Andric _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1754349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1755349cc55cSDimitry Andric       (__mmask8)__U,
1756349cc55cSDimitry Andric       __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1757349cc55cSDimitry Andric       (__v8hf)__A);
1758349cc55cSDimitry Andric }
1759349cc55cSDimitry Andric 
1760349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1761349cc55cSDimitry Andric _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1762349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128(
1763349cc55cSDimitry Andric       (__mmask8)__U,
1764349cc55cSDimitry Andric       __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
1765349cc55cSDimitry Andric       (__v8hf)__C);
1766349cc55cSDimitry Andric }
1767349cc55cSDimitry Andric 
1768349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A,
1769349cc55cSDimitry Andric                                                                  __m256h __B,
1770349cc55cSDimitry Andric                                                                  __m256h __C) {
1771349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
1772349cc55cSDimitry Andric                                              -(__v16hf)__C);
1773349cc55cSDimitry Andric }
1774349cc55cSDimitry Andric 
1775349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1776349cc55cSDimitry Andric _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
1777349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1778349cc55cSDimitry Andric       (__mmask16)__U,
1779349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1780349cc55cSDimitry Andric       (__v16hf)__A);
1781349cc55cSDimitry Andric }
1782349cc55cSDimitry Andric 
1783349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1784349cc55cSDimitry Andric _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
1785349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256(
1786349cc55cSDimitry Andric       (__mmask16)__U,
1787349cc55cSDimitry Andric       __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
1788349cc55cSDimitry Andric       (__v16hf)__C);
1789349cc55cSDimitry Andric }
1790349cc55cSDimitry Andric 
1791349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A,
1792349cc55cSDimitry Andric                                                               __m128h __B) {
1793349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1794349cc55cSDimitry Andric       (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1795349cc55cSDimitry Andric }
1796349cc55cSDimitry Andric 
1797349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1798349cc55cSDimitry Andric _mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
1799349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1800349cc55cSDimitry Andric                                                    (__v4sf)__W, (__mmask8)__U);
1801349cc55cSDimitry Andric }
1802349cc55cSDimitry Andric 
1803349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1804349cc55cSDimitry Andric _mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1805349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfcmulcph128_mask(
1806349cc55cSDimitry Andric       (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1807349cc55cSDimitry Andric }
1808349cc55cSDimitry Andric 
1809349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS128 _mm256_fcmul_pch(__m256h __A,
1810349cc55cSDimitry Andric                                                                  __m256h __B) {
1811349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1812349cc55cSDimitry Andric       (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1813349cc55cSDimitry Andric }
1814349cc55cSDimitry Andric 
1815349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1816349cc55cSDimitry Andric _mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1817349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1818349cc55cSDimitry Andric                                                    (__v8sf)__W, (__mmask8)__U);
1819349cc55cSDimitry Andric }
1820349cc55cSDimitry Andric 
1821349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1822349cc55cSDimitry Andric _mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1823349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfcmulcph256_mask(
1824349cc55cSDimitry Andric       (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1825349cc55cSDimitry Andric }
1826349cc55cSDimitry Andric 
1827349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A,
1828349cc55cSDimitry Andric                                                                __m128h __B,
1829349cc55cSDimitry Andric                                                                __m128h __C) {
1830349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1831349cc55cSDimitry Andric                                                     (__v4sf)__C, (__mmask8)-1);
1832349cc55cSDimitry Andric }
1833349cc55cSDimitry Andric 
1834349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1835349cc55cSDimitry Andric _mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1836349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectps_128(
1837349cc55cSDimitry Andric       __U,
1838349cc55cSDimitry Andric       __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B,
1839349cc55cSDimitry Andric                                         (__v4sf)__C, (__mmask8)__U),
1840349cc55cSDimitry Andric       (__v4sf)__A);
1841349cc55cSDimitry Andric }
1842349cc55cSDimitry Andric 
1843349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1844349cc55cSDimitry Andric _mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1845349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1846349cc55cSDimitry Andric                                                     (__v4sf)__C, (__mmask8)__U);
1847349cc55cSDimitry Andric }
1848349cc55cSDimitry Andric 
1849349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1850349cc55cSDimitry Andric _mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1851349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfcmaddcph128_maskz(
1852349cc55cSDimitry Andric       (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U);
1853349cc55cSDimitry Andric }
1854349cc55cSDimitry Andric 
1855349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A,
1856349cc55cSDimitry Andric                                                                   __m256h __B,
1857349cc55cSDimitry Andric                                                                   __m256h __C) {
1858349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1859349cc55cSDimitry Andric                                                     (__v8sf)__C, (__mmask8)-1);
1860349cc55cSDimitry Andric }
1861349cc55cSDimitry Andric 
1862349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1863349cc55cSDimitry Andric _mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1864349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectps_256(
1865349cc55cSDimitry Andric       __U,
1866349cc55cSDimitry Andric       __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1867349cc55cSDimitry Andric                                         (__mmask8)__U),
1868349cc55cSDimitry Andric       (__v8sf)__A);
1869349cc55cSDimitry Andric }
1870349cc55cSDimitry Andric 
1871349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1872349cc55cSDimitry Andric _mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1873349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1874349cc55cSDimitry Andric                                                     (__v8sf)__C, (__mmask8)__U);
1875349cc55cSDimitry Andric }
1876349cc55cSDimitry Andric 
1877349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1878349cc55cSDimitry Andric _mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1879349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfcmaddcph256_maskz(
1880349cc55cSDimitry Andric       (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U);
1881349cc55cSDimitry Andric }
1882349cc55cSDimitry Andric 
1883349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A,
1884349cc55cSDimitry Andric                                                              __m128h __B) {
1885349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmulcph128_mask(
1886349cc55cSDimitry Andric       (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
1887349cc55cSDimitry Andric }
1888349cc55cSDimitry Andric 
1889349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_pch(__m128h __W,
1890349cc55cSDimitry Andric                                                                   __mmask8 __U,
1891349cc55cSDimitry Andric                                                                   __m128h __A,
1892349cc55cSDimitry Andric                                                                   __m128h __B) {
1893349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B,
1894349cc55cSDimitry Andric                                                   (__v4sf)__W, (__mmask8)__U);
1895349cc55cSDimitry Andric }
1896349cc55cSDimitry Andric 
1897349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1898349cc55cSDimitry Andric _mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
1899349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmulcph128_mask(
1900349cc55cSDimitry Andric       (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
1901349cc55cSDimitry Andric }
1902349cc55cSDimitry Andric 
1903349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A,
1904349cc55cSDimitry Andric                                                                 __m256h __B) {
1905349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmulcph256_mask(
1906349cc55cSDimitry Andric       (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
1907349cc55cSDimitry Andric }
1908349cc55cSDimitry Andric 
1909349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1910349cc55cSDimitry Andric _mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
1911349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B,
1912349cc55cSDimitry Andric                                                   (__v8sf)__W, (__mmask8)__U);
1913349cc55cSDimitry Andric }
1914349cc55cSDimitry Andric 
1915349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1916349cc55cSDimitry Andric _mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
1917349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmulcph256_mask(
1918349cc55cSDimitry Andric       (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
1919349cc55cSDimitry Andric }
1920349cc55cSDimitry Andric 
1921349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A,
1922349cc55cSDimitry Andric                                                               __m128h __B,
1923349cc55cSDimitry Andric                                                               __m128h __C) {
1924349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1925349cc55cSDimitry Andric                                                    (__v4sf)__C, (__mmask8)-1);
1926349cc55cSDimitry Andric }
1927349cc55cSDimitry Andric 
1928349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1929349cc55cSDimitry Andric _mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
1930349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectps_128(
1931349cc55cSDimitry Andric       __U,
1932349cc55cSDimitry Andric       __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
1933349cc55cSDimitry Andric                                        (__mmask8)__U),
1934349cc55cSDimitry Andric       (__v4sf)__A);
1935349cc55cSDimitry Andric }
1936349cc55cSDimitry Andric 
1937349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1938349cc55cSDimitry Andric _mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
1939349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
1940349cc55cSDimitry Andric                                                    (__v4sf)__C, (__mmask8)__U);
1941349cc55cSDimitry Andric }
1942349cc55cSDimitry Andric 
1943349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1944349cc55cSDimitry Andric _mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
1945349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B,
1946349cc55cSDimitry Andric                                                     (__v4sf)__C, (__mmask8)__U);
1947349cc55cSDimitry Andric }
1948349cc55cSDimitry Andric 
1949349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A,
1950349cc55cSDimitry Andric                                                                  __m256h __B,
1951349cc55cSDimitry Andric                                                                  __m256h __C) {
1952349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1953349cc55cSDimitry Andric                                                    (__v8sf)__C, (__mmask8)-1);
1954349cc55cSDimitry Andric }
1955349cc55cSDimitry Andric 
1956349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1957349cc55cSDimitry Andric _mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
1958349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectps_256(
1959349cc55cSDimitry Andric       __U,
1960349cc55cSDimitry Andric       __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
1961349cc55cSDimitry Andric                                        (__mmask8)__U),
1962349cc55cSDimitry Andric       (__v8sf)__A);
1963349cc55cSDimitry Andric }
1964349cc55cSDimitry Andric 
1965349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1966349cc55cSDimitry Andric _mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
1967349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
1968349cc55cSDimitry Andric                                                    (__v8sf)__C, (__mmask8)__U);
1969349cc55cSDimitry Andric }
1970349cc55cSDimitry Andric 
1971349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1972349cc55cSDimitry Andric _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
1973349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B,
1974349cc55cSDimitry Andric                                                     (__v8sf)__C, (__mmask8)__U);
1975349cc55cSDimitry Andric }
1976349cc55cSDimitry Andric 
1977349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
1978349cc55cSDimitry Andric                                                                   __m128h __A,
1979349cc55cSDimitry Andric                                                                   __m128h __W) {
1980349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
1981349cc55cSDimitry Andric                                               (__v8hf)__A);
1982349cc55cSDimitry Andric }
1983349cc55cSDimitry Andric 
1984349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1985349cc55cSDimitry Andric _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
1986349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
1987349cc55cSDimitry Andric                                               (__v16hf)__A);
1988349cc55cSDimitry Andric }
1989349cc55cSDimitry Andric 
1990349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
1991349cc55cSDimitry Andric _mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
1992349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
1993349cc55cSDimitry Andric                                                  (__v8hi)__B);
1994349cc55cSDimitry Andric }
1995349cc55cSDimitry Andric 
1996349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
1997349cc55cSDimitry Andric _mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
1998349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1999349cc55cSDimitry Andric                                                  (__v16hi)__B);
2000349cc55cSDimitry Andric }
2001349cc55cSDimitry Andric 
2002349cc55cSDimitry Andric static __inline__ __m128h __DEFAULT_FN_ATTRS128
2003349cc55cSDimitry Andric _mm_permutexvar_ph(__m128i __A, __m128h __B) {
2004349cc55cSDimitry Andric   return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
2005349cc55cSDimitry Andric }
2006349cc55cSDimitry Andric 
2007349cc55cSDimitry Andric static __inline__ __m256h __DEFAULT_FN_ATTRS256
2008349cc55cSDimitry Andric _mm256_permutexvar_ph(__m256i __A, __m256h __B) {
2009349cc55cSDimitry Andric   return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
2010349cc55cSDimitry Andric }
2011349cc55cSDimitry Andric 
2012349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2013349cc55cSDimitry Andric _mm256_reduce_add_ph(__m256h __W) {
2014349cc55cSDimitry Andric   return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W);
2015349cc55cSDimitry Andric }
2016349cc55cSDimitry Andric 
2017349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2018349cc55cSDimitry Andric _mm256_reduce_mul_ph(__m256h __W) {
2019349cc55cSDimitry Andric   return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W);
2020349cc55cSDimitry Andric }
2021349cc55cSDimitry Andric 
2022349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2023349cc55cSDimitry Andric _mm256_reduce_max_ph(__m256h __V) {
2024349cc55cSDimitry Andric   return __builtin_ia32_reduce_fmax_ph256(__V);
2025349cc55cSDimitry Andric }
2026349cc55cSDimitry Andric 
2027349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS256
2028349cc55cSDimitry Andric _mm256_reduce_min_ph(__m256h __V) {
2029349cc55cSDimitry Andric   return __builtin_ia32_reduce_fmin_ph256(__V);
2030349cc55cSDimitry Andric }
2031349cc55cSDimitry Andric 
2032349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2033349cc55cSDimitry Andric _mm_reduce_add_ph(__m128h __W) {
2034349cc55cSDimitry Andric   return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W);
2035349cc55cSDimitry Andric }
2036349cc55cSDimitry Andric 
2037349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2038349cc55cSDimitry Andric _mm_reduce_mul_ph(__m128h __W) {
2039349cc55cSDimitry Andric   return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W);
2040349cc55cSDimitry Andric }
2041349cc55cSDimitry Andric 
2042349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2043349cc55cSDimitry Andric _mm_reduce_max_ph(__m128h __V) {
2044349cc55cSDimitry Andric   return __builtin_ia32_reduce_fmax_ph128(__V);
2045349cc55cSDimitry Andric }
2046349cc55cSDimitry Andric 
2047349cc55cSDimitry Andric static __inline__ _Float16 __DEFAULT_FN_ATTRS128
2048349cc55cSDimitry Andric _mm_reduce_min_ph(__m128h __V) {
2049349cc55cSDimitry Andric   return __builtin_ia32_reduce_fmin_ph128(__V);
2050349cc55cSDimitry Andric }
2051349cc55cSDimitry Andric 
// The *_mul_pch names below are aliases: each expands to the *_fmul_pch
// intrinsic of the same width and masking form.
#define _mm_mul_pch(A, B) _mm_fmul_pch((A), (B))
#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch((W), (U), (A), (B))
#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch((U), (A), (B))
#define _mm256_mul_pch(A, B) _mm256_fmul_pch((A), (B))
#define _mm256_mask_mul_pch(W, U, A, B)                                        \
  _mm256_mask_fmul_pch((W), (U), (A), (B))
#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch((U), (A), (B))
2059349cc55cSDimitry Andric 
// The *_cmul_pch names below are aliases: each expands to the *_fcmul_pch
// intrinsic of the same width and masking form.
#define _mm_cmul_pch(A, B) _mm_fcmul_pch((A), (B))
#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch((W), (U), (A), (B))
#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch((U), (A), (B))
#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch((A), (B))
#define _mm256_mask_cmul_pch(W, U, A, B)                                       \
  _mm256_mask_fcmul_pch((W), (U), (A), (B))
#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch((U), (A), (B))
2066349cc55cSDimitry Andric 
2067349cc55cSDimitry Andric #undef __DEFAULT_FN_ATTRS128
2068349cc55cSDimitry Andric #undef __DEFAULT_FN_ATTRS256
2069349cc55cSDimitry Andric 
2070349cc55cSDimitry Andric #endif
2071bdd1243dSDimitry Andric #endif
2072