/*===---- xopintrin.h - XOP intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10 #ifndef __X86INTRIN_H
11 #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
12 #endif
13
14 #ifndef __XOPINTRIN_H
15 #define __XOPINTRIN_H
16
17 #include <fma4intrin.h>
18
/* Define the default attributes for the functions in this file: always_inline,
 * nodebug, target "xop", and a minimum vector width of 128 bits
 * (__DEFAULT_FN_ATTRS) or 256 bits (__DEFAULT_FN_ATTRS256). */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256)))
22
23 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi16(__m128i __A,__m128i __B,__m128i __C)24 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
25 {
26 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
27 }
28
29 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi16(__m128i __A,__m128i __B,__m128i __C)30 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
31 {
32 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
33 }
34
35 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccsd_epi16(__m128i __A,__m128i __B,__m128i __C)36 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
37 {
38 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
39 }
40
41 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccd_epi16(__m128i __A,__m128i __B,__m128i __C)42 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
43 {
44 return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
45 }
46
47 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccs_epi32(__m128i __A,__m128i __B,__m128i __C)48 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
49 {
50 return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
51 }
52
53 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macc_epi32(__m128i __A,__m128i __B,__m128i __C)54 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
55 {
56 return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
57 }
58
59 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccslo_epi32(__m128i __A,__m128i __B,__m128i __C)60 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
61 {
62 return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
63 }
64
65 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macclo_epi32(__m128i __A,__m128i __B,__m128i __C)66 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
67 {
68 return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
69 }
70
71 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maccshi_epi32(__m128i __A,__m128i __B,__m128i __C)72 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
73 {
74 return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
75 }
76
77 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_macchi_epi32(__m128i __A,__m128i __B,__m128i __C)78 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
79 {
80 return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
81 }
82
83 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddsd_epi16(__m128i __A,__m128i __B,__m128i __C)84 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
85 {
86 return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
87 }
88
89 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddd_epi16(__m128i __A,__m128i __B,__m128i __C)90 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
91 {
92 return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
93 }
94
95 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epi8(__m128i __A)96 _mm_haddw_epi8(__m128i __A)
97 {
98 return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
99 }
100
101 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi8(__m128i __A)102 _mm_haddd_epi8(__m128i __A)
103 {
104 return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
105 }
106
107 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi8(__m128i __A)108 _mm_haddq_epi8(__m128i __A)
109 {
110 return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
111 }
112
113 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epi16(__m128i __A)114 _mm_haddd_epi16(__m128i __A)
115 {
116 return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
117 }
118
119 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi16(__m128i __A)120 _mm_haddq_epi16(__m128i __A)
121 {
122 return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
123 }
124
125 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epi32(__m128i __A)126 _mm_haddq_epi32(__m128i __A)
127 {
128 return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
129 }
130
131 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddw_epu8(__m128i __A)132 _mm_haddw_epu8(__m128i __A)
133 {
134 return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
135 }
136
137 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu8(__m128i __A)138 _mm_haddd_epu8(__m128i __A)
139 {
140 return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
141 }
142
143 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu8(__m128i __A)144 _mm_haddq_epu8(__m128i __A)
145 {
146 return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
147 }
148
149 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddd_epu16(__m128i __A)150 _mm_haddd_epu16(__m128i __A)
151 {
152 return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
153 }
154
155 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu16(__m128i __A)156 _mm_haddq_epu16(__m128i __A)
157 {
158 return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
159 }
160
161 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_haddq_epu32(__m128i __A)162 _mm_haddq_epu32(__m128i __A)
163 {
164 return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
165 }
166
167 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubw_epi8(__m128i __A)168 _mm_hsubw_epi8(__m128i __A)
169 {
170 return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
171 }
172
173 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubd_epi16(__m128i __A)174 _mm_hsubd_epi16(__m128i __A)
175 {
176 return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
177 }
178
179 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubq_epi32(__m128i __A)180 _mm_hsubq_epi32(__m128i __A)
181 {
182 return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
183 }
184
185 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_cmov_si128(__m128i __A,__m128i __B,__m128i __C)186 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
187 {
188 return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));
189 }
190
191 static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cmov_si256(__m256i __A,__m256i __B,__m256i __C)192 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
193 {
194 return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));
195 }
196
197 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_perm_epi8(__m128i __A,__m128i __B,__m128i __C)198 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
199 {
200 return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
201 }
202
203 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi8(__m128i __A,__m128i __B)204 _mm_rot_epi8(__m128i __A, __m128i __B)
205 {
206 return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
207 }
208
209 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi16(__m128i __A,__m128i __B)210 _mm_rot_epi16(__m128i __A, __m128i __B)
211 {
212 return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
213 }
214
215 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi32(__m128i __A,__m128i __B)216 _mm_rot_epi32(__m128i __A, __m128i __B)
217 {
218 return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
219 }
220
221 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi64(__m128i __A,__m128i __B)222 _mm_rot_epi64(__m128i __A, __m128i __B)
223 {
224 return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
225 }
226
/* Expands to __builtin_ia32_vprotbi on A (converted to __m128i, then viewed as
 * __v16qi) and N, with the result cast to __m128i.
 * NOTE(review): presumably a macro because N must be a constant -- confirm. */
#define _mm_roti_epi8(A, N) \
  ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)))
229
/* Expands to __builtin_ia32_vprotwi on A (converted to __m128i, then viewed as
 * __v8hi) and N, with the result cast to __m128i.
 * NOTE(review): presumably a macro because N must be a constant -- confirm. */
#define _mm_roti_epi16(A, N) \
  ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)))
232
/* Expands to __builtin_ia32_vprotdi on A (converted to __m128i, then viewed as
 * __v4si) and N, with the result cast to __m128i.
 * NOTE(review): presumably a macro because N must be a constant -- confirm. */
#define _mm_roti_epi32(A, N) \
  ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)))
235
/* Expands to __builtin_ia32_vprotqi on A (converted to __m128i, then viewed as
 * __v2di) and N, with the result cast to __m128i.
 * NOTE(review): presumably a macro because N must be a constant -- confirm. */
#define _mm_roti_epi64(A, N) \
  ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)))
238
239 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A,__m128i __B)240 _mm_shl_epi8(__m128i __A, __m128i __B)
241 {
242 return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
243 }
244
245 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi16(__m128i __A,__m128i __B)246 _mm_shl_epi16(__m128i __A, __m128i __B)
247 {
248 return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
249 }
250
251 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi32(__m128i __A,__m128i __B)252 _mm_shl_epi32(__m128i __A, __m128i __B)
253 {
254 return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
255 }
256
257 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi64(__m128i __A,__m128i __B)258 _mm_shl_epi64(__m128i __A, __m128i __B)
259 {
260 return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
261 }
262
263 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi8(__m128i __A,__m128i __B)264 _mm_sha_epi8(__m128i __A, __m128i __B)
265 {
266 return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
267 }
268
269 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi16(__m128i __A,__m128i __B)270 _mm_sha_epi16(__m128i __A, __m128i __B)
271 {
272 return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
273 }
274
275 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi32(__m128i __A,__m128i __B)276 _mm_sha_epi32(__m128i __A, __m128i __B)
277 {
278 return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
279 }
280
281 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sha_epi64(__m128i __A,__m128i __B)282 _mm_sha_epi64(__m128i __A, __m128i __B)
283 {
284 return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
285 }
286
/* Expands to __builtin_ia32_vpcomub on A and B (each converted to __m128i,
 * then viewed as __v16qi) with control value N; the result is cast to
 * __m128i.  The _mm_com<op>_epu8 wrappers in this file pass the
 * _MM_PCOMCTRL_* constants as N. */
#define _mm_com_epu8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                   (__v16qi)(__m128i)(B), (N)))
290
/* Expands to __builtin_ia32_vpcomuw on A and B (each converted to __m128i,
 * then viewed as __v8hi) with control value N; the result is cast to
 * __m128i.  The _mm_com<op>_epu16 wrappers in this file pass the
 * _MM_PCOMCTRL_* constants as N. */
#define _mm_com_epu16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                   (__v8hi)(__m128i)(B), (N)))
294
/* Expands to __builtin_ia32_vpcomud on A and B (each converted to __m128i,
 * then viewed as __v4si) with control value N; the result is cast to
 * __m128i.  The _mm_com<op>_epu32 wrappers in this file pass the
 * _MM_PCOMCTRL_* constants as N. */
#define _mm_com_epu32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                   (__v4si)(__m128i)(B), (N)))
298
/* Expands to __builtin_ia32_vpcomuq on A and B (each converted to __m128i,
 * then viewed as __v2di) with control value N; the result is cast to
 * __m128i.  The _mm_com<op>_epu64 wrappers in this file pass the
 * _MM_PCOMCTRL_* constants as N. */
#define _mm_com_epu64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                   (__v2di)(__m128i)(B), (N)))
302
/* Expands to __builtin_ia32_vpcomb on A and B (each converted to __m128i,
 * then viewed as __v16qi) with control value N; the result is cast to
 * __m128i.  The _mm_com<op>_epi8 wrappers in this file pass the
 * _MM_PCOMCTRL_* constants as N. */
#define _mm_com_epi8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                  (__v16qi)(__m128i)(B), (N)))
306
/* Expands to __builtin_ia32_vpcomw on A and B (each converted to __m128i,
 * then viewed as __v8hi) with control value N; the result is cast to
 * __m128i.  The _mm_com<op>_epi16 wrappers in this file pass the
 * _MM_PCOMCTRL_* constants as N. */
#define _mm_com_epi16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                  (__v8hi)(__m128i)(B), (N)))
310
/* Expands to __builtin_ia32_vpcomd on A and B (each converted to __m128i,
 * then viewed as __v4si) with control value N; the result is cast to
 * __m128i.  The _mm_com<op>_epi32 wrappers in this file pass the
 * _MM_PCOMCTRL_* constants as N. */
#define _mm_com_epi32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                  (__v4si)(__m128i)(B), (N)))
314
/* Expands to __builtin_ia32_vpcomq on A and B (each converted to __m128i,
 * then viewed as __v2di) with control value N; the result is cast to
 * __m128i.  The _mm_com<op>_epi64 wrappers in this file pass the
 * _MM_PCOMCTRL_* constants as N. */
#define _mm_com_epi64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                  (__v2di)(__m128i)(B), (N)))
318
/* Control values supplied as the N argument of the _mm_com_* macros by the
 * _mm_com<op>_* wrapper functions in this file. */
#define _MM_PCOMCTRL_LT    0
#define _MM_PCOMCTRL_LE    1
#define _MM_PCOMCTRL_GT    2
#define _MM_PCOMCTRL_GE    3
#define _MM_PCOMCTRL_EQ    4
#define _MM_PCOMCTRL_NEQ   5
#define _MM_PCOMCTRL_FALSE 6
#define _MM_PCOMCTRL_TRUE  7
327
328 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu8(__m128i __A,__m128i __B)329 _mm_comlt_epu8(__m128i __A, __m128i __B)
330 {
331 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
332 }
333
334 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu8(__m128i __A,__m128i __B)335 _mm_comle_epu8(__m128i __A, __m128i __B)
336 {
337 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
338 }
339
340 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu8(__m128i __A,__m128i __B)341 _mm_comgt_epu8(__m128i __A, __m128i __B)
342 {
343 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
344 }
345
346 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu8(__m128i __A,__m128i __B)347 _mm_comge_epu8(__m128i __A, __m128i __B)
348 {
349 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
350 }
351
352 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu8(__m128i __A,__m128i __B)353 _mm_comeq_epu8(__m128i __A, __m128i __B)
354 {
355 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
356 }
357
358 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu8(__m128i __A,__m128i __B)359 _mm_comneq_epu8(__m128i __A, __m128i __B)
360 {
361 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
362 }
363
364 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu8(__m128i __A,__m128i __B)365 _mm_comfalse_epu8(__m128i __A, __m128i __B)
366 {
367 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
368 }
369
370 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu8(__m128i __A,__m128i __B)371 _mm_comtrue_epu8(__m128i __A, __m128i __B)
372 {
373 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
374 }
375
376 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu16(__m128i __A,__m128i __B)377 _mm_comlt_epu16(__m128i __A, __m128i __B)
378 {
379 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
380 }
381
382 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu16(__m128i __A,__m128i __B)383 _mm_comle_epu16(__m128i __A, __m128i __B)
384 {
385 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
386 }
387
388 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu16(__m128i __A,__m128i __B)389 _mm_comgt_epu16(__m128i __A, __m128i __B)
390 {
391 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
392 }
393
394 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu16(__m128i __A,__m128i __B)395 _mm_comge_epu16(__m128i __A, __m128i __B)
396 {
397 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
398 }
399
400 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu16(__m128i __A,__m128i __B)401 _mm_comeq_epu16(__m128i __A, __m128i __B)
402 {
403 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
404 }
405
406 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu16(__m128i __A,__m128i __B)407 _mm_comneq_epu16(__m128i __A, __m128i __B)
408 {
409 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
410 }
411
412 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu16(__m128i __A,__m128i __B)413 _mm_comfalse_epu16(__m128i __A, __m128i __B)
414 {
415 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
416 }
417
418 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu16(__m128i __A,__m128i __B)419 _mm_comtrue_epu16(__m128i __A, __m128i __B)
420 {
421 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
422 }
423
424 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu32(__m128i __A,__m128i __B)425 _mm_comlt_epu32(__m128i __A, __m128i __B)
426 {
427 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
428 }
429
430 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu32(__m128i __A,__m128i __B)431 _mm_comle_epu32(__m128i __A, __m128i __B)
432 {
433 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
434 }
435
436 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu32(__m128i __A,__m128i __B)437 _mm_comgt_epu32(__m128i __A, __m128i __B)
438 {
439 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
440 }
441
442 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu32(__m128i __A,__m128i __B)443 _mm_comge_epu32(__m128i __A, __m128i __B)
444 {
445 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
446 }
447
448 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu32(__m128i __A,__m128i __B)449 _mm_comeq_epu32(__m128i __A, __m128i __B)
450 {
451 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
452 }
453
454 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu32(__m128i __A,__m128i __B)455 _mm_comneq_epu32(__m128i __A, __m128i __B)
456 {
457 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
458 }
459
460 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu32(__m128i __A,__m128i __B)461 _mm_comfalse_epu32(__m128i __A, __m128i __B)
462 {
463 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
464 }
465
466 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu32(__m128i __A,__m128i __B)467 _mm_comtrue_epu32(__m128i __A, __m128i __B)
468 {
469 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
470 }
471
472 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epu64(__m128i __A,__m128i __B)473 _mm_comlt_epu64(__m128i __A, __m128i __B)
474 {
475 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
476 }
477
478 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epu64(__m128i __A,__m128i __B)479 _mm_comle_epu64(__m128i __A, __m128i __B)
480 {
481 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
482 }
483
484 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epu64(__m128i __A,__m128i __B)485 _mm_comgt_epu64(__m128i __A, __m128i __B)
486 {
487 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
488 }
489
490 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epu64(__m128i __A,__m128i __B)491 _mm_comge_epu64(__m128i __A, __m128i __B)
492 {
493 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
494 }
495
496 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epu64(__m128i __A,__m128i __B)497 _mm_comeq_epu64(__m128i __A, __m128i __B)
498 {
499 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
500 }
501
502 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epu64(__m128i __A,__m128i __B)503 _mm_comneq_epu64(__m128i __A, __m128i __B)
504 {
505 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
506 }
507
508 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epu64(__m128i __A,__m128i __B)509 _mm_comfalse_epu64(__m128i __A, __m128i __B)
510 {
511 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
512 }
513
514 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epu64(__m128i __A,__m128i __B)515 _mm_comtrue_epu64(__m128i __A, __m128i __B)
516 {
517 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
518 }
519
520 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi8(__m128i __A,__m128i __B)521 _mm_comlt_epi8(__m128i __A, __m128i __B)
522 {
523 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
524 }
525
526 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi8(__m128i __A,__m128i __B)527 _mm_comle_epi8(__m128i __A, __m128i __B)
528 {
529 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
530 }
531
532 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi8(__m128i __A,__m128i __B)533 _mm_comgt_epi8(__m128i __A, __m128i __B)
534 {
535 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
536 }
537
538 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi8(__m128i __A,__m128i __B)539 _mm_comge_epi8(__m128i __A, __m128i __B)
540 {
541 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
542 }
543
544 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi8(__m128i __A,__m128i __B)545 _mm_comeq_epi8(__m128i __A, __m128i __B)
546 {
547 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
548 }
549
550 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi8(__m128i __A,__m128i __B)551 _mm_comneq_epi8(__m128i __A, __m128i __B)
552 {
553 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
554 }
555
556 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi8(__m128i __A,__m128i __B)557 _mm_comfalse_epi8(__m128i __A, __m128i __B)
558 {
559 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
560 }
561
562 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi8(__m128i __A,__m128i __B)563 _mm_comtrue_epi8(__m128i __A, __m128i __B)
564 {
565 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
566 }
567
568 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi16(__m128i __A,__m128i __B)569 _mm_comlt_epi16(__m128i __A, __m128i __B)
570 {
571 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
572 }
573
574 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi16(__m128i __A,__m128i __B)575 _mm_comle_epi16(__m128i __A, __m128i __B)
576 {
577 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
578 }
579
580 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi16(__m128i __A,__m128i __B)581 _mm_comgt_epi16(__m128i __A, __m128i __B)
582 {
583 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
584 }
585
586 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi16(__m128i __A,__m128i __B)587 _mm_comge_epi16(__m128i __A, __m128i __B)
588 {
589 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
590 }
591
592 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi16(__m128i __A,__m128i __B)593 _mm_comeq_epi16(__m128i __A, __m128i __B)
594 {
595 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
596 }
597
598 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi16(__m128i __A,__m128i __B)599 _mm_comneq_epi16(__m128i __A, __m128i __B)
600 {
601 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
602 }
603
604 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi16(__m128i __A,__m128i __B)605 _mm_comfalse_epi16(__m128i __A, __m128i __B)
606 {
607 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
608 }
609
610 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi16(__m128i __A,__m128i __B)611 _mm_comtrue_epi16(__m128i __A, __m128i __B)
612 {
613 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
614 }
615
616 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi32(__m128i __A,__m128i __B)617 _mm_comlt_epi32(__m128i __A, __m128i __B)
618 {
619 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
620 }
621
622 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi32(__m128i __A,__m128i __B)623 _mm_comle_epi32(__m128i __A, __m128i __B)
624 {
625 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
626 }
627
628 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi32(__m128i __A,__m128i __B)629 _mm_comgt_epi32(__m128i __A, __m128i __B)
630 {
631 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
632 }
633
634 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi32(__m128i __A,__m128i __B)635 _mm_comge_epi32(__m128i __A, __m128i __B)
636 {
637 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
638 }
639
640 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi32(__m128i __A,__m128i __B)641 _mm_comeq_epi32(__m128i __A, __m128i __B)
642 {
643 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
644 }
645
646 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi32(__m128i __A,__m128i __B)647 _mm_comneq_epi32(__m128i __A, __m128i __B)
648 {
649 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
650 }
651
652 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi32(__m128i __A,__m128i __B)653 _mm_comfalse_epi32(__m128i __A, __m128i __B)
654 {
655 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
656 }
657
658 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi32(__m128i __A,__m128i __B)659 _mm_comtrue_epi32(__m128i __A, __m128i __B)
660 {
661 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
662 }
663
664 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comlt_epi64(__m128i __A,__m128i __B)665 _mm_comlt_epi64(__m128i __A, __m128i __B)
666 {
667 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
668 }
669
670 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comle_epi64(__m128i __A,__m128i __B)671 _mm_comle_epi64(__m128i __A, __m128i __B)
672 {
673 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
674 }
675
676 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comgt_epi64(__m128i __A,__m128i __B)677 _mm_comgt_epi64(__m128i __A, __m128i __B)
678 {
679 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
680 }
681
682 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comge_epi64(__m128i __A,__m128i __B)683 _mm_comge_epi64(__m128i __A, __m128i __B)
684 {
685 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
686 }
687
688 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comeq_epi64(__m128i __A,__m128i __B)689 _mm_comeq_epi64(__m128i __A, __m128i __B)
690 {
691 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
692 }
693
694 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comneq_epi64(__m128i __A,__m128i __B)695 _mm_comneq_epi64(__m128i __A, __m128i __B)
696 {
697 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
698 }
699
700 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comfalse_epi64(__m128i __A,__m128i __B)701 _mm_comfalse_epi64(__m128i __A, __m128i __B)
702 {
703 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
704 }
705
706 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_comtrue_epi64(__m128i __A,__m128i __B)707 _mm_comtrue_epi64(__m128i __A, __m128i __B)
708 {
709 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
710 }
711
/* Expands to __builtin_ia32_vpermil2pd on X and Y (each converted to __m128d,
 * then viewed as __v2df), C (converted to __m128i, then viewed as __v2di) and
 * I; the result is cast to __m128d.
 * NOTE(review): presumably C is the per-element selector and I an immediate
 * control -- confirm against the XOP VPERMIL2PD description. */
#define _mm_permute2_pd(X, Y, C, I) \
  ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), \
                                      (__v2di)(__m128i)(C), (I)))
716
/* Expands to __builtin_ia32_vpermil2pd256 on X and Y (each converted to
 * __m256d, then viewed as __v4df), C (converted to __m256i, then viewed as
 * __v4di) and I; the result is cast to __m256d.
 * NOTE(review): presumably C is the per-element selector and I an immediate
 * control -- confirm against the XOP VPERMIL2PD description. */
#define _mm256_permute2_pd(X, Y, C, I) \
  ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
                                         (__v4df)(__m256d)(Y), \
                                         (__v4di)(__m256i)(C), (I)))
721
/* Expands to __builtin_ia32_vpermil2ps on X and Y (each converted to __m128,
 * then viewed as __v4sf), C (converted to __m128i, then viewed as __v4si) and
 * I; the result is cast to __m128.
 * NOTE(review): presumably C is the per-element selector and I an immediate
 * control -- confirm against the XOP VPERMIL2PS description. */
#define _mm_permute2_ps(X, Y, C, I) \
  ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
                                     (__v4si)(__m128i)(C), (I)))
725
/* Expands to __builtin_ia32_vpermil2ps256 on X and Y (each converted to
 * __m256, then viewed as __v8sf), C (converted to __m256i, then viewed as
 * __v8si) and I; the result is cast to __m256.
 * NOTE(review): presumably C is the per-element selector and I an immediate
 * control -- confirm against the XOP VPERMIL2PS description. */
#define _mm256_permute2_ps(X, Y, C, I) \
  ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
                                        (__v8sf)(__m256)(Y), \
                                        (__v8si)(__m256i)(C), (I)))
730
731 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)732 _mm_frcz_ss(__m128 __A)
733 {
734 return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
735 }
736
737 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_sd(__m128d __A)738 _mm_frcz_sd(__m128d __A)
739 {
740 return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
741 }
742
743 static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ps(__m128 __A)744 _mm_frcz_ps(__m128 __A)
745 {
746 return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
747 }
748
749 static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_frcz_pd(__m128d __A)750 _mm_frcz_pd(__m128d __A)
751 {
752 return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
753 }
754
755 static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_frcz_ps(__m256 __A)756 _mm256_frcz_ps(__m256 __A)
757 {
758 return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
759 }
760
761 static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_frcz_pd(__m256d __A)762 _mm256_frcz_pd(__m256d __A)
763 {
764 return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
765 }
766
767 #undef __DEFAULT_FN_ATTRS
768 #undef __DEFAULT_FN_ATTRS256
769
770 #endif /* __XOPINTRIN_H */
771