xref: /freebsd/contrib/llvm-project/clang/lib/Headers/avx512vlcdintrin.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
90b57cec5SDimitry Andric #ifndef __IMMINTRIN_H
100b57cec5SDimitry Andric #error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead."
110b57cec5SDimitry Andric #endif
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef __AVX512VLCDINTRIN_H
140b57cec5SDimitry Andric #define __AVX512VLCDINTRIN_H
150b57cec5SDimitry Andric 
/*
 * Attribute set attached to every 128-bit intrinsic defined in this header:
 * always-inline, nodebug, the "avx512vl,avx512cd,no-evex512" target feature
 * string, and a minimum vector width of 128.
 */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__))                                           \
  __attribute__((__nodebug__))                                                 \
  __attribute__((__target__("avx512vl,avx512cd,no-evex512")))                  \
  __attribute__((__min_vector_width__(128)))
/*
 * Attribute set attached to every 256-bit intrinsic defined in this header:
 * always-inline, nodebug, the "avx512vl,avx512cd,no-evex512" target feature
 * string, and a minimum vector width of 256.
 */
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__))                                           \
  __attribute__((__nodebug__))                                                 \
  __attribute__((__target__("avx512vl,avx512cd,no-evex512")))                  \
  __attribute__((__min_vector_width__(256)))
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
270b57cec5SDimitry Andric _mm_broadcastmb_epi64 (__mmask8 __A)
280b57cec5SDimitry Andric {
290b57cec5SDimitry Andric   return (__m128i) _mm_set1_epi64x((long long) __A);
300b57cec5SDimitry Andric }
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
330b57cec5SDimitry Andric _mm256_broadcastmb_epi64 (__mmask8 __A)
340b57cec5SDimitry Andric {
350b57cec5SDimitry Andric   return (__m256i) _mm256_set1_epi64x((long long)__A);
360b57cec5SDimitry Andric }
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
390b57cec5SDimitry Andric _mm_broadcastmw_epi32 (__mmask16 __A)
400b57cec5SDimitry Andric {
410b57cec5SDimitry Andric   return (__m128i) _mm_set1_epi32((int)__A);
420b57cec5SDimitry Andric }
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
450b57cec5SDimitry Andric _mm256_broadcastmw_epi32 (__mmask16 __A)
460b57cec5SDimitry Andric {
470b57cec5SDimitry Andric   return (__m256i) _mm256_set1_epi32((int)__A);
480b57cec5SDimitry Andric }
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
520b57cec5SDimitry Andric _mm_conflict_epi64 (__m128i __A)
530b57cec5SDimitry Andric {
540b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A);
550b57cec5SDimitry Andric }
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
580b57cec5SDimitry Andric _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
590b57cec5SDimitry Andric {
600b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
610b57cec5SDimitry Andric                                              (__v2di)_mm_conflict_epi64(__A),
620b57cec5SDimitry Andric                                              (__v2di)__W);
630b57cec5SDimitry Andric }
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
660b57cec5SDimitry Andric _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
670b57cec5SDimitry Andric {
680b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
690b57cec5SDimitry Andric                                              (__v2di)_mm_conflict_epi64(__A),
700b57cec5SDimitry Andric                                              (__v2di)_mm_setzero_si128());
710b57cec5SDimitry Andric }
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
740b57cec5SDimitry Andric _mm256_conflict_epi64 (__m256i __A)
750b57cec5SDimitry Andric {
760b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A);
770b57cec5SDimitry Andric }
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
800b57cec5SDimitry Andric _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
810b57cec5SDimitry Andric {
820b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
830b57cec5SDimitry Andric                                              (__v4di)_mm256_conflict_epi64(__A),
840b57cec5SDimitry Andric                                              (__v4di)__W);
850b57cec5SDimitry Andric }
860b57cec5SDimitry Andric 
870b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
880b57cec5SDimitry Andric _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
890b57cec5SDimitry Andric {
900b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
910b57cec5SDimitry Andric                                              (__v4di)_mm256_conflict_epi64(__A),
920b57cec5SDimitry Andric                                              (__v4di)_mm256_setzero_si256());
930b57cec5SDimitry Andric }
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
960b57cec5SDimitry Andric _mm_conflict_epi32 (__m128i __A)
970b57cec5SDimitry Andric {
980b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A);
990b57cec5SDimitry Andric }
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1020b57cec5SDimitry Andric _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
1030b57cec5SDimitry Andric {
1040b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1050b57cec5SDimitry Andric                                              (__v4si)_mm_conflict_epi32(__A),
1060b57cec5SDimitry Andric                                              (__v4si)__W);
1070b57cec5SDimitry Andric }
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1100b57cec5SDimitry Andric _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
1110b57cec5SDimitry Andric {
1120b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1130b57cec5SDimitry Andric                                              (__v4si)_mm_conflict_epi32(__A),
1140b57cec5SDimitry Andric                                              (__v4si)_mm_setzero_si128());
1150b57cec5SDimitry Andric }
1160b57cec5SDimitry Andric 
1170b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1180b57cec5SDimitry Andric _mm256_conflict_epi32 (__m256i __A)
1190b57cec5SDimitry Andric {
1200b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A);
1210b57cec5SDimitry Andric }
1220b57cec5SDimitry Andric 
1230b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1240b57cec5SDimitry Andric _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
1250b57cec5SDimitry Andric {
1260b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1270b57cec5SDimitry Andric                                              (__v8si)_mm256_conflict_epi32(__A),
1280b57cec5SDimitry Andric                                              (__v8si)__W);
1290b57cec5SDimitry Andric }
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1320b57cec5SDimitry Andric _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
1330b57cec5SDimitry Andric {
1340b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1350b57cec5SDimitry Andric                                              (__v8si)_mm256_conflict_epi32(__A),
1360b57cec5SDimitry Andric                                              (__v8si)_mm256_setzero_si256());
1370b57cec5SDimitry Andric }
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1400b57cec5SDimitry Andric _mm_lzcnt_epi32 (__m128i __A)
1410b57cec5SDimitry Andric {
1420b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A);
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1460b57cec5SDimitry Andric _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
1470b57cec5SDimitry Andric {
1480b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1490b57cec5SDimitry Andric                                              (__v4si)_mm_lzcnt_epi32(__A),
1500b57cec5SDimitry Andric                                              (__v4si)__W);
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1540b57cec5SDimitry Andric _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
1550b57cec5SDimitry Andric {
1560b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1570b57cec5SDimitry Andric                                              (__v4si)_mm_lzcnt_epi32(__A),
1580b57cec5SDimitry Andric                                              (__v4si)_mm_setzero_si128());
1590b57cec5SDimitry Andric }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1620b57cec5SDimitry Andric _mm256_lzcnt_epi32 (__m256i __A)
1630b57cec5SDimitry Andric {
1640b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A);
1650b57cec5SDimitry Andric }
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1680b57cec5SDimitry Andric _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
1690b57cec5SDimitry Andric {
1700b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1710b57cec5SDimitry Andric                                              (__v8si)_mm256_lzcnt_epi32(__A),
1720b57cec5SDimitry Andric                                              (__v8si)__W);
1730b57cec5SDimitry Andric }
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1760b57cec5SDimitry Andric _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
1770b57cec5SDimitry Andric {
1780b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1790b57cec5SDimitry Andric                                              (__v8si)_mm256_lzcnt_epi32(__A),
1800b57cec5SDimitry Andric                                              (__v8si)_mm256_setzero_si256());
1810b57cec5SDimitry Andric }
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1840b57cec5SDimitry Andric _mm_lzcnt_epi64 (__m128i __A)
1850b57cec5SDimitry Andric {
1860b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A);
1870b57cec5SDimitry Andric }
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1900b57cec5SDimitry Andric _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
1910b57cec5SDimitry Andric {
1920b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1930b57cec5SDimitry Andric                                              (__v2di)_mm_lzcnt_epi64(__A),
1940b57cec5SDimitry Andric                                              (__v2di)__W);
1950b57cec5SDimitry Andric }
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1980b57cec5SDimitry Andric _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
1990b57cec5SDimitry Andric {
2000b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2010b57cec5SDimitry Andric                                              (__v2di)_mm_lzcnt_epi64(__A),
2020b57cec5SDimitry Andric                                              (__v2di)_mm_setzero_si128());
2030b57cec5SDimitry Andric }
2040b57cec5SDimitry Andric 
2050b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
2060b57cec5SDimitry Andric _mm256_lzcnt_epi64 (__m256i __A)
2070b57cec5SDimitry Andric {
2080b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A);
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
2120b57cec5SDimitry Andric _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
2130b57cec5SDimitry Andric {
2140b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2150b57cec5SDimitry Andric                                              (__v4di)_mm256_lzcnt_epi64(__A),
2160b57cec5SDimitry Andric                                              (__v4di)__W);
2170b57cec5SDimitry Andric }
2180b57cec5SDimitry Andric 
2190b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
2200b57cec5SDimitry Andric _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
2210b57cec5SDimitry Andric {
2220b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2230b57cec5SDimitry Andric                                              (__v4di)_mm256_lzcnt_epi64(__A),
2240b57cec5SDimitry Andric                                              (__v4di)_mm256_setzero_si256());
2250b57cec5SDimitry Andric }
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128
2280b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric #endif /* __AVX512VLCDINTRIN_H */
231