xref: /freebsd/contrib/llvm-project/clang/lib/Headers/avx512vlvbmi2intrin.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H
110b57cec5SDimitry Andric #error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
120b57cec5SDimitry Andric #endif
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #ifndef __AVX512VLVBMI2INTRIN_H
150b57cec5SDimitry Andric #define __AVX512VLVBMI2INTRIN_H
160b57cec5SDimitry Andric 
/* Attribute set applied to the 128-bit intrinsics defined in this file:
 * always-inline, nodebug, the target feature string
 * "avx512vl,avx512vbmi2,no-evex512" and a minimum vector width of 128. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, \
                 __nodebug__, \
                 __target__("avx512vl,avx512vbmi2,no-evex512"), \
                 __min_vector_width__(128)))
/* Attribute set applied to the 256-bit intrinsics defined in this file:
 * always-inline, nodebug, the target feature string
 * "avx512vl,avx512vbmi2,no-evex512" and a minimum vector width of 256. */
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, \
                 __nodebug__, \
                 __target__("avx512vl,avx512vbmi2,no-evex512"), \
                 __min_vector_width__(256)))
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
280b57cec5SDimitry Andric _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
290b57cec5SDimitry Andric {
300b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
310b57cec5SDimitry Andric               (__v8hi) __S,
320b57cec5SDimitry Andric               __U);
330b57cec5SDimitry Andric }
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
360b57cec5SDimitry Andric _mm_maskz_compress_epi16(__mmask8 __U, __m128i __D)
370b57cec5SDimitry Andric {
380b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
390b57cec5SDimitry Andric               (__v8hi) _mm_setzero_si128(),
400b57cec5SDimitry Andric               __U);
410b57cec5SDimitry Andric }
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
440b57cec5SDimitry Andric _mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
450b57cec5SDimitry Andric {
460b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
470b57cec5SDimitry Andric               (__v16qi) __S,
480b57cec5SDimitry Andric               __U);
490b57cec5SDimitry Andric }
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
520b57cec5SDimitry Andric _mm_maskz_compress_epi8(__mmask16 __U, __m128i __D)
530b57cec5SDimitry Andric {
540b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
550b57cec5SDimitry Andric               (__v16qi) _mm_setzero_si128(),
560b57cec5SDimitry Andric               __U);
570b57cec5SDimitry Andric }
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128
600b57cec5SDimitry Andric _mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
610b57cec5SDimitry Andric {
620b57cec5SDimitry Andric   __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D,
630b57cec5SDimitry Andric               __U);
640b57cec5SDimitry Andric }
650b57cec5SDimitry Andric 
660b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128
670b57cec5SDimitry Andric _mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
680b57cec5SDimitry Andric {
690b57cec5SDimitry Andric   __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D,
700b57cec5SDimitry Andric               __U);
710b57cec5SDimitry Andric }
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
740b57cec5SDimitry Andric _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
750b57cec5SDimitry Andric {
760b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
770b57cec5SDimitry Andric               (__v8hi) __S,
780b57cec5SDimitry Andric               __U);
790b57cec5SDimitry Andric }
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
820b57cec5SDimitry Andric _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D)
830b57cec5SDimitry Andric {
840b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
850b57cec5SDimitry Andric               (__v8hi) _mm_setzero_si128(),
860b57cec5SDimitry Andric               __U);
870b57cec5SDimitry Andric }
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
900b57cec5SDimitry Andric _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
910b57cec5SDimitry Andric {
920b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
930b57cec5SDimitry Andric               (__v16qi) __S,
940b57cec5SDimitry Andric               __U);
950b57cec5SDimitry Andric }
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
980b57cec5SDimitry Andric _mm_maskz_expand_epi8(__mmask16 __U, __m128i __D)
990b57cec5SDimitry Andric {
1000b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
1010b57cec5SDimitry Andric               (__v16qi) _mm_setzero_si128(),
1020b57cec5SDimitry Andric               __U);
1030b57cec5SDimitry Andric }
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1060b57cec5SDimitry Andric _mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
1070b57cec5SDimitry Andric {
1080b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
1090b57cec5SDimitry Andric               (__v8hi) __S,
1100b57cec5SDimitry Andric               __U);
1110b57cec5SDimitry Andric }
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1140b57cec5SDimitry Andric _mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
1150b57cec5SDimitry Andric {
1160b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
1170b57cec5SDimitry Andric               (__v8hi) _mm_setzero_si128(),
1180b57cec5SDimitry Andric               __U);
1190b57cec5SDimitry Andric }
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1220b57cec5SDimitry Andric _mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
1230b57cec5SDimitry Andric {
1240b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
1250b57cec5SDimitry Andric               (__v16qi) __S,
1260b57cec5SDimitry Andric               __U);
1270b57cec5SDimitry Andric }
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
1300b57cec5SDimitry Andric _mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
1310b57cec5SDimitry Andric {
1320b57cec5SDimitry Andric   return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
1330b57cec5SDimitry Andric               (__v16qi) _mm_setzero_si128(),
1340b57cec5SDimitry Andric               __U);
1350b57cec5SDimitry Andric }
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1380b57cec5SDimitry Andric _mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
1390b57cec5SDimitry Andric {
1400b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
1410b57cec5SDimitry Andric               (__v16hi) __S,
1420b57cec5SDimitry Andric               __U);
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1460b57cec5SDimitry Andric _mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
1470b57cec5SDimitry Andric {
1480b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
1490b57cec5SDimitry Andric               (__v16hi) _mm256_setzero_si256(),
1500b57cec5SDimitry Andric               __U);
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1540b57cec5SDimitry Andric _mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D)
1550b57cec5SDimitry Andric {
1560b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
1570b57cec5SDimitry Andric               (__v32qi) __S,
1580b57cec5SDimitry Andric               __U);
1590b57cec5SDimitry Andric }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1620b57cec5SDimitry Andric _mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
1630b57cec5SDimitry Andric {
1640b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
1650b57cec5SDimitry Andric               (__v32qi) _mm256_setzero_si256(),
1660b57cec5SDimitry Andric               __U);
1670b57cec5SDimitry Andric }
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256
1700b57cec5SDimitry Andric _mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D)
1710b57cec5SDimitry Andric {
1720b57cec5SDimitry Andric   __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D,
1730b57cec5SDimitry Andric               __U);
1740b57cec5SDimitry Andric }
1750b57cec5SDimitry Andric 
1760b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256
1770b57cec5SDimitry Andric _mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
1780b57cec5SDimitry Andric {
1790b57cec5SDimitry Andric   __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D,
1800b57cec5SDimitry Andric               __U);
1810b57cec5SDimitry Andric }
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1840b57cec5SDimitry Andric _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D)
1850b57cec5SDimitry Andric {
1860b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
1870b57cec5SDimitry Andric               (__v16hi) __S,
1880b57cec5SDimitry Andric               __U);
1890b57cec5SDimitry Andric }
1900b57cec5SDimitry Andric 
1910b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
1920b57cec5SDimitry Andric _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
1930b57cec5SDimitry Andric {
1940b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
1950b57cec5SDimitry Andric               (__v16hi) _mm256_setzero_si256(),
1960b57cec5SDimitry Andric               __U);
1970b57cec5SDimitry Andric }
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
2000b57cec5SDimitry Andric _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D)
2010b57cec5SDimitry Andric {
2020b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
2030b57cec5SDimitry Andric               (__v32qi) __S,
2040b57cec5SDimitry Andric               __U);
2050b57cec5SDimitry Andric }
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
2080b57cec5SDimitry Andric _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
2090b57cec5SDimitry Andric {
2100b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
2110b57cec5SDimitry Andric               (__v32qi) _mm256_setzero_si256(),
2120b57cec5SDimitry Andric               __U);
2130b57cec5SDimitry Andric }
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
2160b57cec5SDimitry Andric _mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P)
2170b57cec5SDimitry Andric {
2180b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
2190b57cec5SDimitry Andric               (__v16hi) __S,
2200b57cec5SDimitry Andric               __U);
2210b57cec5SDimitry Andric }
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
2240b57cec5SDimitry Andric _mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
2250b57cec5SDimitry Andric {
2260b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
2270b57cec5SDimitry Andric               (__v16hi) _mm256_setzero_si256(),
2280b57cec5SDimitry Andric               __U);
2290b57cec5SDimitry Andric }
2300b57cec5SDimitry Andric 
2310b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
2320b57cec5SDimitry Andric _mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P)
2330b57cec5SDimitry Andric {
2340b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
2350b57cec5SDimitry Andric               (__v32qi) __S,
2360b57cec5SDimitry Andric               __U);
2370b57cec5SDimitry Andric }
2380b57cec5SDimitry Andric 
2390b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
2400b57cec5SDimitry Andric _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
2410b57cec5SDimitry Andric {
2420b57cec5SDimitry Andric   return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
2430b57cec5SDimitry Andric               (__v32qi) _mm256_setzero_si256(),
2440b57cec5SDimitry Andric               __U);
2450b57cec5SDimitry Andric }
2460b57cec5SDimitry Andric 
/* Forwards A and B (each as __v4di) and (int)I to
 * __builtin_ia32_vpshldq256 and yields the result as __m256i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm256_shldi_epi64(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldq256( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(I)))

/* Hands (__mmask8)U, the _mm256_shldi_epi64(A, B, I) result and S (both as
 * __v4di) to __builtin_ia32_selectq_256. */
#define _mm256_mask_shldi_epi64(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
      (__v4di)(__m256i)(S)))

/* Same select, with _mm256_setzero_si256() as the third operand. */
#define _mm256_maskz_shldi_epi64(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
      (__v4di)_mm256_setzero_si256()))
2600b57cec5SDimitry Andric 
/* Forwards A and B (each as __v2di) and (int)I to
 * __builtin_ia32_vpshldq128 and yields the result as __m128i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm_shldi_epi64(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldq128( \
      (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (int)(I)))

/* Hands (__mmask8)U, the _mm_shldi_epi64(A, B, I) result and S (both as
 * __v2di) to __builtin_ia32_selectq_128. */
#define _mm_mask_shldi_epi64(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_shldi_epi64((A), (B), (I)), \
      (__v2di)(__m128i)(S)))

/* Same select, with _mm_setzero_si128() as the third operand. */
#define _mm_maskz_shldi_epi64(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_shldi_epi64((A), (B), (I)), \
      (__v2di)_mm_setzero_si128()))
2740b57cec5SDimitry Andric 
/* Forwards A and B (each as __v8si) and (int)I to
 * __builtin_ia32_vpshldd256 and yields the result as __m256i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm256_shldi_epi32(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldd256( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(I)))

/* Hands (__mmask8)U, the _mm256_shldi_epi32(A, B, I) result and S (both as
 * __v8si) to __builtin_ia32_selectd_256. */
#define _mm256_mask_shldi_epi32(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
      (__v8si)(__m256i)(S)))

/* Same select, with _mm256_setzero_si256() as the third operand. */
#define _mm256_maskz_shldi_epi32(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
      (__v8si)_mm256_setzero_si256()))
2880b57cec5SDimitry Andric 
/* Forwards A and B (each as __v4si) and (int)I to
 * __builtin_ia32_vpshldd128 and yields the result as __m128i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm_shldi_epi32(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldd128( \
      (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (int)(I)))

/* Hands (__mmask8)U, the _mm_shldi_epi32(A, B, I) result and S (both as
 * __v4si) to __builtin_ia32_selectd_128. */
#define _mm_mask_shldi_epi32(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shldi_epi32((A), (B), (I)), \
      (__v4si)(__m128i)(S)))

/* Same select, with _mm_setzero_si128() as the third operand. */
#define _mm_maskz_shldi_epi32(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shldi_epi32((A), (B), (I)), \
      (__v4si)_mm_setzero_si128()))
3020b57cec5SDimitry Andric 
/* Forwards A and B (each as __v16hi) and (int)I to
 * __builtin_ia32_vpshldw256 and yields the result as __m256i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm256_shldi_epi16(A, B, I) \
  ((__m256i)__builtin_ia32_vpshldw256( \
      (__v16hi)(__m256i)(A), (__v16hi)(__m256i)(B), (int)(I)))

/* Hands (__mmask16)U, the _mm256_shldi_epi16(A, B, I) result and S (both as
 * __v16hi) to __builtin_ia32_selectw_256. */
#define _mm256_mask_shldi_epi16(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
      (__v16hi)(__m256i)(S)))

/* Same select, with _mm256_setzero_si256() as the third operand. */
#define _mm256_maskz_shldi_epi16(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
      (__v16hi)_mm256_setzero_si256()))
3160b57cec5SDimitry Andric 
/* Forwards A and B (each as __v8hi) and (int)I to
 * __builtin_ia32_vpshldw128 and yields the result as __m128i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm_shldi_epi16(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldw128( \
      (__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (int)(I)))

/* Hands (__mmask8)U, the _mm_shldi_epi16(A, B, I) result and S (both as
 * __v8hi) to __builtin_ia32_selectw_128. */
#define _mm_mask_shldi_epi16(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
      (__v8hi)(__m128i)(S)))

/* Same select, with _mm_setzero_si128() as the third operand. */
#define _mm_maskz_shldi_epi16(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
      (__v8hi)_mm_setzero_si128()))
3300b57cec5SDimitry Andric 
/* Forwards A and B (each as __v4di) and (int)I to
 * __builtin_ia32_vpshrdq256 and yields the result as __m256i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm256_shrdi_epi64(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdq256( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(I)))

/* Hands (__mmask8)U, the _mm256_shrdi_epi64(A, B, I) result and S (both as
 * __v4di) to __builtin_ia32_selectq_256. */
#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
      (__v4di)(__m256i)(S)))

/* Same select, with _mm256_setzero_si256() as the third operand. */
#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
      (__v4di)_mm256_setzero_si256()))
3440b57cec5SDimitry Andric 
/* Forwards A and B (each as __v2di) and (int)I to
 * __builtin_ia32_vpshrdq128 and yields the result as __m128i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm_shrdi_epi64(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdq128( \
      (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (int)(I)))

/* Hands (__mmask8)U, the _mm_shrdi_epi64(A, B, I) result and S (both as
 * __v2di) to __builtin_ia32_selectq_128. */
#define _mm_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
      (__v2di)(__m128i)(S)))

/* Same select, with _mm_setzero_si128() as the third operand. */
#define _mm_maskz_shrdi_epi64(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
      (__v2di)_mm_setzero_si128()))
3580b57cec5SDimitry Andric 
/* Forwards A and B (each as __v8si) and (int)I to
 * __builtin_ia32_vpshrdd256 and yields the result as __m256i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm256_shrdi_epi32(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdd256( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(I)))

/* Hands (__mmask8)U, the _mm256_shrdi_epi32(A, B, I) result and S (both as
 * __v8si) to __builtin_ia32_selectd_256. */
#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
      (__v8si)(__m256i)(S)))

/* Same select, with _mm256_setzero_si256() as the third operand. */
#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
      (__v8si)_mm256_setzero_si256()))
3720b57cec5SDimitry Andric 
/* Forwards A and B (each as __v4si) and (int)I to
 * __builtin_ia32_vpshrdd128 and yields the result as __m128i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm_shrdi_epi32(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdd128( \
      (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (int)(I)))

/* Hands (__mmask8)U, the _mm_shrdi_epi32(A, B, I) result and S (both as
 * __v4si) to __builtin_ia32_selectd_128. */
#define _mm_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
      (__v4si)(__m128i)(S)))

/* Same select, with _mm_setzero_si128() as the third operand. */
#define _mm_maskz_shrdi_epi32(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
      (__v4si)_mm_setzero_si128()))
3860b57cec5SDimitry Andric 
/* Forwards A and B (each as __v16hi) and (int)I to
 * __builtin_ia32_vpshrdw256 and yields the result as __m256i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm256_shrdi_epi16(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdw256( \
      (__v16hi)(__m256i)(A), (__v16hi)(__m256i)(B), (int)(I)))

/* Hands (__mmask16)U, the _mm256_shrdi_epi16(A, B, I) result and S (both as
 * __v16hi) to __builtin_ia32_selectw_256. */
#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
      (__v16hi)(__m256i)(S)))

/* Same select, with _mm256_setzero_si256() as the third operand. */
#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
      (__v16hi)_mm256_setzero_si256()))
4000b57cec5SDimitry Andric 
/* Forwards A and B (each as __v8hi) and (int)I to
 * __builtin_ia32_vpshrdw128 and yields the result as __m128i.
 * NOTE(review): presumably a macro rather than a function because I has to
 * be a compile-time constant for the builtin -- confirm. */
#define _mm_shrdi_epi16(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdw128( \
      (__v8hi)(__m128i)(A), (__v8hi)(__m128i)(B), (int)(I)))

/* Hands (__mmask8)U, the _mm_shrdi_epi16(A, B, I) result and S (both as
 * __v8hi) to __builtin_ia32_selectw_128. */
#define _mm_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
      (__v8hi)(__m128i)(S)))

/* Same select, with _mm_setzero_si128() as the third operand. */
#define _mm_maskz_shrdi_epi16(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
      (__v8hi)_mm_setzero_si128()))
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
4160b57cec5SDimitry Andric _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C)
4170b57cec5SDimitry Andric {
4180b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B,
4190b57cec5SDimitry Andric                                              (__v4di)__C);
4200b57cec5SDimitry Andric }
4210b57cec5SDimitry Andric 
4220b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
4230b57cec5SDimitry Andric _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
4240b57cec5SDimitry Andric {
4250b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectq_256(__U,
4260b57cec5SDimitry Andric                                       (__v4di)_mm256_shldv_epi64(__A, __B, __C),
4270b57cec5SDimitry Andric                                       (__v4di)__A);
4280b57cec5SDimitry Andric }
4290b57cec5SDimitry Andric 
4300b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
4310b57cec5SDimitry Andric _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
4320b57cec5SDimitry Andric {
4330b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectq_256(__U,
4340b57cec5SDimitry Andric                                       (__v4di)_mm256_shldv_epi64(__A, __B, __C),
4350b57cec5SDimitry Andric                                       (__v4di)_mm256_setzero_si256());
4360b57cec5SDimitry Andric }
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
4390b57cec5SDimitry Andric _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C)
4400b57cec5SDimitry Andric {
4410b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B,
4420b57cec5SDimitry Andric                                              (__v2di)__C);
4430b57cec5SDimitry Andric }
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
4460b57cec5SDimitry Andric _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
4470b57cec5SDimitry Andric {
4480b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectq_128(__U,
4490b57cec5SDimitry Andric                                          (__v2di)_mm_shldv_epi64(__A, __B, __C),
4500b57cec5SDimitry Andric                                          (__v2di)__A);
4510b57cec5SDimitry Andric }
4520b57cec5SDimitry Andric 
4530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
4540b57cec5SDimitry Andric _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
4550b57cec5SDimitry Andric {
4560b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectq_128(__U,
4570b57cec5SDimitry Andric                                          (__v2di)_mm_shldv_epi64(__A, __B, __C),
4580b57cec5SDimitry Andric                                          (__v2di)_mm_setzero_si128());
4590b57cec5SDimitry Andric }
4600b57cec5SDimitry Andric 
4610b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
4620b57cec5SDimitry Andric _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C)
4630b57cec5SDimitry Andric {
4640b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B,
4650b57cec5SDimitry Andric                                              (__v8si)__C);
4660b57cec5SDimitry Andric }
4670b57cec5SDimitry Andric 
4680b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
4690b57cec5SDimitry Andric _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
4700b57cec5SDimitry Andric {
4710b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectd_256(__U,
4720b57cec5SDimitry Andric                                       (__v8si)_mm256_shldv_epi32(__A, __B, __C),
4730b57cec5SDimitry Andric                                       (__v8si)__A);
4740b57cec5SDimitry Andric }
4750b57cec5SDimitry Andric 
4760b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
4770b57cec5SDimitry Andric _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
4780b57cec5SDimitry Andric {
4790b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectd_256(__U,
4800b57cec5SDimitry Andric                                       (__v8si)_mm256_shldv_epi32(__A, __B, __C),
4810b57cec5SDimitry Andric                                       (__v8si)_mm256_setzero_si256());
4820b57cec5SDimitry Andric }
4830b57cec5SDimitry Andric 
/*
 * Unmasked 128-bit wrapper over __builtin_ia32_vpshldvd128.
 * __A, __B and __C are each cast to __v4si, passed to the builtin in that
 * order, and the builtin's result is cast back to __m128i.
 * NOTE(review): lane semantics come from the builtin (presumably the VPSHLDVD
 * variable double-shift-left instruction) -- confirm against the ISA reference.
 */
4840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
4850b57cec5SDimitry Andric _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C)
4860b57cec5SDimitry Andric {
4870b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B,
4880b57cec5SDimitry Andric                                              (__v4si)__C);
4890b57cec5SDimitry Andric }
4900b57cec5SDimitry Andric 
/*
 * Merge-masking variant of _mm_shldv_epi32.
 * Computes _mm_shldv_epi32(__A, __B, __C) and passes it, together with the
 * mask __U and the original __A, to __builtin_ia32_selectd_128; __A therefore
 * serves both as the first shift operand and as the select's fallback vector.
 * NOTE(review): presumably a set bit in __U picks the shifted lane and a clear
 * bit keeps the __A lane -- confirm against the select builtin's definition.
 */
4910b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
4920b57cec5SDimitry Andric _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
4930b57cec5SDimitry Andric {
4940b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectd_128(__U,
4950b57cec5SDimitry Andric                                          (__v4si)_mm_shldv_epi32(__A, __B, __C),
4960b57cec5SDimitry Andric                                          (__v4si)__A);
4970b57cec5SDimitry Andric }
4980b57cec5SDimitry Andric 
/*
 * Zero-masking variant of _mm_shldv_epi32.
 * Computes _mm_shldv_epi32(__A, __B, __C) and passes it, together with the
 * mask __U and the vector returned by _mm_setzero_si128(), to
 * __builtin_ia32_selectd_128.
 * NOTE(review): presumably lanes whose __U bit is clear come out as zero --
 * confirm against the select builtin's definition.
 */
4990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
5000b57cec5SDimitry Andric _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
5010b57cec5SDimitry Andric {
5020b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectd_128(__U,
5030b57cec5SDimitry Andric                                          (__v4si)_mm_shldv_epi32(__A, __B, __C),
5040b57cec5SDimitry Andric                                          (__v4si)_mm_setzero_si128());
5050b57cec5SDimitry Andric }
5060b57cec5SDimitry Andric 
/*
 * Unmasked 256-bit wrapper over __builtin_ia32_vpshldvw256.
 * __A, __B and __C are each cast to __v16hi, passed to the builtin in that
 * order, and the builtin's result is cast back to __m256i.
 * NOTE(review): lane semantics come from the builtin (presumably the VPSHLDVW
 * variable double-shift-left instruction) -- confirm against the ISA reference.
 */
5070b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
5080b57cec5SDimitry Andric _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C)
5090b57cec5SDimitry Andric {
5100b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B,
5110b57cec5SDimitry Andric                                              (__v16hi)__C);
5120b57cec5SDimitry Andric }
5130b57cec5SDimitry Andric 
/*
 * Merge-masking variant of _mm256_shldv_epi16; the mask parameter is an
 * __mmask16.
 * Computes _mm256_shldv_epi16(__A, __B, __C) and passes it, together with the
 * mask __U and the original __A, to __builtin_ia32_selectw_256; __A therefore
 * serves both as the first shift operand and as the select's fallback vector.
 * NOTE(review): presumably a set bit in __U picks the shifted lane and a clear
 * bit keeps the __A lane -- confirm against the select builtin's definition.
 */
5140b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
5150b57cec5SDimitry Andric _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
5160b57cec5SDimitry Andric {
5170b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectw_256(__U,
5180b57cec5SDimitry Andric                                       (__v16hi)_mm256_shldv_epi16(__A, __B, __C),
5190b57cec5SDimitry Andric                                       (__v16hi)__A);
5200b57cec5SDimitry Andric }
5210b57cec5SDimitry Andric 
/*
 * Zero-masking variant of _mm256_shldv_epi16; the mask parameter is an
 * __mmask16.
 * Computes _mm256_shldv_epi16(__A, __B, __C) and passes it, together with the
 * mask __U and the vector returned by _mm256_setzero_si256(), to
 * __builtin_ia32_selectw_256.
 * NOTE(review): presumably lanes whose __U bit is clear come out as zero --
 * confirm against the select builtin's definition.
 */
5220b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
5230b57cec5SDimitry Andric _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
5240b57cec5SDimitry Andric {
5250b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectw_256(__U,
5260b57cec5SDimitry Andric                                       (__v16hi)_mm256_shldv_epi16(__A, __B, __C),
5270b57cec5SDimitry Andric                                       (__v16hi)_mm256_setzero_si256());
5280b57cec5SDimitry Andric }
5290b57cec5SDimitry Andric 
/*
 * Unmasked 128-bit wrapper over __builtin_ia32_vpshldvw128.
 * __A, __B and __C are each cast to __v8hi, passed to the builtin in that
 * order, and the builtin's result is cast back to __m128i.
 * NOTE(review): lane semantics come from the builtin (presumably the VPSHLDVW
 * variable double-shift-left instruction) -- confirm against the ISA reference.
 */
5300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
5310b57cec5SDimitry Andric _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C)
5320b57cec5SDimitry Andric {
5330b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B,
5340b57cec5SDimitry Andric                                              (__v8hi)__C);
5350b57cec5SDimitry Andric }
5360b57cec5SDimitry Andric 
/*
 * Merge-masking variant of _mm_shldv_epi16.
 * Computes _mm_shldv_epi16(__A, __B, __C) and passes it, together with the
 * mask __U and the original __A, to __builtin_ia32_selectw_128; __A therefore
 * serves both as the first shift operand and as the select's fallback vector.
 * NOTE(review): presumably a set bit in __U picks the shifted lane and a clear
 * bit keeps the __A lane -- confirm against the select builtin's definition.
 */
5370b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
5380b57cec5SDimitry Andric _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
5390b57cec5SDimitry Andric {
5400b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectw_128(__U,
5410b57cec5SDimitry Andric                                          (__v8hi)_mm_shldv_epi16(__A, __B, __C),
5420b57cec5SDimitry Andric                                          (__v8hi)__A);
5430b57cec5SDimitry Andric }
5440b57cec5SDimitry Andric 
/*
 * Zero-masking variant of _mm_shldv_epi16.
 * Computes _mm_shldv_epi16(__A, __B, __C) and passes it, together with the
 * mask __U and the vector returned by _mm_setzero_si128(), to
 * __builtin_ia32_selectw_128.
 * NOTE(review): presumably lanes whose __U bit is clear come out as zero --
 * confirm against the select builtin's definition.
 */
5450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
5460b57cec5SDimitry Andric _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
5470b57cec5SDimitry Andric {
5480b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectw_128(__U,
5490b57cec5SDimitry Andric                                          (__v8hi)_mm_shldv_epi16(__A, __B, __C),
5500b57cec5SDimitry Andric                                          (__v8hi)_mm_setzero_si128());
5510b57cec5SDimitry Andric }
5520b57cec5SDimitry Andric 
/*
 * Unmasked 256-bit wrapper over __builtin_ia32_vpshrdvq256.
 * __A, __B and __C are each cast to __v4di, passed to the builtin in that
 * order, and the builtin's result is cast back to __m256i.
 * NOTE(review): lane semantics come from the builtin (presumably the VPSHRDVQ
 * variable double-shift-right instruction) -- confirm against the ISA
 * reference.
 */
5530b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
5540b57cec5SDimitry Andric _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C)
5550b57cec5SDimitry Andric {
5560b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B,
5570b57cec5SDimitry Andric                                              (__v4di)__C);
5580b57cec5SDimitry Andric }
5590b57cec5SDimitry Andric 
/*
 * Merge-masking variant of _mm256_shrdv_epi64.
 * Computes _mm256_shrdv_epi64(__A, __B, __C) and passes it, together with the
 * mask __U and the original __A, to __builtin_ia32_selectq_256; __A therefore
 * serves both as the first shift operand and as the select's fallback vector.
 * NOTE(review): presumably a set bit in __U picks the shifted lane and a clear
 * bit keeps the __A lane -- confirm against the select builtin's definition.
 */
5600b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
5610b57cec5SDimitry Andric _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
5620b57cec5SDimitry Andric {
5630b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectq_256(__U,
5640b57cec5SDimitry Andric                                       (__v4di)_mm256_shrdv_epi64(__A, __B, __C),
5650b57cec5SDimitry Andric                                       (__v4di)__A);
5660b57cec5SDimitry Andric }
5670b57cec5SDimitry Andric 
/*
 * Zero-masking variant of _mm256_shrdv_epi64.
 * Computes _mm256_shrdv_epi64(__A, __B, __C) and passes it, together with the
 * mask __U and the vector returned by _mm256_setzero_si256(), to
 * __builtin_ia32_selectq_256.
 * NOTE(review): presumably lanes whose __U bit is clear come out as zero --
 * confirm against the select builtin's definition.
 */
5680b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
5690b57cec5SDimitry Andric _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
5700b57cec5SDimitry Andric {
5710b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectq_256(__U,
5720b57cec5SDimitry Andric                                       (__v4di)_mm256_shrdv_epi64(__A, __B, __C),
5730b57cec5SDimitry Andric                                       (__v4di)_mm256_setzero_si256());
5740b57cec5SDimitry Andric }
5750b57cec5SDimitry Andric 
/*
 * Unmasked 128-bit wrapper over __builtin_ia32_vpshrdvq128.
 * __A, __B and __C are each cast to __v2di, passed to the builtin in that
 * order, and the builtin's result is cast back to __m128i.
 * NOTE(review): lane semantics come from the builtin (presumably the VPSHRDVQ
 * variable double-shift-right instruction) -- confirm against the ISA
 * reference.
 */
5760b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
5770b57cec5SDimitry Andric _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C)
5780b57cec5SDimitry Andric {
5790b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B,
5800b57cec5SDimitry Andric                                              (__v2di)__C);
5810b57cec5SDimitry Andric }
5820b57cec5SDimitry Andric 
/*
 * Merge-masking variant of _mm_shrdv_epi64.
 * Computes _mm_shrdv_epi64(__A, __B, __C) and passes it, together with the
 * mask __U and the original __A, to __builtin_ia32_selectq_128; __A therefore
 * serves both as the first shift operand and as the select's fallback vector.
 * NOTE(review): presumably a set bit in __U picks the shifted lane and a clear
 * bit keeps the __A lane -- confirm against the select builtin's definition.
 */
5830b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
5840b57cec5SDimitry Andric _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
5850b57cec5SDimitry Andric {
5860b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectq_128(__U,
5870b57cec5SDimitry Andric                                          (__v2di)_mm_shrdv_epi64(__A, __B, __C),
5880b57cec5SDimitry Andric                                          (__v2di)__A);
5890b57cec5SDimitry Andric }
5900b57cec5SDimitry Andric 
/*
 * Zero-masking variant of _mm_shrdv_epi64.
 * Computes _mm_shrdv_epi64(__A, __B, __C) and passes it, together with the
 * mask __U and the vector returned by _mm_setzero_si128(), to
 * __builtin_ia32_selectq_128.
 * NOTE(review): presumably lanes whose __U bit is clear come out as zero --
 * confirm against the select builtin's definition.
 */
5910b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
5920b57cec5SDimitry Andric _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
5930b57cec5SDimitry Andric {
5940b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectq_128(__U,
5950b57cec5SDimitry Andric                                          (__v2di)_mm_shrdv_epi64(__A, __B, __C),
5960b57cec5SDimitry Andric                                          (__v2di)_mm_setzero_si128());
5970b57cec5SDimitry Andric }
5980b57cec5SDimitry Andric 
/*
 * Unmasked 256-bit wrapper over __builtin_ia32_vpshrdvd256.
 * __A, __B and __C are each cast to __v8si, passed to the builtin in that
 * order, and the builtin's result is cast back to __m256i.
 * NOTE(review): lane semantics come from the builtin (presumably the VPSHRDVD
 * variable double-shift-right instruction) -- confirm against the ISA
 * reference.
 */
5990b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
6000b57cec5SDimitry Andric _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C)
6010b57cec5SDimitry Andric {
6020b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B,
6030b57cec5SDimitry Andric                                              (__v8si)__C);
6040b57cec5SDimitry Andric }
6050b57cec5SDimitry Andric 
/*
 * Merge-masking variant of _mm256_shrdv_epi32.
 * Computes _mm256_shrdv_epi32(__A, __B, __C) and passes it, together with the
 * mask __U and the original __A, to __builtin_ia32_selectd_256; __A therefore
 * serves both as the first shift operand and as the select's fallback vector.
 * NOTE(review): presumably a set bit in __U picks the shifted lane and a clear
 * bit keeps the __A lane -- confirm against the select builtin's definition.
 */
6060b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
6070b57cec5SDimitry Andric _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C)
6080b57cec5SDimitry Andric {
6090b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectd_256(__U,
6100b57cec5SDimitry Andric                                       (__v8si)_mm256_shrdv_epi32(__A, __B, __C),
6110b57cec5SDimitry Andric                                       (__v8si)__A);
6120b57cec5SDimitry Andric }
6130b57cec5SDimitry Andric 
/*
 * Zero-masking variant of _mm256_shrdv_epi32.
 * Computes _mm256_shrdv_epi32(__A, __B, __C) and passes it, together with the
 * mask __U and the vector returned by _mm256_setzero_si256(), to
 * __builtin_ia32_selectd_256.
 * NOTE(review): presumably lanes whose __U bit is clear come out as zero --
 * confirm against the select builtin's definition.
 */
6140b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
6150b57cec5SDimitry Andric _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
6160b57cec5SDimitry Andric {
6170b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectd_256(__U,
6180b57cec5SDimitry Andric                                       (__v8si)_mm256_shrdv_epi32(__A, __B, __C),
6190b57cec5SDimitry Andric                                       (__v8si)_mm256_setzero_si256());
6200b57cec5SDimitry Andric }
6210b57cec5SDimitry Andric 
/*
 * Unmasked 128-bit wrapper over __builtin_ia32_vpshrdvd128.
 * __A, __B and __C are each cast to __v4si, passed to the builtin in that
 * order, and the builtin's result is cast back to __m128i.
 * NOTE(review): lane semantics come from the builtin (presumably the VPSHRDVD
 * variable double-shift-right instruction) -- confirm against the ISA
 * reference.
 */
6220b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
6230b57cec5SDimitry Andric _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C)
6240b57cec5SDimitry Andric {
6250b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B,
6260b57cec5SDimitry Andric                                              (__v4si)__C);
6270b57cec5SDimitry Andric }
6280b57cec5SDimitry Andric 
/*
 * Merge-masking variant of _mm_shrdv_epi32.
 * Computes _mm_shrdv_epi32(__A, __B, __C) and passes it, together with the
 * mask __U and the original __A, to __builtin_ia32_selectd_128; __A therefore
 * serves both as the first shift operand and as the select's fallback vector.
 * NOTE(review): presumably a set bit in __U picks the shifted lane and a clear
 * bit keeps the __A lane -- confirm against the select builtin's definition.
 */
6290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
6300b57cec5SDimitry Andric _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
6310b57cec5SDimitry Andric {
6320b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectd_128(__U,
6330b57cec5SDimitry Andric                                          (__v4si)_mm_shrdv_epi32(__A, __B, __C),
6340b57cec5SDimitry Andric                                          (__v4si)__A);
6350b57cec5SDimitry Andric }
6360b57cec5SDimitry Andric 
/*
 * Zero-masking variant of _mm_shrdv_epi32.
 * Computes _mm_shrdv_epi32(__A, __B, __C) and passes it, together with the
 * mask __U and the vector returned by _mm_setzero_si128(), to
 * __builtin_ia32_selectd_128.
 * NOTE(review): presumably lanes whose __U bit is clear come out as zero --
 * confirm against the select builtin's definition.
 */
6370b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
6380b57cec5SDimitry Andric _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
6390b57cec5SDimitry Andric {
6400b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectd_128(__U,
6410b57cec5SDimitry Andric                                          (__v4si)_mm_shrdv_epi32(__A, __B, __C),
6420b57cec5SDimitry Andric                                          (__v4si)_mm_setzero_si128());
6430b57cec5SDimitry Andric }
6440b57cec5SDimitry Andric 
/*
 * Unmasked 256-bit wrapper over __builtin_ia32_vpshrdvw256.
 * __A, __B and __C are each cast to __v16hi, passed to the builtin in that
 * order, and the builtin's result is cast back to __m256i.
 * NOTE(review): lane semantics come from the builtin (presumably the VPSHRDVW
 * variable double-shift-right instruction) -- confirm against the ISA
 * reference.
 */
6450b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
6460b57cec5SDimitry Andric _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C)
6470b57cec5SDimitry Andric {
6480b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B,
6490b57cec5SDimitry Andric                                              (__v16hi)__C);
6500b57cec5SDimitry Andric }
6510b57cec5SDimitry Andric 
/*
 * Merge-masking variant of _mm256_shrdv_epi16; the mask parameter is an
 * __mmask16.
 * Computes _mm256_shrdv_epi16(__A, __B, __C) and passes it, together with the
 * mask __U and the original __A, to __builtin_ia32_selectw_256; __A therefore
 * serves both as the first shift operand and as the select's fallback vector.
 * NOTE(review): presumably a set bit in __U picks the shifted lane and a clear
 * bit keeps the __A lane -- confirm against the select builtin's definition.
 */
6520b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
6530b57cec5SDimitry Andric _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C)
6540b57cec5SDimitry Andric {
6550b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectw_256(__U,
6560b57cec5SDimitry Andric                                      (__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
6570b57cec5SDimitry Andric                                      (__v16hi)__A);
6580b57cec5SDimitry Andric }
6590b57cec5SDimitry Andric 
/*
 * Zero-masking variant of _mm256_shrdv_epi16; the mask parameter is an
 * __mmask16.
 * Computes _mm256_shrdv_epi16(__A, __B, __C) and passes it, together with the
 * mask __U and the vector returned by _mm256_setzero_si256(), to
 * __builtin_ia32_selectw_256.
 * NOTE(review): presumably lanes whose __U bit is clear come out as zero --
 * confirm against the select builtin's definition.
 */
6600b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256
6610b57cec5SDimitry Andric _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
6620b57cec5SDimitry Andric {
6630b57cec5SDimitry Andric   return (__m256i)__builtin_ia32_selectw_256(__U,
6640b57cec5SDimitry Andric                                      (__v16hi)_mm256_shrdv_epi16(__A, __B, __C),
6650b57cec5SDimitry Andric                                      (__v16hi)_mm256_setzero_si256());
6660b57cec5SDimitry Andric }
6670b57cec5SDimitry Andric 
/*
 * Unmasked 128-bit wrapper over __builtin_ia32_vpshrdvw128.
 * __A, __B and __C are each cast to __v8hi, passed to the builtin in that
 * order, and the builtin's result is cast back to __m128i.
 * NOTE(review): lane semantics come from the builtin (presumably the VPSHRDVW
 * variable double-shift-right instruction) -- confirm against the ISA
 * reference.
 */
6680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
6690b57cec5SDimitry Andric _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C)
6700b57cec5SDimitry Andric {
6710b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B,
6720b57cec5SDimitry Andric                                              (__v8hi)__C);
6730b57cec5SDimitry Andric }
6740b57cec5SDimitry Andric 
/*
 * Merge-masking variant of _mm_shrdv_epi16.
 * Computes _mm_shrdv_epi16(__A, __B, __C) and passes it, together with the
 * mask __U and the original __A, to __builtin_ia32_selectw_128; __A therefore
 * serves both as the first shift operand and as the select's fallback vector.
 * NOTE(review): presumably a set bit in __U picks the shifted lane and a clear
 * bit keeps the __A lane -- confirm against the select builtin's definition.
 */
6750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
6760b57cec5SDimitry Andric _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C)
6770b57cec5SDimitry Andric {
6780b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectw_128(__U,
6790b57cec5SDimitry Andric                                          (__v8hi)_mm_shrdv_epi16(__A, __B, __C),
6800b57cec5SDimitry Andric                                          (__v8hi)__A);
6810b57cec5SDimitry Andric }
6820b57cec5SDimitry Andric 
/*
 * Zero-masking variant of _mm_shrdv_epi16.
 * Computes _mm_shrdv_epi16(__A, __B, __C) and passes it, together with the
 * mask __U and the vector returned by _mm_setzero_si128(), to
 * __builtin_ia32_selectw_128.
 * NOTE(review): presumably lanes whose __U bit is clear come out as zero --
 * confirm against the select builtin's definition.
 */
6830b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128
6840b57cec5SDimitry Andric _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
6850b57cec5SDimitry Andric {
6860b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_selectw_128(__U,
6870b57cec5SDimitry Andric                                          (__v8hi)_mm_shrdv_epi16(__A, __B, __C),
6880b57cec5SDimitry Andric                                          (__v8hi)_mm_setzero_si128());
6890b57cec5SDimitry Andric }
6900b57cec5SDimitry Andric 
6910b57cec5SDimitry Andric 
6920b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128
6930b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256
6940b57cec5SDimitry Andric 
6950b57cec5SDimitry Andric #endif
696