10b57cec5SDimitry Andric /*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * 40b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 50b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 60b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 70b57cec5SDimitry Andric * 80b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 90b57cec5SDimitry Andric */ 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __AVX512VLVBMI2INTRIN_H 150b57cec5SDimitry Andric #define __AVX512VLVBMI2INTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. 
*/ 18*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS128 \ 19*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 20*5f757f3fSDimitry Andric __target__("avx512vl,avx512vbmi2,no-evex512"), \ 21*5f757f3fSDimitry Andric __min_vector_width__(128))) 22*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS256 \ 23*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 24*5f757f3fSDimitry Andric __target__("avx512vl,avx512vbmi2,no-evex512"), \ 25*5f757f3fSDimitry Andric __min_vector_width__(256))) 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 280b57cec5SDimitry Andric _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) 290b57cec5SDimitry Andric { 300b57cec5SDimitry Andric return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, 310b57cec5SDimitry Andric (__v8hi) __S, 320b57cec5SDimitry Andric __U); 330b57cec5SDimitry Andric } 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 360b57cec5SDimitry Andric _mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) 370b57cec5SDimitry Andric { 380b57cec5SDimitry Andric return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, 390b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128(), 400b57cec5SDimitry Andric __U); 410b57cec5SDimitry Andric } 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 440b57cec5SDimitry Andric _mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) 450b57cec5SDimitry Andric { 460b57cec5SDimitry Andric return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, 470b57cec5SDimitry Andric (__v16qi) __S, 480b57cec5SDimitry Andric __U); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 520b57cec5SDimitry Andric _mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) 530b57cec5SDimitry Andric { 540b57cec5SDimitry 
Andric return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, 550b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 560b57cec5SDimitry Andric __U); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 600b57cec5SDimitry Andric _mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) 610b57cec5SDimitry Andric { 620b57cec5SDimitry Andric __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, 630b57cec5SDimitry Andric __U); 640b57cec5SDimitry Andric } 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 670b57cec5SDimitry Andric _mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) 680b57cec5SDimitry Andric { 690b57cec5SDimitry Andric __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, 700b57cec5SDimitry Andric __U); 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 740b57cec5SDimitry Andric _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) 750b57cec5SDimitry Andric { 760b57cec5SDimitry Andric return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, 770b57cec5SDimitry Andric (__v8hi) __S, 780b57cec5SDimitry Andric __U); 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 820b57cec5SDimitry Andric _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) 830b57cec5SDimitry Andric { 840b57cec5SDimitry Andric return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, 850b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128(), 860b57cec5SDimitry Andric __U); 870b57cec5SDimitry Andric } 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 900b57cec5SDimitry Andric _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) 910b57cec5SDimitry Andric { 920b57cec5SDimitry 
Andric return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, 930b57cec5SDimitry Andric (__v16qi) __S, 940b57cec5SDimitry Andric __U); 950b57cec5SDimitry Andric } 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 980b57cec5SDimitry Andric _mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) 990b57cec5SDimitry Andric { 1000b57cec5SDimitry Andric return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, 1010b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 1020b57cec5SDimitry Andric __U); 1030b57cec5SDimitry Andric } 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1060b57cec5SDimitry Andric _mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) 1070b57cec5SDimitry Andric { 1080b57cec5SDimitry Andric return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, 1090b57cec5SDimitry Andric (__v8hi) __S, 1100b57cec5SDimitry Andric __U); 1110b57cec5SDimitry Andric } 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1140b57cec5SDimitry Andric _mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) 1150b57cec5SDimitry Andric { 1160b57cec5SDimitry Andric return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, 1170b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128(), 1180b57cec5SDimitry Andric __U); 1190b57cec5SDimitry Andric } 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 1220b57cec5SDimitry Andric _mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) 1230b57cec5SDimitry Andric { 1240b57cec5SDimitry Andric return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, 1250b57cec5SDimitry Andric (__v16qi) __S, 1260b57cec5SDimitry Andric __U); 1270b57cec5SDimitry Andric } 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 
1300b57cec5SDimitry Andric _mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) 1310b57cec5SDimitry Andric { 1320b57cec5SDimitry Andric return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, 1330b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 1340b57cec5SDimitry Andric __U); 1350b57cec5SDimitry Andric } 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1380b57cec5SDimitry Andric _mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) 1390b57cec5SDimitry Andric { 1400b57cec5SDimitry Andric return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, 1410b57cec5SDimitry Andric (__v16hi) __S, 1420b57cec5SDimitry Andric __U); 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1460b57cec5SDimitry Andric _mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) 1470b57cec5SDimitry Andric { 1480b57cec5SDimitry Andric return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, 1490b57cec5SDimitry Andric (__v16hi) _mm256_setzero_si256(), 1500b57cec5SDimitry Andric __U); 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1540b57cec5SDimitry Andric _mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) 1550b57cec5SDimitry Andric { 1560b57cec5SDimitry Andric return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, 1570b57cec5SDimitry Andric (__v32qi) __S, 1580b57cec5SDimitry Andric __U); 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1620b57cec5SDimitry Andric _mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) 1630b57cec5SDimitry Andric { 1640b57cec5SDimitry Andric return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, 1650b57cec5SDimitry Andric (__v32qi) _mm256_setzero_si256(), 
1660b57cec5SDimitry Andric __U); 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 1700b57cec5SDimitry Andric _mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) 1710b57cec5SDimitry Andric { 1720b57cec5SDimitry Andric __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, 1730b57cec5SDimitry Andric __U); 1740b57cec5SDimitry Andric } 1750b57cec5SDimitry Andric 1760b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 1770b57cec5SDimitry Andric _mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) 1780b57cec5SDimitry Andric { 1790b57cec5SDimitry Andric __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, 1800b57cec5SDimitry Andric __U); 1810b57cec5SDimitry Andric } 1820b57cec5SDimitry Andric 1830b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1840b57cec5SDimitry Andric _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) 1850b57cec5SDimitry Andric { 1860b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, 1870b57cec5SDimitry Andric (__v16hi) __S, 1880b57cec5SDimitry Andric __U); 1890b57cec5SDimitry Andric } 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 1920b57cec5SDimitry Andric _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) 1930b57cec5SDimitry Andric { 1940b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, 1950b57cec5SDimitry Andric (__v16hi) _mm256_setzero_si256(), 1960b57cec5SDimitry Andric __U); 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2000b57cec5SDimitry Andric _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) 2010b57cec5SDimitry Andric { 2020b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, 
2030b57cec5SDimitry Andric (__v32qi) __S, 2040b57cec5SDimitry Andric __U); 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2080b57cec5SDimitry Andric _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) 2090b57cec5SDimitry Andric { 2100b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, 2110b57cec5SDimitry Andric (__v32qi) _mm256_setzero_si256(), 2120b57cec5SDimitry Andric __U); 2130b57cec5SDimitry Andric } 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2160b57cec5SDimitry Andric _mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) 2170b57cec5SDimitry Andric { 2180b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, 2190b57cec5SDimitry Andric (__v16hi) __S, 2200b57cec5SDimitry Andric __U); 2210b57cec5SDimitry Andric } 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2240b57cec5SDimitry Andric _mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) 2250b57cec5SDimitry Andric { 2260b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, 2270b57cec5SDimitry Andric (__v16hi) _mm256_setzero_si256(), 2280b57cec5SDimitry Andric __U); 2290b57cec5SDimitry Andric } 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2320b57cec5SDimitry Andric _mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) 2330b57cec5SDimitry Andric { 2340b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, 2350b57cec5SDimitry Andric (__v32qi) __S, 2360b57cec5SDimitry Andric __U); 2370b57cec5SDimitry Andric } 2380b57cec5SDimitry Andric 2390b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2400b57cec5SDimitry Andric 
_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) 2410b57cec5SDimitry Andric { 2420b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, 2430b57cec5SDimitry Andric (__v32qi) _mm256_setzero_si256(), 2440b57cec5SDimitry Andric __U); 2450b57cec5SDimitry Andric } 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric #define _mm256_shldi_epi64(A, B, I) \ 248349cc55cSDimitry Andric ((__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \ 249349cc55cSDimitry Andric (__v4di)(__m256i)(B), (int)(I))) 2500b57cec5SDimitry Andric 2510b57cec5SDimitry Andric #define _mm256_mask_shldi_epi64(S, U, A, B, I) \ 252349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 2530b57cec5SDimitry Andric (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ 254349cc55cSDimitry Andric (__v4di)(__m256i)(S))) 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric #define _mm256_maskz_shldi_epi64(U, A, B, I) \ 257349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 2580b57cec5SDimitry Andric (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ 259349cc55cSDimitry Andric (__v4di)_mm256_setzero_si256())) 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric #define _mm_shldi_epi64(A, B, I) \ 262349cc55cSDimitry Andric ((__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \ 263349cc55cSDimitry Andric (__v2di)(__m128i)(B), (int)(I))) 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric #define _mm_mask_shldi_epi64(S, U, A, B, I) \ 266349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 2670b57cec5SDimitry Andric (__v2di)_mm_shldi_epi64((A), (B), (I)), \ 268349cc55cSDimitry Andric (__v2di)(__m128i)(S))) 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric #define _mm_maskz_shldi_epi64(U, A, B, I) \ 271349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 2720b57cec5SDimitry Andric (__v2di)_mm_shldi_epi64((A), (B), (I)), \ 273349cc55cSDimitry Andric 
(__v2di)_mm_setzero_si128())) 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric #define _mm256_shldi_epi32(A, B, I) \ 276349cc55cSDimitry Andric ((__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \ 277349cc55cSDimitry Andric (__v8si)(__m256i)(B), (int)(I))) 2780b57cec5SDimitry Andric 2790b57cec5SDimitry Andric #define _mm256_mask_shldi_epi32(S, U, A, B, I) \ 280349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 2810b57cec5SDimitry Andric (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ 282349cc55cSDimitry Andric (__v8si)(__m256i)(S))) 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric #define _mm256_maskz_shldi_epi32(U, A, B, I) \ 285349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 2860b57cec5SDimitry Andric (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ 287349cc55cSDimitry Andric (__v8si)_mm256_setzero_si256())) 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric #define _mm_shldi_epi32(A, B, I) \ 290349cc55cSDimitry Andric ((__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \ 291349cc55cSDimitry Andric (__v4si)(__m128i)(B), (int)(I))) 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric #define _mm_mask_shldi_epi32(S, U, A, B, I) \ 294349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 2950b57cec5SDimitry Andric (__v4si)_mm_shldi_epi32((A), (B), (I)), \ 296349cc55cSDimitry Andric (__v4si)(__m128i)(S))) 2970b57cec5SDimitry Andric 2980b57cec5SDimitry Andric #define _mm_maskz_shldi_epi32(U, A, B, I) \ 299349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 3000b57cec5SDimitry Andric (__v4si)_mm_shldi_epi32((A), (B), (I)), \ 301349cc55cSDimitry Andric (__v4si)_mm_setzero_si128())) 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric #define _mm256_shldi_epi16(A, B, I) \ 304349cc55cSDimitry Andric ((__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \ 305349cc55cSDimitry Andric (__v16hi)(__m256i)(B), (int)(I))) 3060b57cec5SDimitry Andric 
/* Masked 16-bit shldi forms (256-bit): pass mask U (cast to __mmask16), the
 * result of _mm256_shldi_epi16(A, B, I), and either S (_mask_) or an all-zero
 * vector (_maskz_) to __builtin_ia32_selectw_256.
 */
#define _mm256_mask_shldi_epi16(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                  (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
                                  (__v16hi)(__m256i)(S)))

#define _mm256_maskz_shldi_epi16(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                  (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
                                  (__v16hi)_mm256_setzero_si256()))

/* 128-bit, 16-bit-lane shldi: A and B are viewed as __v8hi and passed with
 * I (cast to int) to __builtin_ia32_vpshldw128. The masked forms use an
 * 8-bit mask with __builtin_ia32_selectw_128.
 */
#define _mm_shldi_epi16(A, B, I) \
  ((__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \
                                      (__v8hi)(__m128i)(B), (int)(I)))

#define _mm_mask_shldi_epi16(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
                                      (__v8hi)(__m128i)(S)))

#define _mm_maskz_shldi_epi16(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                      (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
                                      (__v8hi)_mm_setzero_si128()))

/* shrdi, 256-bit, 64-bit lanes: same macro shape as the shldi family but
 * forwarding to __builtin_ia32_vpshrdq256. The _mask_ form hands mask U, the
 * unmasked result and S to __builtin_ia32_selectq_256.
 */
#define _mm256_shrdi_epi64(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \
                                      (__v4di)(__m256i)(B), (int)(I)))

#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                    (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
                                    (__v4di)(__m256i)(S)))

/* ---- shrdi: immediate-count forms (macros), continued ---------------------
 * Unmasked form: casts A and B to the lane type and passes them, with I cast
 * to int, to __builtin_ia32_vpshrd{q,d,w}{128,256}.
 * _mask_ form:  feeds mask U, the unmasked result, and S to the matching
 *               __builtin_ia32_select{q,d,w}_{128,256} builtin.
 * _maskz_ form: same, with an all-zero vector in place of S.
 * Mask width is __mmask8 except for the 256-bit epi16 forms (__mmask16).
 */
#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                    (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
                                    (__v4di)_mm256_setzero_si256()))

#define _mm_shrdi_epi64(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \
                                      (__v2di)(__m128i)(B), (int)(I)))

#define _mm_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
                                      (__v2di)(__m128i)(S)))

#define _mm_maskz_shrdi_epi64(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
                                      (__v2di)_mm_setzero_si128()))

#define _mm256_shrdi_epi32(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \
                                      (__v8si)(__m256i)(B), (int)(I)))

#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                    (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
                                    (__v8si)(__m256i)(S)))

#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                    (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
                                    (__v8si)_mm256_setzero_si256()))

#define _mm_shrdi_epi32(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \
                                      (__v4si)(__m128i)(B), (int)(I)))

#define _mm_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
                                      (__v4si)(__m128i)(S)))

#define _mm_maskz_shrdi_epi32(U, A, B, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
                                      (__v4si)_mm_setzero_si128()))

#define _mm256_shrdi_epi16(A, B, I) \
  ((__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \
                                      (__v16hi)(__m256i)(B), (int)(I)))

#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                  (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
                                  (__v16hi)(__m256i)(S)))

#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                  (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
                                  (__v16hi)_mm256_setzero_si256()))

#define _mm_shrdi_epi16(A, B, I) \
  ((__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \
                                      (__v8hi)(__m128i)(B), (int)(I)))

Andric 4050b57cec5SDimitry Andric #define _mm_mask_shrdi_epi16(S, U, A, B, I) \ 406349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 4070b57cec5SDimitry Andric (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ 408349cc55cSDimitry Andric (__v8hi)(__m128i)(S))) 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric #define _mm_maskz_shrdi_epi16(U, A, B, I) \ 411349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 4120b57cec5SDimitry Andric (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ 413349cc55cSDimitry Andric (__v8hi)_mm_setzero_si128())) 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4160b57cec5SDimitry Andric _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) 4170b57cec5SDimitry Andric { 4180b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B, 4190b57cec5SDimitry Andric (__v4di)__C); 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4230b57cec5SDimitry Andric _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 4240b57cec5SDimitry Andric { 4250b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__U, 4260b57cec5SDimitry Andric (__v4di)_mm256_shldv_epi64(__A, __B, __C), 4270b57cec5SDimitry Andric (__v4di)__A); 4280b57cec5SDimitry Andric } 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4310b57cec5SDimitry Andric _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 4320b57cec5SDimitry Andric { 4330b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__U, 4340b57cec5SDimitry Andric (__v4di)_mm256_shldv_epi64(__A, __B, __C), 4350b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 4360b57cec5SDimitry Andric } 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 
4390b57cec5SDimitry Andric _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) 4400b57cec5SDimitry Andric { 4410b57cec5SDimitry Andric return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B, 4420b57cec5SDimitry Andric (__v2di)__C); 4430b57cec5SDimitry Andric } 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4460b57cec5SDimitry Andric _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 4470b57cec5SDimitry Andric { 4480b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__U, 4490b57cec5SDimitry Andric (__v2di)_mm_shldv_epi64(__A, __B, __C), 4500b57cec5SDimitry Andric (__v2di)__A); 4510b57cec5SDimitry Andric } 4520b57cec5SDimitry Andric 4530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4540b57cec5SDimitry Andric _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 4550b57cec5SDimitry Andric { 4560b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__U, 4570b57cec5SDimitry Andric (__v2di)_mm_shldv_epi64(__A, __B, __C), 4580b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 4590b57cec5SDimitry Andric } 4600b57cec5SDimitry Andric 4610b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4620b57cec5SDimitry Andric _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) 4630b57cec5SDimitry Andric { 4640b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B, 4650b57cec5SDimitry Andric (__v8si)__C); 4660b57cec5SDimitry Andric } 4670b57cec5SDimitry Andric 4680b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4690b57cec5SDimitry Andric _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 4700b57cec5SDimitry Andric { 4710b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 4720b57cec5SDimitry Andric (__v8si)_mm256_shldv_epi32(__A, __B, __C), 4730b57cec5SDimitry Andric (__v8si)__A); 
4740b57cec5SDimitry Andric } 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4770b57cec5SDimitry Andric _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 4780b57cec5SDimitry Andric { 4790b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 4800b57cec5SDimitry Andric (__v8si)_mm256_shldv_epi32(__A, __B, __C), 4810b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 4820b57cec5SDimitry Andric } 4830b57cec5SDimitry Andric 4840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4850b57cec5SDimitry Andric _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) 4860b57cec5SDimitry Andric { 4870b57cec5SDimitry Andric return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B, 4880b57cec5SDimitry Andric (__v4si)__C); 4890b57cec5SDimitry Andric } 4900b57cec5SDimitry Andric 4910b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4920b57cec5SDimitry Andric _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 4930b57cec5SDimitry Andric { 4940b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 4950b57cec5SDimitry Andric (__v4si)_mm_shldv_epi32(__A, __B, __C), 4960b57cec5SDimitry Andric (__v4si)__A); 4970b57cec5SDimitry Andric } 4980b57cec5SDimitry Andric 4990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5000b57cec5SDimitry Andric _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 5010b57cec5SDimitry Andric { 5020b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 5030b57cec5SDimitry Andric (__v4si)_mm_shldv_epi32(__A, __B, __C), 5040b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 5050b57cec5SDimitry Andric } 5060b57cec5SDimitry Andric 5070b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5080b57cec5SDimitry Andric _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) 5090b57cec5SDimitry Andric { 
5100b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B, 5110b57cec5SDimitry Andric (__v16hi)__C); 5120b57cec5SDimitry Andric } 5130b57cec5SDimitry Andric 5140b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5150b57cec5SDimitry Andric _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) 5160b57cec5SDimitry Andric { 5170b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256(__U, 5180b57cec5SDimitry Andric (__v16hi)_mm256_shldv_epi16(__A, __B, __C), 5190b57cec5SDimitry Andric (__v16hi)__A); 5200b57cec5SDimitry Andric } 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5230b57cec5SDimitry Andric _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) 5240b57cec5SDimitry Andric { 5250b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256(__U, 5260b57cec5SDimitry Andric (__v16hi)_mm256_shldv_epi16(__A, __B, __C), 5270b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 5280b57cec5SDimitry Andric } 5290b57cec5SDimitry Andric 5300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5310b57cec5SDimitry Andric _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) 5320b57cec5SDimitry Andric { 5330b57cec5SDimitry Andric return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B, 5340b57cec5SDimitry Andric (__v8hi)__C); 5350b57cec5SDimitry Andric } 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5380b57cec5SDimitry Andric _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 5390b57cec5SDimitry Andric { 5400b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128(__U, 5410b57cec5SDimitry Andric (__v8hi)_mm_shldv_epi16(__A, __B, __C), 5420b57cec5SDimitry Andric (__v8hi)__A); 5430b57cec5SDimitry Andric } 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 5460b57cec5SDimitry Andric _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 5470b57cec5SDimitry Andric { 5480b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128(__U, 5490b57cec5SDimitry Andric (__v8hi)_mm_shldv_epi16(__A, __B, __C), 5500b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 5510b57cec5SDimitry Andric } 5520b57cec5SDimitry Andric 5530b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5540b57cec5SDimitry Andric _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) 5550b57cec5SDimitry Andric { 5560b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B, 5570b57cec5SDimitry Andric (__v4di)__C); 5580b57cec5SDimitry Andric } 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5610b57cec5SDimitry Andric _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 5620b57cec5SDimitry Andric { 5630b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__U, 5640b57cec5SDimitry Andric (__v4di)_mm256_shrdv_epi64(__A, __B, __C), 5650b57cec5SDimitry Andric (__v4di)__A); 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric 5680b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5690b57cec5SDimitry Andric _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 5700b57cec5SDimitry Andric { 5710b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__U, 5720b57cec5SDimitry Andric (__v4di)_mm256_shrdv_epi64(__A, __B, __C), 5730b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 5740b57cec5SDimitry Andric } 5750b57cec5SDimitry Andric 5760b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5770b57cec5SDimitry Andric _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) 5780b57cec5SDimitry Andric { 5790b57cec5SDimitry Andric return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B, 
5800b57cec5SDimitry Andric (__v2di)__C); 5810b57cec5SDimitry Andric } 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5840b57cec5SDimitry Andric _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 5850b57cec5SDimitry Andric { 5860b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__U, 5870b57cec5SDimitry Andric (__v2di)_mm_shrdv_epi64(__A, __B, __C), 5880b57cec5SDimitry Andric (__v2di)__A); 5890b57cec5SDimitry Andric } 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5920b57cec5SDimitry Andric _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 5930b57cec5SDimitry Andric { 5940b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__U, 5950b57cec5SDimitry Andric (__v2di)_mm_shrdv_epi64(__A, __B, __C), 5960b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 5970b57cec5SDimitry Andric } 5980b57cec5SDimitry Andric 5990b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6000b57cec5SDimitry Andric _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) 6010b57cec5SDimitry Andric { 6020b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B, 6030b57cec5SDimitry Andric (__v8si)__C); 6040b57cec5SDimitry Andric } 6050b57cec5SDimitry Andric 6060b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6070b57cec5SDimitry Andric _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 6080b57cec5SDimitry Andric { 6090b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 6100b57cec5SDimitry Andric (__v8si)_mm256_shrdv_epi32(__A, __B, __C), 6110b57cec5SDimitry Andric (__v8si)__A); 6120b57cec5SDimitry Andric } 6130b57cec5SDimitry Andric 6140b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6150b57cec5SDimitry Andric _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i 
__B, __m256i __C) 6160b57cec5SDimitry Andric { 6170b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 6180b57cec5SDimitry Andric (__v8si)_mm256_shrdv_epi32(__A, __B, __C), 6190b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 6200b57cec5SDimitry Andric } 6210b57cec5SDimitry Andric 6220b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6230b57cec5SDimitry Andric _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) 6240b57cec5SDimitry Andric { 6250b57cec5SDimitry Andric return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B, 6260b57cec5SDimitry Andric (__v4si)__C); 6270b57cec5SDimitry Andric } 6280b57cec5SDimitry Andric 6290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6300b57cec5SDimitry Andric _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 6310b57cec5SDimitry Andric { 6320b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 6330b57cec5SDimitry Andric (__v4si)_mm_shrdv_epi32(__A, __B, __C), 6340b57cec5SDimitry Andric (__v4si)__A); 6350b57cec5SDimitry Andric } 6360b57cec5SDimitry Andric 6370b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6380b57cec5SDimitry Andric _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 6390b57cec5SDimitry Andric { 6400b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 6410b57cec5SDimitry Andric (__v4si)_mm_shrdv_epi32(__A, __B, __C), 6420b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 6430b57cec5SDimitry Andric } 6440b57cec5SDimitry Andric 6450b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6460b57cec5SDimitry Andric _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) 6470b57cec5SDimitry Andric { 6480b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B, 6490b57cec5SDimitry Andric (__v16hi)__C); 6500b57cec5SDimitry Andric } 6510b57cec5SDimitry Andric 6520b57cec5SDimitry 
Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6530b57cec5SDimitry Andric _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) 6540b57cec5SDimitry Andric { 6550b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256(__U, 6560b57cec5SDimitry Andric (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), 6570b57cec5SDimitry Andric (__v16hi)__A); 6580b57cec5SDimitry Andric } 6590b57cec5SDimitry Andric 6600b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6610b57cec5SDimitry Andric _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) 6620b57cec5SDimitry Andric { 6630b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256(__U, 6640b57cec5SDimitry Andric (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), 6650b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 6660b57cec5SDimitry Andric } 6670b57cec5SDimitry Andric 6680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6690b57cec5SDimitry Andric _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) 6700b57cec5SDimitry Andric { 6710b57cec5SDimitry Andric return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B, 6720b57cec5SDimitry Andric (__v8hi)__C); 6730b57cec5SDimitry Andric } 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6760b57cec5SDimitry Andric _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 6770b57cec5SDimitry Andric { 6780b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128(__U, 6790b57cec5SDimitry Andric (__v8hi)_mm_shrdv_epi16(__A, __B, __C), 6800b57cec5SDimitry Andric (__v8hi)__A); 6810b57cec5SDimitry Andric } 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6840b57cec5SDimitry Andric _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 6850b57cec5SDimitry Andric { 6860b57cec5SDimitry Andric return 
(__m128i)__builtin_ia32_selectw_128(__U, 6870b57cec5SDimitry Andric (__v8hi)_mm_shrdv_epi16(__A, __B, __C), 6880b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 6890b57cec5SDimitry Andric } 6900b57cec5SDimitry Andric 6910b57cec5SDimitry Andric 6920b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 6930b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256 6940b57cec5SDimitry Andric 6950b57cec5SDimitry Andric #endif 696