10b57cec5SDimitry Andric /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * 40b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 50b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 60b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 70b57cec5SDimitry Andric * 80b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 90b57cec5SDimitry Andric */ 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __AVX512BWINTRIN_H 150b57cec5SDimitry Andric #define __AVX512BWINTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric typedef unsigned int __mmask32; 180b57cec5SDimitry Andric typedef unsigned long long __mmask64; 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 21*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS512 \ 22*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 23*5f757f3fSDimitry Andric __target__("avx512bw,evex512"), __min_vector_width__(512))) 24*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS \ 25*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 26*5f757f3fSDimitry Andric __target__("avx512bw,no-evex512"))) 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric static __inline __mmask32 __DEFAULT_FN_ATTRS 290b57cec5SDimitry Andric _knot_mask32(__mmask32 __M) 300b57cec5SDimitry Andric { 310b57cec5SDimitry Andric return __builtin_ia32_knotsi(__M); 320b57cec5SDimitry Andric } 330b57cec5SDimitry Andric 34*5f757f3fSDimitry Andric static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) { 350b57cec5SDimitry Andric return __builtin_ia32_knotdi(__M); 360b57cec5SDimitry Andric } 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS 390b57cec5SDimitry Andric _kand_mask32(__mmask32 __A, __mmask32 __B) 400b57cec5SDimitry Andric { 410b57cec5SDimitry Andric return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); 420b57cec5SDimitry Andric } 430b57cec5SDimitry Andric 44*5f757f3fSDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A, 45*5f757f3fSDimitry Andric __mmask64 __B) { 460b57cec5SDimitry Andric return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); 470b57cec5SDimitry Andric } 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS 500b57cec5SDimitry Andric _kandn_mask32(__mmask32 __A, __mmask32 __B) 510b57cec5SDimitry Andric { 520b57cec5SDimitry Andric return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 55*5f757f3fSDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A, 56*5f757f3fSDimitry Andric __mmask64 __B) { 570b57cec5SDimitry Andric return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS 610b57cec5SDimitry Andric _kor_mask32(__mmask32 __A, __mmask32 __B) 620b57cec5SDimitry Andric { 630b57cec5SDimitry Andric return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); 640b57cec5SDimitry Andric } 650b57cec5SDimitry Andric 66*5f757f3fSDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A, 67*5f757f3fSDimitry Andric __mmask64 __B) { 680b57cec5SDimitry Andric return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); 690b57cec5SDimitry Andric } 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS 720b57cec5SDimitry Andric _kxnor_mask32(__mmask32 __A, __mmask32 __B) 730b57cec5SDimitry Andric { 740b57cec5SDimitry Andric return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 77*5f757f3fSDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A, 78*5f757f3fSDimitry Andric __mmask64 __B) { 790b57cec5SDimitry Andric return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); 800b57cec5SDimitry Andric } 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS 830b57cec5SDimitry Andric _kxor_mask32(__mmask32 __A, __mmask32 __B) 840b57cec5SDimitry Andric { 850b57cec5SDimitry Andric return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); 860b57cec5SDimitry Andric } 870b57cec5SDimitry Andric 88*5f757f3fSDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A, 89*5f757f3fSDimitry Andric __mmask64 __B) { 900b57cec5SDimitry Andric return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); 910b57cec5SDimitry Andric } 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 940b57cec5SDimitry Andric _kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) 950b57cec5SDimitry Andric { 960b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 970b57cec5SDimitry Andric } 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 1000b57cec5SDimitry Andric _kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) 1010b57cec5SDimitry Andric { 1020b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 1030b57cec5SDimitry Andric } 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 1060b57cec5SDimitry Andric _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 1070b57cec5SDimitry Andric *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 1080b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 1090b57cec5SDimitry Andric } 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 112*5f757f3fSDimitry Andric _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) { 1130b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 1140b57cec5SDimitry Andric } 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 117*5f757f3fSDimitry Andric _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) { 1180b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 1190b57cec5SDimitry Andric } 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 1220b57cec5SDimitry Andric _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 1230b57cec5SDimitry Andric *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 1240b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 1250b57cec5SDimitry Andric } 1260b57cec5SDimitry Andric 1270b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 1280b57cec5SDimitry Andric _ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) 1290b57cec5SDimitry Andric { 1300b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 1310b57cec5SDimitry Andric } 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 1340b57cec5SDimitry Andric _ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) 1350b57cec5SDimitry Andric { 1360b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 1400b57cec5SDimitry Andric _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 1410b57cec5SDimitry Andric *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 1420b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 146*5f757f3fSDimitry Andric _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) { 1470b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 1480b57cec5SDimitry Andric } 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 151*5f757f3fSDimitry Andric _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) { 1520b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 1560b57cec5SDimitry Andric _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 1570b57cec5SDimitry Andric *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 1580b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1620b57cec5SDimitry Andric _kadd_mask32(__mmask32 __A, __mmask32 __B) 1630b57cec5SDimitry Andric { 1640b57cec5SDimitry Andric return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric 167*5f757f3fSDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A, 168*5f757f3fSDimitry Andric __mmask64 __B) { 1690b57cec5SDimitry Andric return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); 1700b57cec5SDimitry Andric } 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric #define _kshiftli_mask32(A, I) \ 173349cc55cSDimitry Andric ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I))) 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric #define _kshiftri_mask32(A, I) \ 176349cc55cSDimitry Andric ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I))) 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric #define _kshiftli_mask64(A, I) \ 179349cc55cSDimitry Andric ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I))) 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric #define _kshiftri_mask64(A, I) \ 182349cc55cSDimitry Andric ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I))) 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS 1850b57cec5SDimitry Andric _cvtmask32_u32(__mmask32 __A) { 1860b57cec5SDimitry Andric return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS 1900b57cec5SDimitry Andric _cvtmask64_u64(__mmask64 __A) { 1910b57cec5SDimitry Andric return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1950b57cec5SDimitry Andric _cvtu32_mask32(unsigned int __A) { 1960b57cec5SDimitry Andric return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS 2000b57cec5SDimitry Andric _cvtu64_mask64(unsigned long long __A) { 2010b57cec5SDimitry Andric return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); 2020b57cec5SDimitry Andric } 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS 2050b57cec5SDimitry Andric _load_mask32(__mmask32 *__A) { 2060b57cec5SDimitry Andric return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); 2070b57cec5SDimitry Andric } 2080b57cec5SDimitry Andric 209*5f757f3fSDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) { 2100b57cec5SDimitry Andric return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 2140b57cec5SDimitry Andric _store_mask32(__mmask32 *__A, __mmask32 __B) { 2150b57cec5SDimitry Andric *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); 2160b57cec5SDimitry Andric } 2170b57cec5SDimitry Andric 218*5f757f3fSDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, 219*5f757f3fSDimitry Andric __mmask64 __B) { 2200b57cec5SDimitry Andric *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); 2210b57cec5SDimitry Andric } 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric /* Integer compare */ 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric #define _mm512_cmp_epi8_mask(a, b, p) \ 226349cc55cSDimitry Andric ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 2270b57cec5SDimitry Andric (__v64qi)(__m512i)(b), (int)(p), \ 228349cc55cSDimitry Andric (__mmask64)-1)) 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric #define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ 231349cc55cSDimitry Andric ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 2320b57cec5SDimitry Andric (__v64qi)(__m512i)(b), (int)(p), \ 233349cc55cSDimitry Andric (__mmask64)(m))) 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric #define _mm512_cmp_epu8_mask(a, b, p) \ 236349cc55cSDimitry Andric ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 2370b57cec5SDimitry Andric (__v64qi)(__m512i)(b), (int)(p), \ 238349cc55cSDimitry Andric (__mmask64)-1)) 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric #define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ 241349cc55cSDimitry Andric ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 2420b57cec5SDimitry Andric (__v64qi)(__m512i)(b), (int)(p), \ 243349cc55cSDimitry Andric (__mmask64)(m))) 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric #define _mm512_cmp_epi16_mask(a, b, p) \ 246349cc55cSDimitry Andric ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 2470b57cec5SDimitry Andric (__v32hi)(__m512i)(b), (int)(p), \ 248349cc55cSDimitry Andric (__mmask32)-1)) 2490b57cec5SDimitry Andric 2500b57cec5SDimitry Andric #define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ 251349cc55cSDimitry Andric ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 2520b57cec5SDimitry Andric (__v32hi)(__m512i)(b), (int)(p), \ 253349cc55cSDimitry Andric (__mmask32)(m))) 2540b57cec5SDimitry Andric 2550b57cec5SDimitry Andric #define _mm512_cmp_epu16_mask(a, b, p) \ 256349cc55cSDimitry Andric ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 2570b57cec5SDimitry Andric (__v32hi)(__m512i)(b), (int)(p), \ 258349cc55cSDimitry Andric (__mmask32)-1)) 2590b57cec5SDimitry Andric 2600b57cec5SDimitry Andric #define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ 261349cc55cSDimitry Andric ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 2620b57cec5SDimitry Andric (__v32hi)(__m512i)(b), (int)(p), \ 263349cc55cSDimitry Andric (__mmask32)(m))) 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric #define _mm512_cmpeq_epi8_mask(A, B) \ 2660b57cec5SDimitry Andric _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 2670b57cec5SDimitry Andric #define _mm512_mask_cmpeq_epi8_mask(k, A, B) \ 2680b57cec5SDimitry Andric _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 2690b57cec5SDimitry Andric #define _mm512_cmpge_epi8_mask(A, B) \ 2700b57cec5SDimitry Andric _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 2710b57cec5SDimitry Andric #define _mm512_mask_cmpge_epi8_mask(k, A, B) \ 2720b57cec5SDimitry Andric _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 2730b57cec5SDimitry Andric #define _mm512_cmpgt_epi8_mask(A, B) \ 2740b57cec5SDimitry Andric _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 2750b57cec5SDimitry Andric #define _mm512_mask_cmpgt_epi8_mask(k, A, B) \ 2760b57cec5SDimitry Andric _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 2770b57cec5SDimitry Andric #define _mm512_cmple_epi8_mask(A, B) \ 2780b57cec5SDimitry Andric _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 2790b57cec5SDimitry Andric #define _mm512_mask_cmple_epi8_mask(k, A, B) \ 2800b57cec5SDimitry Andric _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 2810b57cec5SDimitry Andric #define _mm512_cmplt_epi8_mask(A, B) \ 2820b57cec5SDimitry Andric _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 2830b57cec5SDimitry Andric #define _mm512_mask_cmplt_epi8_mask(k, A, B) \ 2840b57cec5SDimitry Andric _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 2850b57cec5SDimitry Andric #define _mm512_cmpneq_epi8_mask(A, B) \ 2860b57cec5SDimitry Andric _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 2870b57cec5SDimitry Andric #define _mm512_mask_cmpneq_epi8_mask(k, A, B) \ 2880b57cec5SDimitry Andric _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric #define _mm512_cmpeq_epu8_mask(A, B) \ 2910b57cec5SDimitry Andric _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 2920b57cec5SDimitry Andric #define _mm512_mask_cmpeq_epu8_mask(k, A, B) \ 2930b57cec5SDimitry Andric _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 2940b57cec5SDimitry Andric #define _mm512_cmpge_epu8_mask(A, B) \ 2950b57cec5SDimitry Andric _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 2960b57cec5SDimitry Andric #define _mm512_mask_cmpge_epu8_mask(k, A, B) \ 2970b57cec5SDimitry Andric _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 2980b57cec5SDimitry Andric #define _mm512_cmpgt_epu8_mask(A, B) \ 2990b57cec5SDimitry Andric _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 3000b57cec5SDimitry Andric #define _mm512_mask_cmpgt_epu8_mask(k, A, B) \ 3010b57cec5SDimitry Andric _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 3020b57cec5SDimitry Andric #define _mm512_cmple_epu8_mask(A, B) \ 3030b57cec5SDimitry Andric _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 3040b57cec5SDimitry Andric #define _mm512_mask_cmple_epu8_mask(k, A, B) \ 3050b57cec5SDimitry Andric _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 3060b57cec5SDimitry Andric #define _mm512_cmplt_epu8_mask(A, B) \ 3070b57cec5SDimitry Andric _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 3080b57cec5SDimitry Andric #define _mm512_mask_cmplt_epu8_mask(k, A, B) \ 3090b57cec5SDimitry Andric _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 3100b57cec5SDimitry Andric #define _mm512_cmpneq_epu8_mask(A, B) \ 3110b57cec5SDimitry Andric _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 3120b57cec5SDimitry Andric #define _mm512_mask_cmpneq_epu8_mask(k, A, B) \ 3130b57cec5SDimitry Andric _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric #define _mm512_cmpeq_epi16_mask(A, B) \ 3160b57cec5SDimitry Andric _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 3170b57cec5SDimitry Andric #define _mm512_mask_cmpeq_epi16_mask(k, A, B) \ 3180b57cec5SDimitry Andric _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 3190b57cec5SDimitry Andric #define _mm512_cmpge_epi16_mask(A, B) \ 3200b57cec5SDimitry Andric _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 3210b57cec5SDimitry Andric #define _mm512_mask_cmpge_epi16_mask(k, A, B) \ 3220b57cec5SDimitry Andric _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 3230b57cec5SDimitry Andric #define _mm512_cmpgt_epi16_mask(A, B) \ 3240b57cec5SDimitry Andric _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 3250b57cec5SDimitry Andric #define _mm512_mask_cmpgt_epi16_mask(k, A, B) \ 3260b57cec5SDimitry Andric _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 3270b57cec5SDimitry Andric #define _mm512_cmple_epi16_mask(A, B) \ 3280b57cec5SDimitry Andric _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 3290b57cec5SDimitry Andric #define _mm512_mask_cmple_epi16_mask(k, A, B) \ 3300b57cec5SDimitry Andric _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 3310b57cec5SDimitry Andric #define _mm512_cmplt_epi16_mask(A, B) \ 3320b57cec5SDimitry Andric _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 3330b57cec5SDimitry Andric #define _mm512_mask_cmplt_epi16_mask(k, A, B) \ 3340b57cec5SDimitry Andric _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 3350b57cec5SDimitry Andric #define _mm512_cmpneq_epi16_mask(A, B) \ 3360b57cec5SDimitry Andric _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 3370b57cec5SDimitry Andric #define _mm512_mask_cmpneq_epi16_mask(k, A, B) \ 3380b57cec5SDimitry Andric _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 3390b57cec5SDimitry Andric 3400b57cec5SDimitry Andric #define _mm512_cmpeq_epu16_mask(A, B) \ 3410b57cec5SDimitry Andric _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 3420b57cec5SDimitry Andric #define _mm512_mask_cmpeq_epu16_mask(k, A, B) \ 3430b57cec5SDimitry Andric _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 3440b57cec5SDimitry Andric #define _mm512_cmpge_epu16_mask(A, B) \ 3450b57cec5SDimitry Andric _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 3460b57cec5SDimitry Andric #define _mm512_mask_cmpge_epu16_mask(k, A, B) \ 3470b57cec5SDimitry Andric _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 3480b57cec5SDimitry Andric #define _mm512_cmpgt_epu16_mask(A, B) \ 3490b57cec5SDimitry Andric _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 3500b57cec5SDimitry Andric #define _mm512_mask_cmpgt_epu16_mask(k, A, B) \ 3510b57cec5SDimitry Andric _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 3520b57cec5SDimitry Andric #define _mm512_cmple_epu16_mask(A, B) \ 3530b57cec5SDimitry Andric _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 3540b57cec5SDimitry Andric #define _mm512_mask_cmple_epu16_mask(k, A, B) \ 3550b57cec5SDimitry Andric _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 3560b57cec5SDimitry Andric #define _mm512_cmplt_epu16_mask(A, B) \ 3570b57cec5SDimitry Andric _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 3580b57cec5SDimitry Andric #define _mm512_mask_cmplt_epu16_mask(k, A, B) \ 3590b57cec5SDimitry Andric _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 3600b57cec5SDimitry Andric #define _mm512_cmpneq_epu16_mask(A, B) \ 3610b57cec5SDimitry Andric _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 3620b57cec5SDimitry Andric #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ 3630b57cec5SDimitry Andric _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3660b57cec5SDimitry Andric _mm512_add_epi8 (__m512i __A, __m512i __B) { 3670b57cec5SDimitry Andric return (__m512i) ((__v64qu) __A + (__v64qu) __B); 3680b57cec5SDimitry Andric } 3690b57cec5SDimitry Andric 3700b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3710b57cec5SDimitry Andric _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 3720b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 3730b57cec5SDimitry Andric (__v64qi)_mm512_add_epi8(__A, __B), 3740b57cec5SDimitry Andric (__v64qi)__W); 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3780b57cec5SDimitry Andric _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 3790b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 3800b57cec5SDimitry Andric (__v64qi)_mm512_add_epi8(__A, __B), 3810b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 3820b57cec5SDimitry Andric } 3830b57cec5SDimitry Andric 3840b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3850b57cec5SDimitry Andric _mm512_sub_epi8 (__m512i __A, __m512i __B) { 3860b57cec5SDimitry Andric return (__m512i) ((__v64qu) __A - (__v64qu) __B); 3870b57cec5SDimitry Andric } 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3900b57cec5SDimitry Andric _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 3910b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 3920b57cec5SDimitry Andric (__v64qi)_mm512_sub_epi8(__A, __B), 3930b57cec5SDimitry Andric (__v64qi)__W); 3940b57cec5SDimitry Andric } 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3970b57cec5SDimitry Andric _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 3980b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 3990b57cec5SDimitry Andric (__v64qi)_mm512_sub_epi8(__A, __B), 4000b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4040b57cec5SDimitry Andric _mm512_add_epi16 (__m512i __A, __m512i __B) { 4050b57cec5SDimitry Andric return (__m512i) ((__v32hu) __A + (__v32hu) __B); 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4090b57cec5SDimitry Andric _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 4100b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 4110b57cec5SDimitry Andric (__v32hi)_mm512_add_epi16(__A, __B), 4120b57cec5SDimitry Andric (__v32hi)__W); 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4160b57cec5SDimitry Andric _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 4170b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 4180b57cec5SDimitry Andric (__v32hi)_mm512_add_epi16(__A, __B), 4190b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4230b57cec5SDimitry Andric _mm512_sub_epi16 (__m512i __A, __m512i __B) { 4240b57cec5SDimitry Andric return (__m512i) ((__v32hu) __A - (__v32hu) __B); 4250b57cec5SDimitry Andric } 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4280b57cec5SDimitry Andric _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 4290b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 4300b57cec5SDimitry Andric (__v32hi)_mm512_sub_epi16(__A, __B), 4310b57cec5SDimitry Andric (__v32hi)__W); 4320b57cec5SDimitry Andric } 4330b57cec5SDimitry Andric 4340b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4350b57cec5SDimitry Andric _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 4360b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 4370b57cec5SDimitry Andric (__v32hi)_mm512_sub_epi16(__A, __B), 4380b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 4390b57cec5SDimitry Andric } 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4420b57cec5SDimitry Andric _mm512_mullo_epi16 (__m512i __A, __m512i __B) { 4430b57cec5SDimitry Andric return (__m512i) ((__v32hu) __A * (__v32hu) __B); 4440b57cec5SDimitry Andric } 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4470b57cec5SDimitry Andric _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 4480b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 4490b57cec5SDimitry Andric (__v32hi)_mm512_mullo_epi16(__A, __B), 4500b57cec5SDimitry Andric (__v32hi)__W); 4510b57cec5SDimitry Andric } 4520b57cec5SDimitry Andric 4530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4540b57cec5SDimitry Andric _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 4550b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 4560b57cec5SDimitry Andric (__v32hi)_mm512_mullo_epi16(__A, __B), 4570b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 4580b57cec5SDimitry Andric } 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4610b57cec5SDimitry Andric _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) 4620b57cec5SDimitry Andric { 4630b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 4640b57cec5SDimitry Andric (__v64qi) __W, 4650b57cec5SDimitry Andric (__v64qi) __A); 4660b57cec5SDimitry Andric } 4670b57cec5SDimitry Andric 4680b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4690b57cec5SDimitry Andric _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) 4700b57cec5SDimitry Andric { 4710b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 4720b57cec5SDimitry Andric (__v32hi) __W, 4730b57cec5SDimitry Andric (__v32hi) __A); 4740b57cec5SDimitry Andric } 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4770b57cec5SDimitry Andric _mm512_abs_epi8 (__m512i __A) 4780b57cec5SDimitry Andric { 47904eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_abs((__v64qs)__A); 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4830b57cec5SDimitry Andric _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 4840b57cec5SDimitry Andric { 4850b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 4860b57cec5SDimitry Andric (__v64qi)_mm512_abs_epi8(__A), 4870b57cec5SDimitry Andric (__v64qi)__W); 4880b57cec5SDimitry Andric } 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4910b57cec5SDimitry Andric _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) 4920b57cec5SDimitry Andric { 4930b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 4940b57cec5SDimitry Andric (__v64qi)_mm512_abs_epi8(__A), 4950b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 4960b57cec5SDimitry Andric } 4970b57cec5SDimitry Andric 4980b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4990b57cec5SDimitry Andric _mm512_abs_epi16 (__m512i __A) 5000b57cec5SDimitry Andric { 50104eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_abs((__v32hi)__A); 5020b57cec5SDimitry Andric } 5030b57cec5SDimitry Andric 5040b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5050b57cec5SDimitry Andric _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 5060b57cec5SDimitry Andric { 5070b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 5080b57cec5SDimitry Andric (__v32hi)_mm512_abs_epi16(__A), 5090b57cec5SDimitry Andric (__v32hi)__W); 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5130b57cec5SDimitry Andric _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) 5140b57cec5SDimitry Andric { 5150b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 5160b57cec5SDimitry Andric (__v32hi)_mm512_abs_epi16(__A), 5170b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 5180b57cec5SDimitry Andric } 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5210b57cec5SDimitry Andric _mm512_packs_epi32(__m512i __A, __m512i __B) 5220b57cec5SDimitry Andric { 5230b57cec5SDimitry Andric return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); 5240b57cec5SDimitry Andric } 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5270b57cec5SDimitry Andric _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) 5280b57cec5SDimitry Andric { 5290b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 5300b57cec5SDimitry Andric (__v32hi)_mm512_packs_epi32(__A, __B), 5310b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 5320b57cec5SDimitry Andric } 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5350b57cec5SDimitry Andric _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 5360b57cec5SDimitry Andric { 5370b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 5380b57cec5SDimitry Andric (__v32hi)_mm512_packs_epi32(__A, __B), 5390b57cec5SDimitry Andric (__v32hi)__W); 5400b57cec5SDimitry Andric } 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5430b57cec5SDimitry Andric _mm512_packs_epi16(__m512i __A, __m512i __B) 5440b57cec5SDimitry Andric { 5450b57cec5SDimitry Andric return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); 5460b57cec5SDimitry Andric } 5470b57cec5SDimitry Andric 5480b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5490b57cec5SDimitry Andric _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 5500b57cec5SDimitry Andric { 5510b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 5520b57cec5SDimitry Andric (__v64qi)_mm512_packs_epi16(__A, __B), 5530b57cec5SDimitry Andric (__v64qi)__W); 5540b57cec5SDimitry Andric } 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5570b57cec5SDimitry Andric _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) 5580b57cec5SDimitry Andric { 5590b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 5600b57cec5SDimitry Andric (__v64qi)_mm512_packs_epi16(__A, __B), 5610b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 5620b57cec5SDimitry Andric } 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5650b57cec5SDimitry Andric _mm512_packus_epi32(__m512i __A, __m512i __B) 5660b57cec5SDimitry Andric { 5670b57cec5SDimitry Andric return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); 5680b57cec5SDimitry Andric } 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5710b57cec5SDimitry Andric _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) 5720b57cec5SDimitry Andric { 5730b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 5740b57cec5SDimitry Andric (__v32hi)_mm512_packus_epi32(__A, __B), 5750b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 5760b57cec5SDimitry Andric } 5770b57cec5SDimitry Andric 5780b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5790b57cec5SDimitry Andric _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 5800b57cec5SDimitry Andric { 5810b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 5820b57cec5SDimitry Andric (__v32hi)_mm512_packus_epi32(__A, __B), 5830b57cec5SDimitry Andric (__v32hi)__W); 5840b57cec5SDimitry Andric } 5850b57cec5SDimitry Andric 5860b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5870b57cec5SDimitry Andric _mm512_packus_epi16(__m512i __A, __m512i __B) 5880b57cec5SDimitry Andric { 5890b57cec5SDimitry Andric return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5930b57cec5SDimitry Andric _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 5940b57cec5SDimitry Andric { 5950b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 5960b57cec5SDimitry Andric (__v64qi)_mm512_packus_epi16(__A, __B), 5970b57cec5SDimitry Andric (__v64qi)__W); 5980b57cec5SDimitry Andric } 5990b57cec5SDimitry Andric 6000b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6010b57cec5SDimitry Andric _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) 6020b57cec5SDimitry Andric { 6030b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 6040b57cec5SDimitry Andric (__v64qi)_mm512_packus_epi16(__A, __B), 6050b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 6060b57cec5SDimitry Andric } 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6090b57cec5SDimitry Andric _mm512_adds_epi8 (__m512i __A, __m512i __B) 6100b57cec5SDimitry Andric { 61181ad6265SDimitry Andric return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B); 6120b57cec5SDimitry Andric } 6130b57cec5SDimitry Andric 6140b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6150b57cec5SDimitry Andric _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 6160b57cec5SDimitry Andric { 6170b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 6180b57cec5SDimitry Andric (__v64qi)_mm512_adds_epi8(__A, __B), 6190b57cec5SDimitry Andric (__v64qi)__W); 6200b57cec5SDimitry Andric } 6210b57cec5SDimitry Andric 6220b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6230b57cec5SDimitry Andric _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 6240b57cec5SDimitry Andric { 6250b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 6260b57cec5SDimitry Andric (__v64qi)_mm512_adds_epi8(__A, __B), 6270b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 6280b57cec5SDimitry Andric } 6290b57cec5SDimitry Andric 6300b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6310b57cec5SDimitry Andric _mm512_adds_epi16 (__m512i __A, __m512i __B) 6320b57cec5SDimitry Andric { 63381ad6265SDimitry Andric return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B); 6340b57cec5SDimitry Andric } 6350b57cec5SDimitry Andric 6360b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6370b57cec5SDimitry Andric _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 6380b57cec5SDimitry Andric { 6390b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 6400b57cec5SDimitry Andric (__v32hi)_mm512_adds_epi16(__A, __B), 6410b57cec5SDimitry Andric (__v32hi)__W); 6420b57cec5SDimitry Andric } 6430b57cec5SDimitry Andric 6440b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6450b57cec5SDimitry Andric _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 6460b57cec5SDimitry Andric { 6470b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 6480b57cec5SDimitry Andric (__v32hi)_mm512_adds_epi16(__A, __B), 6490b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 6500b57cec5SDimitry Andric } 6510b57cec5SDimitry Andric 6520b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6530b57cec5SDimitry Andric _mm512_adds_epu8 (__m512i __A, __m512i __B) 6540b57cec5SDimitry Andric { 65581ad6265SDimitry Andric return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B); 6560b57cec5SDimitry Andric } 6570b57cec5SDimitry Andric 6580b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6590b57cec5SDimitry Andric _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 6600b57cec5SDimitry Andric { 6610b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 6620b57cec5SDimitry Andric (__v64qi)_mm512_adds_epu8(__A, __B), 6630b57cec5SDimitry Andric (__v64qi)__W); 6640b57cec5SDimitry Andric } 6650b57cec5SDimitry Andric 6660b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6670b57cec5SDimitry Andric _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 6680b57cec5SDimitry Andric { 6690b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 6700b57cec5SDimitry Andric (__v64qi)_mm512_adds_epu8(__A, __B), 6710b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 6720b57cec5SDimitry Andric } 6730b57cec5SDimitry Andric 6740b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6750b57cec5SDimitry Andric _mm512_adds_epu16 (__m512i __A, __m512i __B) 6760b57cec5SDimitry Andric { 67781ad6265SDimitry Andric return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B); 6780b57cec5SDimitry Andric } 6790b57cec5SDimitry Andric 6800b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6810b57cec5SDimitry Andric _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 6820b57cec5SDimitry Andric { 6830b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 6840b57cec5SDimitry Andric (__v32hi)_mm512_adds_epu16(__A, __B), 6850b57cec5SDimitry Andric (__v32hi)__W); 6860b57cec5SDimitry Andric } 6870b57cec5SDimitry Andric 6880b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6890b57cec5SDimitry Andric _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 6900b57cec5SDimitry Andric { 6910b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 6920b57cec5SDimitry Andric (__v32hi)_mm512_adds_epu16(__A, __B), 6930b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 6940b57cec5SDimitry Andric } 6950b57cec5SDimitry Andric 6960b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6970b57cec5SDimitry Andric _mm512_avg_epu8 (__m512i __A, __m512i __B) 6980b57cec5SDimitry Andric { 6990b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); 7000b57cec5SDimitry Andric } 7010b57cec5SDimitry Andric 7020b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7030b57cec5SDimitry Andric _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, 7040b57cec5SDimitry Andric __m512i __B) 7050b57cec5SDimitry Andric { 7060b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 7070b57cec5SDimitry Andric (__v64qi)_mm512_avg_epu8(__A, __B), 7080b57cec5SDimitry Andric (__v64qi)__W); 7090b57cec5SDimitry Andric } 7100b57cec5SDimitry Andric 7110b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7120b57cec5SDimitry Andric _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 7130b57cec5SDimitry Andric { 7140b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 7150b57cec5SDimitry Andric (__v64qi)_mm512_avg_epu8(__A, __B), 7160b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 7170b57cec5SDimitry Andric } 7180b57cec5SDimitry Andric 7190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7200b57cec5SDimitry Andric _mm512_avg_epu16 (__m512i __A, __m512i __B) 7210b57cec5SDimitry Andric { 7220b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); 7230b57cec5SDimitry Andric } 7240b57cec5SDimitry Andric 7250b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7260b57cec5SDimitry Andric _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, 7270b57cec5SDimitry Andric __m512i __B) 7280b57cec5SDimitry Andric { 7290b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 7300b57cec5SDimitry Andric (__v32hi)_mm512_avg_epu16(__A, __B), 7310b57cec5SDimitry Andric (__v32hi)__W); 7320b57cec5SDimitry Andric } 7330b57cec5SDimitry Andric 7340b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7350b57cec5SDimitry Andric _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 7360b57cec5SDimitry Andric { 7370b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 7380b57cec5SDimitry Andric (__v32hi)_mm512_avg_epu16(__A, __B), 7390b57cec5SDimitry Andric (__v32hi) _mm512_setzero_si512()); 7400b57cec5SDimitry Andric } 7410b57cec5SDimitry Andric 7420b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7430b57cec5SDimitry Andric _mm512_max_epi8 (__m512i __A, __m512i __B) 7440b57cec5SDimitry Andric { 74504eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B); 7460b57cec5SDimitry Andric } 7470b57cec5SDimitry Andric 7480b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7490b57cec5SDimitry Andric _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 7500b57cec5SDimitry Andric { 7510b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 7520b57cec5SDimitry Andric (__v64qi)_mm512_max_epi8(__A, __B), 7530b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 7540b57cec5SDimitry Andric } 7550b57cec5SDimitry Andric 7560b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7570b57cec5SDimitry Andric _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 7580b57cec5SDimitry Andric { 7590b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 7600b57cec5SDimitry Andric (__v64qi)_mm512_max_epi8(__A, __B), 7610b57cec5SDimitry Andric (__v64qi)__W); 7620b57cec5SDimitry Andric } 7630b57cec5SDimitry Andric 7640b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7650b57cec5SDimitry Andric _mm512_max_epi16 (__m512i __A, __m512i __B) 7660b57cec5SDimitry Andric { 76704eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B); 7680b57cec5SDimitry Andric } 7690b57cec5SDimitry Andric 7700b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7710b57cec5SDimitry Andric _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 7720b57cec5SDimitry Andric { 7730b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 7740b57cec5SDimitry Andric (__v32hi)_mm512_max_epi16(__A, __B), 7750b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 7760b57cec5SDimitry Andric } 7770b57cec5SDimitry Andric 7780b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7790b57cec5SDimitry Andric _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 7800b57cec5SDimitry Andric __m512i __B) 7810b57cec5SDimitry Andric { 7820b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 7830b57cec5SDimitry Andric (__v32hi)_mm512_max_epi16(__A, __B), 7840b57cec5SDimitry Andric (__v32hi)__W); 7850b57cec5SDimitry Andric } 7860b57cec5SDimitry Andric 7870b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7880b57cec5SDimitry Andric _mm512_max_epu8 (__m512i __A, __m512i __B) 7890b57cec5SDimitry Andric { 79004eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B); 7910b57cec5SDimitry Andric } 7920b57cec5SDimitry Andric 7930b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7940b57cec5SDimitry Andric _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 7950b57cec5SDimitry Andric { 7960b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 7970b57cec5SDimitry Andric (__v64qi)_mm512_max_epu8(__A, __B), 7980b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 7990b57cec5SDimitry Andric } 8000b57cec5SDimitry Andric 8010b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8020b57cec5SDimitry Andric _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 8030b57cec5SDimitry Andric { 8040b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 8050b57cec5SDimitry Andric (__v64qi)_mm512_max_epu8(__A, __B), 8060b57cec5SDimitry Andric (__v64qi)__W); 8070b57cec5SDimitry Andric } 8080b57cec5SDimitry Andric 8090b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8100b57cec5SDimitry Andric _mm512_max_epu16 (__m512i __A, __m512i __B) 8110b57cec5SDimitry Andric { 81204eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B); 8130b57cec5SDimitry Andric } 8140b57cec5SDimitry Andric 8150b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8160b57cec5SDimitry Andric _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 8170b57cec5SDimitry Andric { 8180b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 8190b57cec5SDimitry Andric (__v32hi)_mm512_max_epu16(__A, __B), 8200b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 8210b57cec5SDimitry Andric } 8220b57cec5SDimitry Andric 8230b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8240b57cec5SDimitry Andric _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 8250b57cec5SDimitry Andric { 8260b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 8270b57cec5SDimitry Andric (__v32hi)_mm512_max_epu16(__A, __B), 8280b57cec5SDimitry Andric (__v32hi)__W); 8290b57cec5SDimitry Andric } 8300b57cec5SDimitry Andric 8310b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8320b57cec5SDimitry Andric _mm512_min_epi8 (__m512i __A, __m512i __B) 8330b57cec5SDimitry Andric { 83404eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B); 8350b57cec5SDimitry Andric } 8360b57cec5SDimitry Andric 8370b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8380b57cec5SDimitry Andric _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 8390b57cec5SDimitry Andric { 8400b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 8410b57cec5SDimitry Andric (__v64qi)_mm512_min_epi8(__A, __B), 8420b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 8430b57cec5SDimitry Andric } 8440b57cec5SDimitry Andric 8450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8460b57cec5SDimitry Andric _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 8470b57cec5SDimitry Andric { 8480b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 8490b57cec5SDimitry Andric (__v64qi)_mm512_min_epi8(__A, __B), 8500b57cec5SDimitry Andric (__v64qi)__W); 8510b57cec5SDimitry Andric } 8520b57cec5SDimitry Andric 8530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8540b57cec5SDimitry Andric _mm512_min_epi16 (__m512i __A, __m512i __B) 8550b57cec5SDimitry Andric { 85604eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B); 8570b57cec5SDimitry Andric } 8580b57cec5SDimitry Andric 8590b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8600b57cec5SDimitry Andric _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 8610b57cec5SDimitry Andric { 8620b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 8630b57cec5SDimitry Andric (__v32hi)_mm512_min_epi16(__A, __B), 8640b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 8650b57cec5SDimitry Andric } 8660b57cec5SDimitry Andric 8670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8680b57cec5SDimitry Andric _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 8690b57cec5SDimitry Andric { 8700b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 8710b57cec5SDimitry Andric (__v32hi)_mm512_min_epi16(__A, __B), 8720b57cec5SDimitry Andric (__v32hi)__W); 8730b57cec5SDimitry Andric } 8740b57cec5SDimitry Andric 8750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8760b57cec5SDimitry Andric _mm512_min_epu8 (__m512i __A, __m512i __B) 8770b57cec5SDimitry Andric { 87804eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B); 8790b57cec5SDimitry Andric } 8800b57cec5SDimitry Andric 8810b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8820b57cec5SDimitry Andric _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 8830b57cec5SDimitry Andric { 8840b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 8850b57cec5SDimitry Andric (__v64qi)_mm512_min_epu8(__A, __B), 8860b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 8870b57cec5SDimitry Andric } 8880b57cec5SDimitry Andric 8890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8900b57cec5SDimitry Andric _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 8910b57cec5SDimitry Andric { 8920b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 8930b57cec5SDimitry Andric (__v64qi)_mm512_min_epu8(__A, __B), 8940b57cec5SDimitry Andric (__v64qi)__W); 8950b57cec5SDimitry Andric } 8960b57cec5SDimitry Andric 8970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8980b57cec5SDimitry Andric _mm512_min_epu16 (__m512i __A, __m512i __B) 8990b57cec5SDimitry Andric { 90004eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B); 9010b57cec5SDimitry Andric } 9020b57cec5SDimitry Andric 9030b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9040b57cec5SDimitry Andric _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 9050b57cec5SDimitry Andric { 9060b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 9070b57cec5SDimitry Andric (__v32hi)_mm512_min_epu16(__A, __B), 9080b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 9090b57cec5SDimitry Andric } 9100b57cec5SDimitry Andric 9110b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9120b57cec5SDimitry Andric _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 9130b57cec5SDimitry Andric { 9140b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 9150b57cec5SDimitry Andric (__v32hi)_mm512_min_epu16(__A, __B), 9160b57cec5SDimitry Andric (__v32hi)__W); 9170b57cec5SDimitry Andric } 9180b57cec5SDimitry Andric 9190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9200b57cec5SDimitry Andric _mm512_shuffle_epi8(__m512i __A, __m512i __B) 9210b57cec5SDimitry Andric { 9220b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); 9230b57cec5SDimitry Andric } 9240b57cec5SDimitry Andric 9250b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9260b57cec5SDimitry Andric _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 9270b57cec5SDimitry Andric { 9280b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 9290b57cec5SDimitry Andric (__v64qi)_mm512_shuffle_epi8(__A, __B), 9300b57cec5SDimitry Andric (__v64qi)__W); 9310b57cec5SDimitry Andric } 9320b57cec5SDimitry Andric 9330b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9340b57cec5SDimitry Andric _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) 9350b57cec5SDimitry Andric { 9360b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 9370b57cec5SDimitry Andric (__v64qi)_mm512_shuffle_epi8(__A, __B), 9380b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 9390b57cec5SDimitry Andric } 9400b57cec5SDimitry Andric 9410b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9420b57cec5SDimitry Andric _mm512_subs_epi8 (__m512i __A, __m512i __B) 9430b57cec5SDimitry Andric { 94481ad6265SDimitry Andric return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B); 9450b57cec5SDimitry Andric } 9460b57cec5SDimitry Andric 9470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9480b57cec5SDimitry Andric _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 9490b57cec5SDimitry Andric { 9500b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 9510b57cec5SDimitry Andric (__v64qi)_mm512_subs_epi8(__A, __B), 9520b57cec5SDimitry Andric (__v64qi)__W); 9530b57cec5SDimitry Andric } 9540b57cec5SDimitry Andric 9550b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9560b57cec5SDimitry Andric _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 9570b57cec5SDimitry Andric { 9580b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 9590b57cec5SDimitry Andric (__v64qi)_mm512_subs_epi8(__A, __B), 9600b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 9610b57cec5SDimitry Andric } 9620b57cec5SDimitry Andric 9630b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9640b57cec5SDimitry Andric _mm512_subs_epi16 (__m512i __A, __m512i __B) 9650b57cec5SDimitry Andric { 96681ad6265SDimitry Andric return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B); 9670b57cec5SDimitry Andric } 9680b57cec5SDimitry Andric 9690b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9700b57cec5SDimitry Andric _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 9710b57cec5SDimitry Andric { 9720b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 9730b57cec5SDimitry Andric (__v32hi)_mm512_subs_epi16(__A, __B), 9740b57cec5SDimitry Andric (__v32hi)__W); 9750b57cec5SDimitry Andric } 9760b57cec5SDimitry Andric 9770b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9780b57cec5SDimitry Andric _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 9790b57cec5SDimitry Andric { 9800b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 9810b57cec5SDimitry Andric (__v32hi)_mm512_subs_epi16(__A, __B), 9820b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 9830b57cec5SDimitry Andric } 9840b57cec5SDimitry Andric 9850b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9860b57cec5SDimitry Andric _mm512_subs_epu8 (__m512i __A, __m512i __B) 9870b57cec5SDimitry Andric { 98881ad6265SDimitry Andric return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B); 9890b57cec5SDimitry Andric } 9900b57cec5SDimitry Andric 9910b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9920b57cec5SDimitry Andric _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 9930b57cec5SDimitry Andric { 9940b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 9950b57cec5SDimitry Andric (__v64qi)_mm512_subs_epu8(__A, __B), 9960b57cec5SDimitry Andric (__v64qi)__W); 9970b57cec5SDimitry Andric } 9980b57cec5SDimitry Andric 9990b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10000b57cec5SDimitry Andric _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 10010b57cec5SDimitry Andric { 10020b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 10030b57cec5SDimitry Andric (__v64qi)_mm512_subs_epu8(__A, __B), 10040b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 10050b57cec5SDimitry Andric } 10060b57cec5SDimitry Andric 10070b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10080b57cec5SDimitry Andric _mm512_subs_epu16 (__m512i __A, __m512i __B) 10090b57cec5SDimitry Andric { 101081ad6265SDimitry Andric return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B); 10110b57cec5SDimitry Andric } 10120b57cec5SDimitry Andric 10130b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10140b57cec5SDimitry Andric _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 10150b57cec5SDimitry Andric { 10160b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 10170b57cec5SDimitry Andric (__v32hi)_mm512_subs_epu16(__A, __B), 10180b57cec5SDimitry Andric (__v32hi)__W); 10190b57cec5SDimitry Andric } 10200b57cec5SDimitry Andric 10210b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10220b57cec5SDimitry Andric _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 10230b57cec5SDimitry Andric { 10240b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 10250b57cec5SDimitry Andric (__v32hi)_mm512_subs_epu16(__A, __B), 10260b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 10270b57cec5SDimitry Andric } 10280b57cec5SDimitry Andric 10290b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10300b57cec5SDimitry Andric _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) 10310b57cec5SDimitry Andric { 10320b57cec5SDimitry Andric return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, 10330b57cec5SDimitry Andric (__v32hi)__B); 10340b57cec5SDimitry Andric } 10350b57cec5SDimitry Andric 10360b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10370b57cec5SDimitry Andric _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, 10380b57cec5SDimitry Andric __m512i __B) 10390b57cec5SDimitry Andric { 10400b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512(__U, 10410b57cec5SDimitry Andric (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 10420b57cec5SDimitry Andric (__v32hi)__A); 10430b57cec5SDimitry Andric } 10440b57cec5SDimitry Andric 10450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10460b57cec5SDimitry Andric _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, 10470b57cec5SDimitry Andric __m512i __B) 10480b57cec5SDimitry Andric { 10490b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512(__U, 10500b57cec5SDimitry Andric (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 10510b57cec5SDimitry Andric (__v32hi)__I); 10520b57cec5SDimitry Andric } 10530b57cec5SDimitry Andric 10540b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10550b57cec5SDimitry Andric _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, 10560b57cec5SDimitry Andric __m512i __B) 10570b57cec5SDimitry Andric { 10580b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512(__U, 10590b57cec5SDimitry Andric (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 10600b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 10610b57cec5SDimitry Andric } 10620b57cec5SDimitry Andric 10630b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10640b57cec5SDimitry Andric _mm512_mulhrs_epi16(__m512i __A, __m512i __B) 10650b57cec5SDimitry Andric { 10660b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); 10670b57cec5SDimitry Andric } 10680b57cec5SDimitry Andric 10690b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10700b57cec5SDimitry Andric _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 10710b57cec5SDimitry Andric { 10720b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 10730b57cec5SDimitry Andric (__v32hi)_mm512_mulhrs_epi16(__A, __B), 10740b57cec5SDimitry Andric (__v32hi)__W); 10750b57cec5SDimitry Andric } 10760b57cec5SDimitry Andric 10770b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10780b57cec5SDimitry Andric _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) 10790b57cec5SDimitry Andric { 10800b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 10810b57cec5SDimitry Andric (__v32hi)_mm512_mulhrs_epi16(__A, __B), 10820b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 10830b57cec5SDimitry Andric } 10840b57cec5SDimitry Andric 10850b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10860b57cec5SDimitry Andric _mm512_mulhi_epi16(__m512i __A, __m512i __B) 10870b57cec5SDimitry Andric { 10880b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); 10890b57cec5SDimitry Andric } 10900b57cec5SDimitry Andric 10910b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10920b57cec5SDimitry Andric _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, 10930b57cec5SDimitry Andric __m512i __B) 10940b57cec5SDimitry Andric { 10950b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 10960b57cec5SDimitry Andric (__v32hi)_mm512_mulhi_epi16(__A, __B), 10970b57cec5SDimitry Andric (__v32hi)__W); 10980b57cec5SDimitry Andric } 10990b57cec5SDimitry Andric 11000b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11010b57cec5SDimitry Andric _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) 11020b57cec5SDimitry Andric { 11030b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 11040b57cec5SDimitry Andric (__v32hi)_mm512_mulhi_epi16(__A, __B), 11050b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 11060b57cec5SDimitry Andric } 11070b57cec5SDimitry Andric 11080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11090b57cec5SDimitry Andric _mm512_mulhi_epu16(__m512i __A, __m512i __B) 11100b57cec5SDimitry Andric { 11110b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); 11120b57cec5SDimitry Andric } 11130b57cec5SDimitry Andric 11140b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11150b57cec5SDimitry Andric _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 11160b57cec5SDimitry Andric { 11170b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 11180b57cec5SDimitry Andric (__v32hi)_mm512_mulhi_epu16(__A, __B), 11190b57cec5SDimitry Andric (__v32hi)__W); 11200b57cec5SDimitry Andric } 11210b57cec5SDimitry Andric 11220b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11230b57cec5SDimitry Andric _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 11240b57cec5SDimitry Andric { 11250b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 11260b57cec5SDimitry Andric (__v32hi)_mm512_mulhi_epu16(__A, __B), 11270b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 11280b57cec5SDimitry Andric } 11290b57cec5SDimitry Andric 11300b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11310b57cec5SDimitry Andric _mm512_maddubs_epi16(__m512i __X, __m512i __Y) { 11320b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); 11330b57cec5SDimitry Andric } 11340b57cec5SDimitry Andric 11350b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11360b57cec5SDimitry Andric _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, 11370b57cec5SDimitry Andric __m512i __Y) { 11380b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 11390b57cec5SDimitry Andric (__v32hi)_mm512_maddubs_epi16(__X, __Y), 11400b57cec5SDimitry Andric (__v32hi)__W); 11410b57cec5SDimitry Andric } 11420b57cec5SDimitry Andric 11430b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11440b57cec5SDimitry Andric _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { 11450b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 11460b57cec5SDimitry Andric (__v32hi)_mm512_maddubs_epi16(__X, __Y), 11470b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 11480b57cec5SDimitry Andric } 11490b57cec5SDimitry Andric 11500b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11510b57cec5SDimitry Andric _mm512_madd_epi16(__m512i __A, __m512i __B) { 11520b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); 11530b57cec5SDimitry Andric } 11540b57cec5SDimitry Andric 11550b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11560b57cec5SDimitry Andric _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { 11570b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 11580b57cec5SDimitry Andric (__v16si)_mm512_madd_epi16(__A, __B), 11590b57cec5SDimitry Andric (__v16si)__W); 11600b57cec5SDimitry Andric } 11610b57cec5SDimitry Andric 11620b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11630b57cec5SDimitry Andric _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { 11640b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 11650b57cec5SDimitry Andric (__v16si)_mm512_madd_epi16(__A, __B), 11660b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 11670b57cec5SDimitry Andric } 11680b57cec5SDimitry Andric 11690b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 11700b57cec5SDimitry Andric _mm512_cvtsepi16_epi8 (__m512i __A) { 11710b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 11720b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256(), 11730b57cec5SDimitry Andric (__mmask32) -1); 11740b57cec5SDimitry Andric } 11750b57cec5SDimitry Andric 11760b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 11770b57cec5SDimitry Andric _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 11780b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 11790b57cec5SDimitry Andric (__v32qi)__O, 11800b57cec5SDimitry Andric __M); 11810b57cec5SDimitry Andric } 11820b57cec5SDimitry Andric 11830b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 11840b57cec5SDimitry Andric _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { 11850b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 11860b57cec5SDimitry Andric (__v32qi) _mm256_setzero_si256(), 11870b57cec5SDimitry Andric __M); 11880b57cec5SDimitry Andric } 11890b57cec5SDimitry Andric 11900b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 11910b57cec5SDimitry Andric _mm512_cvtusepi16_epi8 (__m512i __A) { 11920b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 11930b57cec5SDimitry Andric (__v32qi) _mm256_setzero_si256(), 11940b57cec5SDimitry Andric (__mmask32) -1); 11950b57cec5SDimitry Andric } 11960b57cec5SDimitry Andric 11970b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 11980b57cec5SDimitry Andric _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 11990b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 12000b57cec5SDimitry Andric (__v32qi) __O, 12010b57cec5SDimitry Andric __M); 12020b57cec5SDimitry Andric } 12030b57cec5SDimitry Andric 12040b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 12050b57cec5SDimitry Andric _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { 12060b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 12070b57cec5SDimitry Andric (__v32qi) _mm256_setzero_si256(), 12080b57cec5SDimitry Andric __M); 12090b57cec5SDimitry Andric } 12100b57cec5SDimitry Andric 12110b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 12120b57cec5SDimitry Andric _mm512_cvtepi16_epi8 (__m512i __A) { 12130b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 12140b57cec5SDimitry Andric (__v32qi) _mm256_undefined_si256(), 12150b57cec5SDimitry Andric (__mmask32) -1); 12160b57cec5SDimitry Andric } 12170b57cec5SDimitry Andric 12180b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 12190b57cec5SDimitry Andric _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 12200b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 12210b57cec5SDimitry Andric (__v32qi) __O, 12220b57cec5SDimitry Andric __M); 12230b57cec5SDimitry Andric } 12240b57cec5SDimitry Andric 12250b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 12260b57cec5SDimitry Andric _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { 12270b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 12280b57cec5SDimitry Andric (__v32qi) _mm256_setzero_si256(), 12290b57cec5SDimitry Andric __M); 12300b57cec5SDimitry Andric } 12310b57cec5SDimitry Andric 12320b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 12330b57cec5SDimitry Andric _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 12340b57cec5SDimitry Andric { 12350b57cec5SDimitry Andric __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 12360b57cec5SDimitry Andric } 12370b57cec5SDimitry Andric 12380b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 12390b57cec5SDimitry Andric _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 12400b57cec5SDimitry Andric { 12410b57cec5SDimitry Andric __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 12420b57cec5SDimitry Andric } 12430b57cec5SDimitry Andric 12440b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 12450b57cec5SDimitry Andric _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 12460b57cec5SDimitry Andric { 12470b57cec5SDimitry Andric __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 12480b57cec5SDimitry Andric } 12490b57cec5SDimitry Andric 12500b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 12510b57cec5SDimitry Andric _mm512_unpackhi_epi8(__m512i __A, __m512i __B) { 12520b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 12530b57cec5SDimitry Andric 8, 64+8, 9, 64+9, 12540b57cec5SDimitry Andric 10, 64+10, 11, 64+11, 12550b57cec5SDimitry Andric 12, 64+12, 13, 64+13, 12560b57cec5SDimitry Andric 14, 64+14, 15, 64+15, 12570b57cec5SDimitry Andric 24, 64+24, 25, 64+25, 12580b57cec5SDimitry Andric 26, 64+26, 27, 64+27, 12590b57cec5SDimitry Andric 28, 64+28, 29, 64+29, 12600b57cec5SDimitry Andric 30, 64+30, 31, 64+31, 12610b57cec5SDimitry Andric 40, 64+40, 41, 64+41, 12620b57cec5SDimitry Andric 42, 64+42, 43, 64+43, 12630b57cec5SDimitry Andric 44, 64+44, 45, 64+45, 12640b57cec5SDimitry Andric 46, 64+46, 47, 64+47, 12650b57cec5SDimitry Andric 56, 64+56, 57, 64+57, 12660b57cec5SDimitry Andric 58, 64+58, 59, 64+59, 12670b57cec5SDimitry Andric 60, 64+60, 61, 64+61, 12680b57cec5SDimitry Andric 62, 64+62, 63, 64+63); 12690b57cec5SDimitry Andric } 12700b57cec5SDimitry Andric 12710b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 12720b57cec5SDimitry Andric _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 12730b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 12740b57cec5SDimitry Andric (__v64qi)_mm512_unpackhi_epi8(__A, __B), 12750b57cec5SDimitry Andric (__v64qi)__W); 12760b57cec5SDimitry Andric } 12770b57cec5SDimitry Andric 12780b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 12790b57cec5SDimitry Andric _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 12800b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 12810b57cec5SDimitry Andric (__v64qi)_mm512_unpackhi_epi8(__A, __B), 12820b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 12830b57cec5SDimitry Andric } 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 12860b57cec5SDimitry Andric _mm512_unpackhi_epi16(__m512i __A, __m512i __B) { 12870b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 12880b57cec5SDimitry Andric 4, 32+4, 5, 32+5, 12890b57cec5SDimitry Andric 6, 32+6, 7, 32+7, 12900b57cec5SDimitry Andric 12, 32+12, 13, 32+13, 12910b57cec5SDimitry Andric 14, 32+14, 15, 32+15, 12920b57cec5SDimitry Andric 20, 32+20, 21, 32+21, 12930b57cec5SDimitry Andric 22, 32+22, 23, 32+23, 12940b57cec5SDimitry Andric 28, 32+28, 29, 32+29, 12950b57cec5SDimitry Andric 30, 32+30, 31, 32+31); 12960b57cec5SDimitry Andric } 12970b57cec5SDimitry Andric 12980b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 12990b57cec5SDimitry Andric _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 13000b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 13010b57cec5SDimitry Andric (__v32hi)_mm512_unpackhi_epi16(__A, __B), 13020b57cec5SDimitry Andric (__v32hi)__W); 13030b57cec5SDimitry Andric } 13040b57cec5SDimitry Andric 13050b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13060b57cec5SDimitry Andric _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 13070b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 13080b57cec5SDimitry Andric (__v32hi)_mm512_unpackhi_epi16(__A, __B), 13090b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 13100b57cec5SDimitry Andric } 13110b57cec5SDimitry Andric 13120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13130b57cec5SDimitry Andric _mm512_unpacklo_epi8(__m512i __A, __m512i __B) { 13140b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 13150b57cec5SDimitry Andric 0, 64+0, 1, 64+1, 13160b57cec5SDimitry Andric 2, 64+2, 3, 64+3, 13170b57cec5SDimitry Andric 4, 64+4, 5, 64+5, 13180b57cec5SDimitry Andric 6, 64+6, 7, 64+7, 13190b57cec5SDimitry Andric 16, 64+16, 17, 64+17, 13200b57cec5SDimitry Andric 18, 64+18, 19, 64+19, 13210b57cec5SDimitry Andric 20, 64+20, 21, 64+21, 13220b57cec5SDimitry Andric 22, 64+22, 23, 64+23, 13230b57cec5SDimitry Andric 32, 64+32, 33, 64+33, 13240b57cec5SDimitry Andric 34, 64+34, 35, 64+35, 13250b57cec5SDimitry Andric 36, 64+36, 37, 64+37, 13260b57cec5SDimitry Andric 38, 64+38, 39, 64+39, 13270b57cec5SDimitry Andric 48, 64+48, 49, 64+49, 13280b57cec5SDimitry Andric 50, 64+50, 51, 64+51, 13290b57cec5SDimitry Andric 52, 64+52, 53, 64+53, 13300b57cec5SDimitry Andric 54, 64+54, 55, 64+55); 13310b57cec5SDimitry Andric } 13320b57cec5SDimitry Andric 13330b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13340b57cec5SDimitry Andric _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 13350b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 13360b57cec5SDimitry Andric (__v64qi)_mm512_unpacklo_epi8(__A, __B), 13370b57cec5SDimitry Andric (__v64qi)__W); 13380b57cec5SDimitry Andric } 13390b57cec5SDimitry Andric 13400b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13410b57cec5SDimitry Andric _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 13420b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 13430b57cec5SDimitry Andric (__v64qi)_mm512_unpacklo_epi8(__A, __B), 13440b57cec5SDimitry Andric (__v64qi)_mm512_setzero_si512()); 13450b57cec5SDimitry Andric } 13460b57cec5SDimitry Andric 13470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13480b57cec5SDimitry Andric _mm512_unpacklo_epi16(__m512i __A, __m512i __B) { 13490b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 13500b57cec5SDimitry Andric 0, 32+0, 1, 32+1, 13510b57cec5SDimitry Andric 2, 32+2, 3, 32+3, 13520b57cec5SDimitry Andric 8, 32+8, 9, 32+9, 13530b57cec5SDimitry Andric 10, 32+10, 11, 32+11, 13540b57cec5SDimitry Andric 16, 32+16, 17, 32+17, 13550b57cec5SDimitry Andric 18, 32+18, 19, 32+19, 13560b57cec5SDimitry Andric 24, 32+24, 25, 32+25, 13570b57cec5SDimitry Andric 26, 32+26, 27, 32+27); 13580b57cec5SDimitry Andric } 13590b57cec5SDimitry Andric 13600b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13610b57cec5SDimitry Andric _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 13620b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 13630b57cec5SDimitry Andric (__v32hi)_mm512_unpacklo_epi16(__A, __B), 13640b57cec5SDimitry Andric (__v32hi)__W); 13650b57cec5SDimitry Andric } 13660b57cec5SDimitry Andric 13670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13680b57cec5SDimitry Andric _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 13690b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 13700b57cec5SDimitry Andric (__v32hi)_mm512_unpacklo_epi16(__A, __B), 13710b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 13720b57cec5SDimitry Andric } 13730b57cec5SDimitry Andric 13740b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13750b57cec5SDimitry Andric _mm512_cvtepi8_epi16(__m256i __A) 13760b57cec5SDimitry Andric { 13770b57cec5SDimitry Andric /* This function always performs a signed extension, but __v32qi is a char 13780b57cec5SDimitry Andric which may be signed or unsigned, so use __v32qs. */ 13790b57cec5SDimitry Andric return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); 13800b57cec5SDimitry Andric } 13810b57cec5SDimitry Andric 13820b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13830b57cec5SDimitry Andric _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 13840b57cec5SDimitry Andric { 13850b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 13860b57cec5SDimitry Andric (__v32hi)_mm512_cvtepi8_epi16(__A), 13870b57cec5SDimitry Andric (__v32hi)__W); 13880b57cec5SDimitry Andric } 13890b57cec5SDimitry Andric 13900b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13910b57cec5SDimitry Andric _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) 13920b57cec5SDimitry Andric { 13930b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 13940b57cec5SDimitry Andric (__v32hi)_mm512_cvtepi8_epi16(__A), 13950b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 13960b57cec5SDimitry Andric } 13970b57cec5SDimitry Andric 13980b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13990b57cec5SDimitry Andric _mm512_cvtepu8_epi16(__m256i __A) 14000b57cec5SDimitry Andric { 14010b57cec5SDimitry Andric return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); 14020b57cec5SDimitry Andric } 14030b57cec5SDimitry Andric 14040b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14050b57cec5SDimitry Andric _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 14060b57cec5SDimitry Andric { 14070b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 14080b57cec5SDimitry Andric (__v32hi)_mm512_cvtepu8_epi16(__A), 14090b57cec5SDimitry Andric (__v32hi)__W); 14100b57cec5SDimitry Andric } 14110b57cec5SDimitry Andric 14120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14130b57cec5SDimitry Andric _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) 14140b57cec5SDimitry Andric { 14150b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 14160b57cec5SDimitry Andric (__v32hi)_mm512_cvtepu8_epi16(__A), 14170b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 14180b57cec5SDimitry Andric } 14190b57cec5SDimitry Andric 14200b57cec5SDimitry Andric 14210b57cec5SDimitry Andric #define _mm512_shufflehi_epi16(A, imm) \ 1422349cc55cSDimitry Andric ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm))) 14230b57cec5SDimitry Andric 14240b57cec5SDimitry Andric #define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ 1425349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 14260b57cec5SDimitry Andric (__v32hi)_mm512_shufflehi_epi16((A), \ 14270b57cec5SDimitry Andric (imm)), \ 1428349cc55cSDimitry Andric (__v32hi)(__m512i)(W))) 14290b57cec5SDimitry Andric 14300b57cec5SDimitry Andric #define _mm512_maskz_shufflehi_epi16(U, A, imm) \ 1431349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 14320b57cec5SDimitry Andric (__v32hi)_mm512_shufflehi_epi16((A), \ 14330b57cec5SDimitry Andric (imm)), \ 1434349cc55cSDimitry Andric (__v32hi)_mm512_setzero_si512())) 14350b57cec5SDimitry Andric 14360b57cec5SDimitry Andric #define _mm512_shufflelo_epi16(A, imm) \ 1437349cc55cSDimitry Andric ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm))) 14380b57cec5SDimitry Andric 14390b57cec5SDimitry Andric 14400b57cec5SDimitry Andric #define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ 1441349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 14420b57cec5SDimitry Andric (__v32hi)_mm512_shufflelo_epi16((A), \ 14430b57cec5SDimitry Andric (imm)), \ 1444349cc55cSDimitry Andric (__v32hi)(__m512i)(W))) 14450b57cec5SDimitry Andric 14460b57cec5SDimitry Andric 14470b57cec5SDimitry Andric #define _mm512_maskz_shufflelo_epi16(U, A, imm) \ 1448349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 14490b57cec5SDimitry Andric (__v32hi)_mm512_shufflelo_epi16((A), \ 14500b57cec5SDimitry Andric (imm)), \ 1451349cc55cSDimitry Andric (__v32hi)_mm512_setzero_si512())) 14520b57cec5SDimitry Andric 14530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14540b57cec5SDimitry Andric _mm512_sllv_epi16(__m512i __A, __m512i __B) 14550b57cec5SDimitry Andric { 14560b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); 14570b57cec5SDimitry Andric } 14580b57cec5SDimitry Andric 14590b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14600b57cec5SDimitry Andric _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 14610b57cec5SDimitry Andric { 14620b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 14630b57cec5SDimitry Andric (__v32hi)_mm512_sllv_epi16(__A, __B), 14640b57cec5SDimitry Andric (__v32hi)__W); 14650b57cec5SDimitry Andric } 14660b57cec5SDimitry Andric 14670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14680b57cec5SDimitry Andric _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 14690b57cec5SDimitry Andric { 14700b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 14710b57cec5SDimitry Andric (__v32hi)_mm512_sllv_epi16(__A, __B), 14720b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 14730b57cec5SDimitry Andric } 14740b57cec5SDimitry Andric 14750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14760b57cec5SDimitry Andric _mm512_sll_epi16(__m512i __A, __m128i __B) 14770b57cec5SDimitry Andric { 14780b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); 14790b57cec5SDimitry Andric } 14800b57cec5SDimitry Andric 14810b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14820b57cec5SDimitry Andric _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 14830b57cec5SDimitry Andric { 14840b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 14850b57cec5SDimitry Andric (__v32hi)_mm512_sll_epi16(__A, __B), 14860b57cec5SDimitry Andric (__v32hi)__W); 14870b57cec5SDimitry Andric } 14880b57cec5SDimitry Andric 14890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14900b57cec5SDimitry Andric _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) 14910b57cec5SDimitry Andric { 14920b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 14930b57cec5SDimitry Andric (__v32hi)_mm512_sll_epi16(__A, __B), 14940b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 14950b57cec5SDimitry Andric } 14960b57cec5SDimitry Andric 14970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14985ffd83dbSDimitry Andric _mm512_slli_epi16(__m512i __A, unsigned int __B) 14990b57cec5SDimitry Andric { 150081ad6265SDimitry Andric return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, (int)__B); 15010b57cec5SDimitry Andric } 15020b57cec5SDimitry Andric 15030b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15045ffd83dbSDimitry Andric _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, 15055ffd83dbSDimitry Andric unsigned int __B) 15060b57cec5SDimitry Andric { 15070b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 15080b57cec5SDimitry Andric (__v32hi)_mm512_slli_epi16(__A, __B), 15090b57cec5SDimitry Andric (__v32hi)__W); 15100b57cec5SDimitry Andric } 15110b57cec5SDimitry Andric 15120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15135ffd83dbSDimitry Andric _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) 15140b57cec5SDimitry Andric { 15150b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 15160b57cec5SDimitry Andric (__v32hi)_mm512_slli_epi16(__A, __B), 15170b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 15180b57cec5SDimitry Andric } 15190b57cec5SDimitry Andric 15200b57cec5SDimitry Andric #define _mm512_bslli_epi128(a, imm) \ 1521349cc55cSDimitry Andric ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) 15220b57cec5SDimitry Andric 15230b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15240b57cec5SDimitry Andric _mm512_srlv_epi16(__m512i __A, __m512i __B) 15250b57cec5SDimitry Andric { 15260b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); 15270b57cec5SDimitry Andric } 15280b57cec5SDimitry Andric 15290b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15300b57cec5SDimitry Andric _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 15310b57cec5SDimitry Andric { 15320b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 15330b57cec5SDimitry Andric (__v32hi)_mm512_srlv_epi16(__A, __B), 15340b57cec5SDimitry Andric (__v32hi)__W); 15350b57cec5SDimitry Andric } 15360b57cec5SDimitry Andric 15370b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15380b57cec5SDimitry Andric _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 15390b57cec5SDimitry Andric { 15400b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 15410b57cec5SDimitry Andric (__v32hi)_mm512_srlv_epi16(__A, __B), 15420b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 15430b57cec5SDimitry Andric } 15440b57cec5SDimitry Andric 15450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15460b57cec5SDimitry Andric _mm512_srav_epi16(__m512i __A, __m512i __B) 15470b57cec5SDimitry Andric { 15480b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); 15490b57cec5SDimitry Andric } 15500b57cec5SDimitry Andric 15510b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15520b57cec5SDimitry Andric _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 15530b57cec5SDimitry Andric { 15540b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 15550b57cec5SDimitry Andric (__v32hi)_mm512_srav_epi16(__A, __B), 15560b57cec5SDimitry Andric (__v32hi)__W); 15570b57cec5SDimitry Andric } 15580b57cec5SDimitry Andric 15590b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15600b57cec5SDimitry Andric _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) 15610b57cec5SDimitry Andric { 15620b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 15630b57cec5SDimitry Andric (__v32hi)_mm512_srav_epi16(__A, __B), 15640b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 15650b57cec5SDimitry Andric } 15660b57cec5SDimitry Andric 15670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15680b57cec5SDimitry Andric _mm512_sra_epi16(__m512i __A, __m128i __B) 15690b57cec5SDimitry Andric { 15700b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); 15710b57cec5SDimitry Andric } 15720b57cec5SDimitry Andric 15730b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15740b57cec5SDimitry Andric _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 15750b57cec5SDimitry Andric { 15760b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 15770b57cec5SDimitry Andric (__v32hi)_mm512_sra_epi16(__A, __B), 15780b57cec5SDimitry Andric (__v32hi)__W); 15790b57cec5SDimitry Andric } 15800b57cec5SDimitry Andric 15810b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15820b57cec5SDimitry Andric _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) 15830b57cec5SDimitry Andric { 15840b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 15850b57cec5SDimitry Andric (__v32hi)_mm512_sra_epi16(__A, __B), 15860b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 15870b57cec5SDimitry Andric } 15880b57cec5SDimitry Andric 15890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15905ffd83dbSDimitry Andric _mm512_srai_epi16(__m512i __A, unsigned int __B) 15910b57cec5SDimitry Andric { 159281ad6265SDimitry Andric return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, (int)__B); 15930b57cec5SDimitry Andric } 15940b57cec5SDimitry Andric 15950b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 15965ffd83dbSDimitry Andric _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, 15975ffd83dbSDimitry Andric unsigned int __B) 15980b57cec5SDimitry Andric { 15990b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 16000b57cec5SDimitry Andric (__v32hi)_mm512_srai_epi16(__A, __B), 16010b57cec5SDimitry Andric (__v32hi)__W); 16020b57cec5SDimitry Andric } 16030b57cec5SDimitry Andric 16040b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16055ffd83dbSDimitry Andric _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) 16060b57cec5SDimitry Andric { 16070b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 16080b57cec5SDimitry Andric (__v32hi)_mm512_srai_epi16(__A, __B), 16090b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 16100b57cec5SDimitry Andric } 16110b57cec5SDimitry Andric 16120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16130b57cec5SDimitry Andric _mm512_srl_epi16(__m512i __A, __m128i __B) 16140b57cec5SDimitry Andric { 16150b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); 16160b57cec5SDimitry Andric } 16170b57cec5SDimitry Andric 16180b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16190b57cec5SDimitry Andric _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 16200b57cec5SDimitry Andric { 16210b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 16220b57cec5SDimitry Andric (__v32hi)_mm512_srl_epi16(__A, __B), 16230b57cec5SDimitry Andric (__v32hi)__W); 16240b57cec5SDimitry Andric } 16250b57cec5SDimitry Andric 16260b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16270b57cec5SDimitry Andric _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) 16280b57cec5SDimitry Andric { 16290b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 16300b57cec5SDimitry Andric (__v32hi)_mm512_srl_epi16(__A, __B), 16310b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 16320b57cec5SDimitry Andric } 16330b57cec5SDimitry Andric 16340b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16355ffd83dbSDimitry Andric _mm512_srli_epi16(__m512i __A, unsigned int __B) 16360b57cec5SDimitry Andric { 163781ad6265SDimitry Andric return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, (int)__B); 16380b57cec5SDimitry Andric } 16390b57cec5SDimitry Andric 16400b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16415ffd83dbSDimitry Andric _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, 16425ffd83dbSDimitry Andric unsigned int __B) 16430b57cec5SDimitry Andric { 16440b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 16450b57cec5SDimitry Andric (__v32hi)_mm512_srli_epi16(__A, __B), 16460b57cec5SDimitry Andric (__v32hi)__W); 16470b57cec5SDimitry Andric } 16480b57cec5SDimitry Andric 16490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16500b57cec5SDimitry Andric _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) 16510b57cec5SDimitry Andric { 16520b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 165381ad6265SDimitry Andric (__v32hi)_mm512_srli_epi16(__A, (unsigned int)__B), 16540b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 16550b57cec5SDimitry Andric } 16560b57cec5SDimitry Andric 16570b57cec5SDimitry Andric #define _mm512_bsrli_epi128(a, imm) \ 1658349cc55cSDimitry Andric ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) 16590b57cec5SDimitry Andric 16600b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16610b57cec5SDimitry Andric _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 16620b57cec5SDimitry Andric { 16630b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 16640b57cec5SDimitry Andric (__v32hi) __A, 16650b57cec5SDimitry Andric (__v32hi) __W); 16660b57cec5SDimitry Andric } 16670b57cec5SDimitry Andric 16680b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16690b57cec5SDimitry Andric _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) 16700b57cec5SDimitry Andric { 16710b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 16720b57cec5SDimitry Andric (__v32hi) __A, 16730b57cec5SDimitry Andric (__v32hi) _mm512_setzero_si512 ()); 16740b57cec5SDimitry Andric } 16750b57cec5SDimitry Andric 16760b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16770b57cec5SDimitry Andric _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 16780b57cec5SDimitry Andric { 16790b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 16800b57cec5SDimitry Andric (__v64qi) __A, 16810b57cec5SDimitry Andric (__v64qi) __W); 16820b57cec5SDimitry Andric } 16830b57cec5SDimitry Andric 16840b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16850b57cec5SDimitry Andric _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) 16860b57cec5SDimitry Andric { 16870b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 16880b57cec5SDimitry Andric (__v64qi) __A, 16890b57cec5SDimitry Andric (__v64qi) _mm512_setzero_si512 ()); 16900b57cec5SDimitry Andric } 16910b57cec5SDimitry Andric 16920b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 16930b57cec5SDimitry Andric _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) 16940b57cec5SDimitry Andric { 16950b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectb_512(__M, 16960b57cec5SDimitry Andric (__v64qi)_mm512_set1_epi8(__A), 16970b57cec5SDimitry Andric (__v64qi) __O); 16980b57cec5SDimitry Andric } 16990b57cec5SDimitry Andric 17000b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 17010b57cec5SDimitry Andric _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) 17020b57cec5SDimitry Andric { 17030b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectb_512(__M, 17040b57cec5SDimitry Andric (__v64qi) _mm512_set1_epi8(__A), 17050b57cec5SDimitry Andric (__v64qi) _mm512_setzero_si512()); 17060b57cec5SDimitry Andric } 17070b57cec5SDimitry Andric 1708*5f757f3fSDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A, 1709*5f757f3fSDimitry Andric __mmask64 __B) { 17100b57cec5SDimitry Andric return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, 17110b57cec5SDimitry Andric (__mmask64) __B); 17120b57cec5SDimitry Andric } 17130b57cec5SDimitry Andric 17140b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS 17150b57cec5SDimitry Andric _mm512_kunpackw (__mmask32 __A, __mmask32 __B) 17160b57cec5SDimitry Andric { 17170b57cec5SDimitry Andric return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, 17180b57cec5SDimitry Andric (__mmask32) __B); 17190b57cec5SDimitry Andric } 17200b57cec5SDimitry Andric 17210b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 17220b57cec5SDimitry Andric _mm512_loadu_epi16 (void const *__P) 17230b57cec5SDimitry Andric { 17240b57cec5SDimitry Andric struct __loadu_epi16 { 17250b57cec5SDimitry Andric __m512i_u __v; 17260b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1727480093f4SDimitry Andric return ((const struct __loadu_epi16*)__P)->__v; 17280b57cec5SDimitry Andric } 17290b57cec5SDimitry Andric 17300b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 17310b57cec5SDimitry Andric _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) 17320b57cec5SDimitry Andric { 1733480093f4SDimitry Andric return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, 17340b57cec5SDimitry Andric (__v32hi) __W, 17350b57cec5SDimitry Andric (__mmask32) __U); 17360b57cec5SDimitry Andric } 17370b57cec5SDimitry Andric 17380b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 17390b57cec5SDimitry Andric _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) 17400b57cec5SDimitry Andric { 1741480093f4SDimitry Andric return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, 17420b57cec5SDimitry Andric (__v32hi) 17430b57cec5SDimitry Andric _mm512_setzero_si512 (), 17440b57cec5SDimitry Andric (__mmask32) __U); 17450b57cec5SDimitry Andric } 17460b57cec5SDimitry Andric 17470b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 17480b57cec5SDimitry Andric _mm512_loadu_epi8 (void const *__P) 17490b57cec5SDimitry Andric { 17500b57cec5SDimitry Andric struct __loadu_epi8 { 17510b57cec5SDimitry Andric __m512i_u __v; 17520b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 1753480093f4SDimitry Andric return ((const struct __loadu_epi8*)__P)->__v; 17540b57cec5SDimitry Andric } 17550b57cec5SDimitry Andric 17560b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 17570b57cec5SDimitry Andric _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) 17580b57cec5SDimitry Andric { 1759480093f4SDimitry Andric return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, 17600b57cec5SDimitry Andric (__v64qi) __W, 17610b57cec5SDimitry Andric (__mmask64) __U); 17620b57cec5SDimitry Andric } 17630b57cec5SDimitry Andric 17640b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 17650b57cec5SDimitry Andric _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) 17660b57cec5SDimitry Andric { 1767480093f4SDimitry Andric return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, 17680b57cec5SDimitry Andric (__v64qi) 17690b57cec5SDimitry Andric _mm512_setzero_si512 (), 17700b57cec5SDimitry Andric (__mmask64) __U); 17710b57cec5SDimitry Andric } 17720b57cec5SDimitry Andric 17730b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 17740b57cec5SDimitry Andric _mm512_storeu_epi16 (void *__P, __m512i __A) 17750b57cec5SDimitry Andric { 17760b57cec5SDimitry Andric struct __storeu_epi16 { 17770b57cec5SDimitry Andric __m512i_u __v; 17780b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 17790b57cec5SDimitry Andric ((struct __storeu_epi16*)__P)->__v = __A; 17800b57cec5SDimitry Andric } 17810b57cec5SDimitry Andric 17820b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 17830b57cec5SDimitry Andric _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) 17840b57cec5SDimitry Andric { 17850b57cec5SDimitry Andric __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, 17860b57cec5SDimitry Andric (__v32hi) __A, 17870b57cec5SDimitry Andric (__mmask32) __U); 17880b57cec5SDimitry Andric } 17890b57cec5SDimitry Andric 17900b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 17910b57cec5SDimitry Andric _mm512_storeu_epi8 (void *__P, __m512i __A) 17920b57cec5SDimitry Andric { 17930b57cec5SDimitry Andric struct __storeu_epi8 { 17940b57cec5SDimitry Andric __m512i_u __v; 17950b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 17960b57cec5SDimitry Andric ((struct __storeu_epi8*)__P)->__v = __A; 17970b57cec5SDimitry Andric } 17980b57cec5SDimitry Andric 17990b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 18000b57cec5SDimitry Andric _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) 18010b57cec5SDimitry Andric { 18020b57cec5SDimitry Andric __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, 18030b57cec5SDimitry Andric (__v64qi) __A, 18040b57cec5SDimitry Andric (__mmask64) __U); 18050b57cec5SDimitry Andric } 18060b57cec5SDimitry Andric 18070b57cec5SDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 18080b57cec5SDimitry Andric _mm512_test_epi8_mask (__m512i __A, __m512i __B) 18090b57cec5SDimitry Andric { 18100b57cec5SDimitry Andric return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), 18110b57cec5SDimitry Andric _mm512_setzero_si512()); 18120b57cec5SDimitry Andric } 18130b57cec5SDimitry Andric 18140b57cec5SDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 18150b57cec5SDimitry Andric _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 18160b57cec5SDimitry Andric { 18170b57cec5SDimitry Andric return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 18180b57cec5SDimitry Andric _mm512_setzero_si512()); 18190b57cec5SDimitry Andric } 18200b57cec5SDimitry Andric 18210b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 18220b57cec5SDimitry Andric _mm512_test_epi16_mask (__m512i __A, __m512i __B) 18230b57cec5SDimitry Andric { 18240b57cec5SDimitry Andric return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), 18250b57cec5SDimitry Andric _mm512_setzero_si512()); 18260b57cec5SDimitry Andric } 18270b57cec5SDimitry Andric 18280b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 18290b57cec5SDimitry Andric _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 18300b57cec5SDimitry Andric { 18310b57cec5SDimitry Andric return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 18320b57cec5SDimitry Andric _mm512_setzero_si512()); 18330b57cec5SDimitry Andric } 18340b57cec5SDimitry Andric 18350b57cec5SDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 18360b57cec5SDimitry Andric _mm512_testn_epi8_mask (__m512i __A, __m512i __B) 18370b57cec5SDimitry Andric { 18380b57cec5SDimitry Andric return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); 18390b57cec5SDimitry Andric } 18400b57cec5SDimitry Andric 18410b57cec5SDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 18420b57cec5SDimitry Andric _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 18430b57cec5SDimitry Andric { 18440b57cec5SDimitry Andric return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 18450b57cec5SDimitry Andric _mm512_setzero_si512()); 18460b57cec5SDimitry Andric } 18470b57cec5SDimitry Andric 18480b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 18490b57cec5SDimitry Andric _mm512_testn_epi16_mask (__m512i __A, __m512i __B) 18500b57cec5SDimitry Andric { 18510b57cec5SDimitry Andric return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), 18520b57cec5SDimitry Andric _mm512_setzero_si512()); 18530b57cec5SDimitry Andric } 18540b57cec5SDimitry Andric 18550b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 18560b57cec5SDimitry Andric _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 18570b57cec5SDimitry Andric { 18580b57cec5SDimitry Andric return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 18590b57cec5SDimitry Andric _mm512_setzero_si512()); 18600b57cec5SDimitry Andric } 18610b57cec5SDimitry Andric 18620b57cec5SDimitry Andric static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 18630b57cec5SDimitry Andric _mm512_movepi8_mask (__m512i __A) 18640b57cec5SDimitry Andric { 18650b57cec5SDimitry Andric return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); 18660b57cec5SDimitry Andric } 18670b57cec5SDimitry Andric 18680b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 18690b57cec5SDimitry Andric _mm512_movepi16_mask (__m512i __A) 18700b57cec5SDimitry Andric { 18710b57cec5SDimitry Andric return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); 18720b57cec5SDimitry Andric } 18730b57cec5SDimitry Andric 18740b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 18750b57cec5SDimitry Andric _mm512_movm_epi8 (__mmask64 __A) 18760b57cec5SDimitry Andric { 18770b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtmask2b512 (__A); 18780b57cec5SDimitry Andric } 18790b57cec5SDimitry Andric 18800b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 18810b57cec5SDimitry Andric _mm512_movm_epi16 (__mmask32 __A) 18820b57cec5SDimitry Andric { 18830b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtmask2w512 (__A); 18840b57cec5SDimitry Andric } 18850b57cec5SDimitry Andric 18860b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 18870b57cec5SDimitry Andric _mm512_broadcastb_epi8 (__m128i __A) 18880b57cec5SDimitry Andric { 18890b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 18900b57cec5SDimitry Andric 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18910b57cec5SDimitry Andric 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18920b57cec5SDimitry Andric 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18930b57cec5SDimitry Andric 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 18940b57cec5SDimitry Andric } 18950b57cec5SDimitry Andric 18960b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 18970b57cec5SDimitry Andric _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) 18980b57cec5SDimitry Andric { 18990b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512(__M, 19000b57cec5SDimitry Andric (__v64qi) _mm512_broadcastb_epi8(__A), 19010b57cec5SDimitry Andric (__v64qi) __O); 19020b57cec5SDimitry Andric } 19030b57cec5SDimitry Andric 19040b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 19050b57cec5SDimitry Andric _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) 19060b57cec5SDimitry Andric { 19070b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectb_512(__M, 19080b57cec5SDimitry Andric (__v64qi) _mm512_broadcastb_epi8(__A), 19090b57cec5SDimitry Andric (__v64qi) _mm512_setzero_si512()); 19100b57cec5SDimitry Andric } 19110b57cec5SDimitry Andric 19120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 19130b57cec5SDimitry Andric _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) 19140b57cec5SDimitry Andric { 19150b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectw_512(__M, 19160b57cec5SDimitry Andric (__v32hi) _mm512_set1_epi16(__A), 19170b57cec5SDimitry Andric (__v32hi) __O); 19180b57cec5SDimitry Andric } 19190b57cec5SDimitry Andric 19200b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 19210b57cec5SDimitry Andric _mm512_maskz_set1_epi16 (__mmask32 __M, short __A) 19220b57cec5SDimitry Andric { 19230b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectw_512(__M, 19240b57cec5SDimitry Andric (__v32hi) _mm512_set1_epi16(__A), 19250b57cec5SDimitry Andric (__v32hi) _mm512_setzero_si512()); 19260b57cec5SDimitry Andric } 19270b57cec5SDimitry Andric 19280b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 19290b57cec5SDimitry Andric _mm512_broadcastw_epi16 (__m128i __A) 19300b57cec5SDimitry Andric { 19310b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 19320b57cec5SDimitry Andric 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19330b57cec5SDimitry Andric 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 19340b57cec5SDimitry Andric } 19350b57cec5SDimitry Andric 19360b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 19370b57cec5SDimitry Andric _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) 19380b57cec5SDimitry Andric { 19390b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512(__M, 19400b57cec5SDimitry Andric (__v32hi) _mm512_broadcastw_epi16(__A), 19410b57cec5SDimitry Andric (__v32hi) __O); 19420b57cec5SDimitry Andric } 19430b57cec5SDimitry Andric 19440b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 19450b57cec5SDimitry Andric _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) 19460b57cec5SDimitry Andric { 19470b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512(__M, 19480b57cec5SDimitry Andric (__v32hi) _mm512_broadcastw_epi16(__A), 19490b57cec5SDimitry Andric (__v32hi) _mm512_setzero_si512()); 19500b57cec5SDimitry Andric } 19510b57cec5SDimitry Andric 19520b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 19530b57cec5SDimitry Andric _mm512_permutexvar_epi16 (__m512i __A, __m512i __B) 19540b57cec5SDimitry Andric { 19550b57cec5SDimitry Andric return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); 19560b57cec5SDimitry Andric } 19570b57cec5SDimitry Andric 19580b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 19590b57cec5SDimitry Andric _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, 19600b57cec5SDimitry Andric __m512i __B) 19610b57cec5SDimitry Andric { 19620b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 19630b57cec5SDimitry Andric (__v32hi)_mm512_permutexvar_epi16(__A, __B), 19640b57cec5SDimitry Andric (__v32hi)_mm512_setzero_si512()); 19650b57cec5SDimitry Andric } 19660b57cec5SDimitry Andric 19670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 19680b57cec5SDimitry Andric _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 19690b57cec5SDimitry Andric __m512i __B) 19700b57cec5SDimitry Andric { 19710b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 19720b57cec5SDimitry Andric (__v32hi)_mm512_permutexvar_epi16(__A, __B), 19730b57cec5SDimitry Andric (__v32hi)__W); 19740b57cec5SDimitry Andric } 19750b57cec5SDimitry Andric 19760b57cec5SDimitry Andric #define _mm512_alignr_epi8(A, B, N) \ 1977349cc55cSDimitry Andric ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ 1978349cc55cSDimitry Andric (__v64qi)(__m512i)(B), (int)(N))) 19790b57cec5SDimitry Andric 19800b57cec5SDimitry Andric #define _mm512_mask_alignr_epi8(W, U, A, B, N) \ 1981349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 19820b57cec5SDimitry Andric (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1983349cc55cSDimitry Andric (__v64qi)(__m512i)(W))) 19840b57cec5SDimitry Andric 19850b57cec5SDimitry Andric #define _mm512_maskz_alignr_epi8(U, A, B, N) \ 1986349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 19870b57cec5SDimitry Andric (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1988349cc55cSDimitry Andric (__v64qi)(__m512i)_mm512_setzero_si512())) 19890b57cec5SDimitry Andric 19900b57cec5SDimitry Andric #define _mm512_dbsad_epu8(A, B, imm) \ 1991349cc55cSDimitry Andric ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ 1992349cc55cSDimitry Andric (__v64qi)(__m512i)(B), (int)(imm))) 19930b57cec5SDimitry Andric 19940b57cec5SDimitry Andric #define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ 1995349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 19960b57cec5SDimitry Andric (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 1997349cc55cSDimitry Andric (__v32hi)(__m512i)(W))) 19980b57cec5SDimitry Andric 19990b57cec5SDimitry Andric #define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ 2000349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 20010b57cec5SDimitry Andric (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2002349cc55cSDimitry Andric (__v32hi)_mm512_setzero_si512())) 20030b57cec5SDimitry Andric 20040b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 20050b57cec5SDimitry Andric _mm512_sad_epu8 (__m512i __A, __m512i __B) 20060b57cec5SDimitry Andric { 20070b57cec5SDimitry Andric return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, 20080b57cec5SDimitry Andric (__v64qi) __B); 20090b57cec5SDimitry Andric } 20100b57cec5SDimitry Andric 20110b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS512 20120b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 20130b57cec5SDimitry Andric 20140b57cec5SDimitry Andric #endif 2015