10b57cec5SDimitry Andric /*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __AVX512VLBWINTRIN_H 150b57cec5SDimitry Andric #define __AVX512VLBWINTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. 
*/ 18*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS128 \ 19*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 20*5f757f3fSDimitry Andric __target__("avx512vl,avx512bw,no-evex512"), \ 21*5f757f3fSDimitry Andric __min_vector_width__(128))) 22*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS256 \ 23*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 24*5f757f3fSDimitry Andric __target__("avx512vl,avx512bw,no-evex512"), \ 25*5f757f3fSDimitry Andric __min_vector_width__(256))) 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric /* Integer compare */ 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric #define _mm_cmp_epi8_mask(a, b, p) \ 30349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 310b57cec5SDimitry Andric (__v16qi)(__m128i)(b), (int)(p), \ 32349cc55cSDimitry Andric (__mmask16)-1)) 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric #define _mm_mask_cmp_epi8_mask(m, a, b, p) \ 35349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 360b57cec5SDimitry Andric (__v16qi)(__m128i)(b), (int)(p), \ 37349cc55cSDimitry Andric (__mmask16)(m))) 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric #define _mm_cmp_epu8_mask(a, b, p) \ 40349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 410b57cec5SDimitry Andric (__v16qi)(__m128i)(b), (int)(p), \ 42349cc55cSDimitry Andric (__mmask16)-1)) 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric #define _mm_mask_cmp_epu8_mask(m, a, b, p) \ 45349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 460b57cec5SDimitry Andric (__v16qi)(__m128i)(b), (int)(p), \ 47349cc55cSDimitry Andric (__mmask16)(m))) 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric #define _mm256_cmp_epi8_mask(a, b, p) \ 50349cc55cSDimitry Andric ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 510b57cec5SDimitry Andric (__v32qi)(__m256i)(b), (int)(p), \ 
52349cc55cSDimitry Andric (__mmask32)-1)) 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric #define _mm256_mask_cmp_epi8_mask(m, a, b, p) \ 55349cc55cSDimitry Andric ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 560b57cec5SDimitry Andric (__v32qi)(__m256i)(b), (int)(p), \ 57349cc55cSDimitry Andric (__mmask32)(m))) 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric #define _mm256_cmp_epu8_mask(a, b, p) \ 60349cc55cSDimitry Andric ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 610b57cec5SDimitry Andric (__v32qi)(__m256i)(b), (int)(p), \ 62349cc55cSDimitry Andric (__mmask32)-1)) 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric #define _mm256_mask_cmp_epu8_mask(m, a, b, p) \ 65349cc55cSDimitry Andric ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 660b57cec5SDimitry Andric (__v32qi)(__m256i)(b), (int)(p), \ 67349cc55cSDimitry Andric (__mmask32)(m))) 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric #define _mm_cmp_epi16_mask(a, b, p) \ 70349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 710b57cec5SDimitry Andric (__v8hi)(__m128i)(b), (int)(p), \ 72349cc55cSDimitry Andric (__mmask8)-1)) 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric #define _mm_mask_cmp_epi16_mask(m, a, b, p) \ 75349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 760b57cec5SDimitry Andric (__v8hi)(__m128i)(b), (int)(p), \ 77349cc55cSDimitry Andric (__mmask8)(m))) 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric #define _mm_cmp_epu16_mask(a, b, p) \ 80349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 810b57cec5SDimitry Andric (__v8hi)(__m128i)(b), (int)(p), \ 82349cc55cSDimitry Andric (__mmask8)-1)) 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric #define _mm_mask_cmp_epu16_mask(m, a, b, p) \ 85349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 860b57cec5SDimitry Andric 
(__v8hi)(__m128i)(b), (int)(p), \ 87349cc55cSDimitry Andric (__mmask8)(m))) 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric #define _mm256_cmp_epi16_mask(a, b, p) \ 90349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 910b57cec5SDimitry Andric (__v16hi)(__m256i)(b), (int)(p), \ 92349cc55cSDimitry Andric (__mmask16)-1)) 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric #define _mm256_mask_cmp_epi16_mask(m, a, b, p) \ 95349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 960b57cec5SDimitry Andric (__v16hi)(__m256i)(b), (int)(p), \ 97349cc55cSDimitry Andric (__mmask16)(m))) 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric #define _mm256_cmp_epu16_mask(a, b, p) \ 100349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 1010b57cec5SDimitry Andric (__v16hi)(__m256i)(b), (int)(p), \ 102349cc55cSDimitry Andric (__mmask16)-1)) 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric #define _mm256_mask_cmp_epu16_mask(m, a, b, p) \ 105349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 1060b57cec5SDimitry Andric (__v16hi)(__m256i)(b), (int)(p), \ 107349cc55cSDimitry Andric (__mmask16)(m))) 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric #define _mm_cmpeq_epi8_mask(A, B) \ 1100b57cec5SDimitry Andric _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 1110b57cec5SDimitry Andric #define _mm_mask_cmpeq_epi8_mask(k, A, B) \ 1120b57cec5SDimitry Andric _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 1130b57cec5SDimitry Andric #define _mm_cmpge_epi8_mask(A, B) \ 1140b57cec5SDimitry Andric _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 1150b57cec5SDimitry Andric #define _mm_mask_cmpge_epi8_mask(k, A, B) \ 1160b57cec5SDimitry Andric _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 1170b57cec5SDimitry Andric #define _mm_cmpgt_epi8_mask(A, B) \ 1180b57cec5SDimitry Andric _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 1190b57cec5SDimitry 
Andric #define _mm_mask_cmpgt_epi8_mask(k, A, B) \ 1200b57cec5SDimitry Andric _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 1210b57cec5SDimitry Andric #define _mm_cmple_epi8_mask(A, B) \ 1220b57cec5SDimitry Andric _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 1230b57cec5SDimitry Andric #define _mm_mask_cmple_epi8_mask(k, A, B) \ 1240b57cec5SDimitry Andric _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 1250b57cec5SDimitry Andric #define _mm_cmplt_epi8_mask(A, B) \ 1260b57cec5SDimitry Andric _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 1270b57cec5SDimitry Andric #define _mm_mask_cmplt_epi8_mask(k, A, B) \ 1280b57cec5SDimitry Andric _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 1290b57cec5SDimitry Andric #define _mm_cmpneq_epi8_mask(A, B) \ 1300b57cec5SDimitry Andric _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 1310b57cec5SDimitry Andric #define _mm_mask_cmpneq_epi8_mask(k, A, B) \ 1320b57cec5SDimitry Andric _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric #define _mm256_cmpeq_epi8_mask(A, B) \ 1350b57cec5SDimitry Andric _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 1360b57cec5SDimitry Andric #define _mm256_mask_cmpeq_epi8_mask(k, A, B) \ 1370b57cec5SDimitry Andric _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 1380b57cec5SDimitry Andric #define _mm256_cmpge_epi8_mask(A, B) \ 1390b57cec5SDimitry Andric _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 1400b57cec5SDimitry Andric #define _mm256_mask_cmpge_epi8_mask(k, A, B) \ 1410b57cec5SDimitry Andric _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 1420b57cec5SDimitry Andric #define _mm256_cmpgt_epi8_mask(A, B) \ 1430b57cec5SDimitry Andric _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 1440b57cec5SDimitry Andric #define _mm256_mask_cmpgt_epi8_mask(k, A, B) \ 1450b57cec5SDimitry Andric _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 1460b57cec5SDimitry Andric #define _mm256_cmple_epi8_mask(A, B) \ 1470b57cec5SDimitry Andric 
_mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 1480b57cec5SDimitry Andric #define _mm256_mask_cmple_epi8_mask(k, A, B) \ 1490b57cec5SDimitry Andric _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 1500b57cec5SDimitry Andric #define _mm256_cmplt_epi8_mask(A, B) \ 1510b57cec5SDimitry Andric _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 1520b57cec5SDimitry Andric #define _mm256_mask_cmplt_epi8_mask(k, A, B) \ 1530b57cec5SDimitry Andric _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 1540b57cec5SDimitry Andric #define _mm256_cmpneq_epi8_mask(A, B) \ 1550b57cec5SDimitry Andric _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 1560b57cec5SDimitry Andric #define _mm256_mask_cmpneq_epi8_mask(k, A, B) \ 1570b57cec5SDimitry Andric _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric #define _mm_cmpeq_epu8_mask(A, B) \ 1600b57cec5SDimitry Andric _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 1610b57cec5SDimitry Andric #define _mm_mask_cmpeq_epu8_mask(k, A, B) \ 1620b57cec5SDimitry Andric _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 1630b57cec5SDimitry Andric #define _mm_cmpge_epu8_mask(A, B) \ 1640b57cec5SDimitry Andric _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 1650b57cec5SDimitry Andric #define _mm_mask_cmpge_epu8_mask(k, A, B) \ 1660b57cec5SDimitry Andric _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 1670b57cec5SDimitry Andric #define _mm_cmpgt_epu8_mask(A, B) \ 1680b57cec5SDimitry Andric _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 1690b57cec5SDimitry Andric #define _mm_mask_cmpgt_epu8_mask(k, A, B) \ 1700b57cec5SDimitry Andric _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 1710b57cec5SDimitry Andric #define _mm_cmple_epu8_mask(A, B) \ 1720b57cec5SDimitry Andric _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 1730b57cec5SDimitry Andric #define _mm_mask_cmple_epu8_mask(k, A, B) \ 1740b57cec5SDimitry Andric _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 1750b57cec5SDimitry Andric #define 
_mm_cmplt_epu8_mask(A, B) \ 1760b57cec5SDimitry Andric _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 1770b57cec5SDimitry Andric #define _mm_mask_cmplt_epu8_mask(k, A, B) \ 1780b57cec5SDimitry Andric _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 1790b57cec5SDimitry Andric #define _mm_cmpneq_epu8_mask(A, B) \ 1800b57cec5SDimitry Andric _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 1810b57cec5SDimitry Andric #define _mm_mask_cmpneq_epu8_mask(k, A, B) \ 1820b57cec5SDimitry Andric _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric #define _mm256_cmpeq_epu8_mask(A, B) \ 1850b57cec5SDimitry Andric _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 1860b57cec5SDimitry Andric #define _mm256_mask_cmpeq_epu8_mask(k, A, B) \ 1870b57cec5SDimitry Andric _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 1880b57cec5SDimitry Andric #define _mm256_cmpge_epu8_mask(A, B) \ 1890b57cec5SDimitry Andric _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 1900b57cec5SDimitry Andric #define _mm256_mask_cmpge_epu8_mask(k, A, B) \ 1910b57cec5SDimitry Andric _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 1920b57cec5SDimitry Andric #define _mm256_cmpgt_epu8_mask(A, B) \ 1930b57cec5SDimitry Andric _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 1940b57cec5SDimitry Andric #define _mm256_mask_cmpgt_epu8_mask(k, A, B) \ 1950b57cec5SDimitry Andric _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 1960b57cec5SDimitry Andric #define _mm256_cmple_epu8_mask(A, B) \ 1970b57cec5SDimitry Andric _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 1980b57cec5SDimitry Andric #define _mm256_mask_cmple_epu8_mask(k, A, B) \ 1990b57cec5SDimitry Andric _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 2000b57cec5SDimitry Andric #define _mm256_cmplt_epu8_mask(A, B) \ 2010b57cec5SDimitry Andric _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 2020b57cec5SDimitry Andric #define _mm256_mask_cmplt_epu8_mask(k, A, B) \ 2030b57cec5SDimitry Andric 
_mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 2040b57cec5SDimitry Andric #define _mm256_cmpneq_epu8_mask(A, B) \ 2050b57cec5SDimitry Andric _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 2060b57cec5SDimitry Andric #define _mm256_mask_cmpneq_epu8_mask(k, A, B) \ 2070b57cec5SDimitry Andric _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric #define _mm_cmpeq_epi16_mask(A, B) \ 2100b57cec5SDimitry Andric _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 2110b57cec5SDimitry Andric #define _mm_mask_cmpeq_epi16_mask(k, A, B) \ 2120b57cec5SDimitry Andric _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 2130b57cec5SDimitry Andric #define _mm_cmpge_epi16_mask(A, B) \ 2140b57cec5SDimitry Andric _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 2150b57cec5SDimitry Andric #define _mm_mask_cmpge_epi16_mask(k, A, B) \ 2160b57cec5SDimitry Andric _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 2170b57cec5SDimitry Andric #define _mm_cmpgt_epi16_mask(A, B) \ 2180b57cec5SDimitry Andric _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 2190b57cec5SDimitry Andric #define _mm_mask_cmpgt_epi16_mask(k, A, B) \ 2200b57cec5SDimitry Andric _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 2210b57cec5SDimitry Andric #define _mm_cmple_epi16_mask(A, B) \ 2220b57cec5SDimitry Andric _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 2230b57cec5SDimitry Andric #define _mm_mask_cmple_epi16_mask(k, A, B) \ 2240b57cec5SDimitry Andric _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 2250b57cec5SDimitry Andric #define _mm_cmplt_epi16_mask(A, B) \ 2260b57cec5SDimitry Andric _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 2270b57cec5SDimitry Andric #define _mm_mask_cmplt_epi16_mask(k, A, B) \ 2280b57cec5SDimitry Andric _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 2290b57cec5SDimitry Andric #define _mm_cmpneq_epi16_mask(A, B) \ 2300b57cec5SDimitry Andric _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 2310b57cec5SDimitry Andric #define 
_mm_mask_cmpneq_epi16_mask(k, A, B) \ 2320b57cec5SDimitry Andric _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 2330b57cec5SDimitry Andric 2340b57cec5SDimitry Andric #define _mm256_cmpeq_epi16_mask(A, B) \ 2350b57cec5SDimitry Andric _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 2360b57cec5SDimitry Andric #define _mm256_mask_cmpeq_epi16_mask(k, A, B) \ 2370b57cec5SDimitry Andric _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 2380b57cec5SDimitry Andric #define _mm256_cmpge_epi16_mask(A, B) \ 2390b57cec5SDimitry Andric _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 2400b57cec5SDimitry Andric #define _mm256_mask_cmpge_epi16_mask(k, A, B) \ 2410b57cec5SDimitry Andric _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 2420b57cec5SDimitry Andric #define _mm256_cmpgt_epi16_mask(A, B) \ 2430b57cec5SDimitry Andric _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 2440b57cec5SDimitry Andric #define _mm256_mask_cmpgt_epi16_mask(k, A, B) \ 2450b57cec5SDimitry Andric _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 2460b57cec5SDimitry Andric #define _mm256_cmple_epi16_mask(A, B) \ 2470b57cec5SDimitry Andric _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 2480b57cec5SDimitry Andric #define _mm256_mask_cmple_epi16_mask(k, A, B) \ 2490b57cec5SDimitry Andric _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 2500b57cec5SDimitry Andric #define _mm256_cmplt_epi16_mask(A, B) \ 2510b57cec5SDimitry Andric _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 2520b57cec5SDimitry Andric #define _mm256_mask_cmplt_epi16_mask(k, A, B) \ 2530b57cec5SDimitry Andric _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 2540b57cec5SDimitry Andric #define _mm256_cmpneq_epi16_mask(A, B) \ 2550b57cec5SDimitry Andric _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 2560b57cec5SDimitry Andric #define _mm256_mask_cmpneq_epi16_mask(k, A, B) \ 2570b57cec5SDimitry Andric _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 2580b57cec5SDimitry Andric 2590b57cec5SDimitry 
Andric #define _mm_cmpeq_epu16_mask(A, B) \ 2600b57cec5SDimitry Andric _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 2610b57cec5SDimitry Andric #define _mm_mask_cmpeq_epu16_mask(k, A, B) \ 2620b57cec5SDimitry Andric _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 2630b57cec5SDimitry Andric #define _mm_cmpge_epu16_mask(A, B) \ 2640b57cec5SDimitry Andric _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 2650b57cec5SDimitry Andric #define _mm_mask_cmpge_epu16_mask(k, A, B) \ 2660b57cec5SDimitry Andric _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 2670b57cec5SDimitry Andric #define _mm_cmpgt_epu16_mask(A, B) \ 2680b57cec5SDimitry Andric _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 2690b57cec5SDimitry Andric #define _mm_mask_cmpgt_epu16_mask(k, A, B) \ 2700b57cec5SDimitry Andric _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 2710b57cec5SDimitry Andric #define _mm_cmple_epu16_mask(A, B) \ 2720b57cec5SDimitry Andric _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 2730b57cec5SDimitry Andric #define _mm_mask_cmple_epu16_mask(k, A, B) \ 2740b57cec5SDimitry Andric _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 2750b57cec5SDimitry Andric #define _mm_cmplt_epu16_mask(A, B) \ 2760b57cec5SDimitry Andric _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 2770b57cec5SDimitry Andric #define _mm_mask_cmplt_epu16_mask(k, A, B) \ 2780b57cec5SDimitry Andric _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 2790b57cec5SDimitry Andric #define _mm_cmpneq_epu16_mask(A, B) \ 2800b57cec5SDimitry Andric _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 2810b57cec5SDimitry Andric #define _mm_mask_cmpneq_epu16_mask(k, A, B) \ 2820b57cec5SDimitry Andric _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric #define _mm256_cmpeq_epu16_mask(A, B) \ 2850b57cec5SDimitry Andric _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 2860b57cec5SDimitry Andric #define _mm256_mask_cmpeq_epu16_mask(k, A, B) \ 2870b57cec5SDimitry Andric 
_mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 2880b57cec5SDimitry Andric #define _mm256_cmpge_epu16_mask(A, B) \ 2890b57cec5SDimitry Andric _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 2900b57cec5SDimitry Andric #define _mm256_mask_cmpge_epu16_mask(k, A, B) \ 2910b57cec5SDimitry Andric _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 2920b57cec5SDimitry Andric #define _mm256_cmpgt_epu16_mask(A, B) \ 2930b57cec5SDimitry Andric _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 2940b57cec5SDimitry Andric #define _mm256_mask_cmpgt_epu16_mask(k, A, B) \ 2950b57cec5SDimitry Andric _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 2960b57cec5SDimitry Andric #define _mm256_cmple_epu16_mask(A, B) \ 2970b57cec5SDimitry Andric _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 2980b57cec5SDimitry Andric #define _mm256_mask_cmple_epu16_mask(k, A, B) \ 2990b57cec5SDimitry Andric _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 3000b57cec5SDimitry Andric #define _mm256_cmplt_epu16_mask(A, B) \ 3010b57cec5SDimitry Andric _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 3020b57cec5SDimitry Andric #define _mm256_mask_cmplt_epu16_mask(k, A, B) \ 3030b57cec5SDimitry Andric _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 3040b57cec5SDimitry Andric #define _mm256_cmpneq_epu16_mask(A, B) \ 3050b57cec5SDimitry Andric _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 3060b57cec5SDimitry Andric #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ 3070b57cec5SDimitry Andric _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3100b57cec5SDimitry Andric _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ 3110b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 3120b57cec5SDimitry Andric (__v32qi)_mm256_add_epi8(__A, __B), 3130b57cec5SDimitry Andric (__v32qi)__W); 3140b57cec5SDimitry Andric } 
3150b57cec5SDimitry Andric 3160b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3170b57cec5SDimitry Andric _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) { 3180b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 3190b57cec5SDimitry Andric (__v32qi)_mm256_add_epi8(__A, __B), 3200b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 3210b57cec5SDimitry Andric } 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3240b57cec5SDimitry Andric _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 3250b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 3260b57cec5SDimitry Andric (__v16hi)_mm256_add_epi16(__A, __B), 3270b57cec5SDimitry Andric (__v16hi)__W); 3280b57cec5SDimitry Andric } 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3310b57cec5SDimitry Andric _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 3320b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 3330b57cec5SDimitry Andric (__v16hi)_mm256_add_epi16(__A, __B), 3340b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 3350b57cec5SDimitry Andric } 3360b57cec5SDimitry Andric 3370b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3380b57cec5SDimitry Andric _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { 3390b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 3400b57cec5SDimitry Andric (__v32qi)_mm256_sub_epi8(__A, __B), 3410b57cec5SDimitry Andric (__v32qi)__W); 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3450b57cec5SDimitry Andric _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) { 3460b57cec5SDimitry Andric return 
(__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 3470b57cec5SDimitry Andric (__v32qi)_mm256_sub_epi8(__A, __B), 3480b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 3490b57cec5SDimitry Andric } 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3520b57cec5SDimitry Andric _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 3530b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 3540b57cec5SDimitry Andric (__v16hi)_mm256_sub_epi16(__A, __B), 3550b57cec5SDimitry Andric (__v16hi)__W); 3560b57cec5SDimitry Andric } 3570b57cec5SDimitry Andric 3580b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3590b57cec5SDimitry Andric _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 3600b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 3610b57cec5SDimitry Andric (__v16hi)_mm256_sub_epi16(__A, __B), 3620b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 3630b57cec5SDimitry Andric } 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3660b57cec5SDimitry Andric _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { 3670b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 3680b57cec5SDimitry Andric (__v16qi)_mm_add_epi8(__A, __B), 3690b57cec5SDimitry Andric (__v16qi)__W); 3700b57cec5SDimitry Andric } 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3730b57cec5SDimitry Andric _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) { 3740b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 3750b57cec5SDimitry Andric (__v16qi)_mm_add_epi8(__A, __B), 3760b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 3770b57cec5SDimitry Andric } 3780b57cec5SDimitry Andric 3790b57cec5SDimitry Andric static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 3800b57cec5SDimitry Andric _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 3810b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 3820b57cec5SDimitry Andric (__v8hi)_mm_add_epi16(__A, __B), 3830b57cec5SDimitry Andric (__v8hi)__W); 3840b57cec5SDimitry Andric } 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3870b57cec5SDimitry Andric _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 3880b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 3890b57cec5SDimitry Andric (__v8hi)_mm_add_epi16(__A, __B), 3900b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 3910b57cec5SDimitry Andric } 3920b57cec5SDimitry Andric 3930b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3940b57cec5SDimitry Andric _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { 3950b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 3960b57cec5SDimitry Andric (__v16qi)_mm_sub_epi8(__A, __B), 3970b57cec5SDimitry Andric (__v16qi)__W); 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4010b57cec5SDimitry Andric _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) { 4020b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 4030b57cec5SDimitry Andric (__v16qi)_mm_sub_epi8(__A, __B), 4040b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 4050b57cec5SDimitry Andric } 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4080b57cec5SDimitry Andric _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 4090b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 4100b57cec5SDimitry Andric (__v8hi)_mm_sub_epi16(__A, __B), 4110b57cec5SDimitry Andric (__v8hi)__W); 
4120b57cec5SDimitry Andric } 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4150b57cec5SDimitry Andric _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 4160b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 4170b57cec5SDimitry Andric (__v8hi)_mm_sub_epi16(__A, __B), 4180b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4220b57cec5SDimitry Andric _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 4230b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 4240b57cec5SDimitry Andric (__v16hi)_mm256_mullo_epi16(__A, __B), 4250b57cec5SDimitry Andric (__v16hi)__W); 4260b57cec5SDimitry Andric } 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4290b57cec5SDimitry Andric _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 4300b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 4310b57cec5SDimitry Andric (__v16hi)_mm256_mullo_epi16(__A, __B), 4320b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 4330b57cec5SDimitry Andric } 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4360b57cec5SDimitry Andric _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 4370b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 4380b57cec5SDimitry Andric (__v8hi)_mm_mullo_epi16(__A, __B), 4390b57cec5SDimitry Andric (__v8hi)__W); 4400b57cec5SDimitry Andric } 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4430b57cec5SDimitry Andric _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 4440b57cec5SDimitry Andric return 
(__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 4450b57cec5SDimitry Andric (__v8hi)_mm_mullo_epi16(__A, __B), 4460b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 4470b57cec5SDimitry Andric } 4480b57cec5SDimitry Andric 4490b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4500b57cec5SDimitry Andric _mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) 4510b57cec5SDimitry Andric { 4520b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, 4530b57cec5SDimitry Andric (__v16qi) __W, 4540b57cec5SDimitry Andric (__v16qi) __A); 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4580b57cec5SDimitry Andric _mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) 4590b57cec5SDimitry Andric { 4600b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, 4610b57cec5SDimitry Andric (__v32qi) __W, 4620b57cec5SDimitry Andric (__v32qi) __A); 4630b57cec5SDimitry Andric } 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4660b57cec5SDimitry Andric _mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) 4670b57cec5SDimitry Andric { 4680b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, 4690b57cec5SDimitry Andric (__v8hi) __W, 4700b57cec5SDimitry Andric (__v8hi) __A); 4710b57cec5SDimitry Andric } 4720b57cec5SDimitry Andric 4730b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4740b57cec5SDimitry Andric _mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) 4750b57cec5SDimitry Andric { 4760b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, 4770b57cec5SDimitry Andric (__v16hi) __W, 4780b57cec5SDimitry Andric (__v16hi) __A); 4790b57cec5SDimitry Andric } 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 
4820b57cec5SDimitry Andric _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) 4830b57cec5SDimitry Andric { 4840b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 4850b57cec5SDimitry Andric (__v16qi)_mm_abs_epi8(__A), 4860b57cec5SDimitry Andric (__v16qi)__W); 4870b57cec5SDimitry Andric } 4880b57cec5SDimitry Andric 4890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4900b57cec5SDimitry Andric _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) 4910b57cec5SDimitry Andric { 4920b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 4930b57cec5SDimitry Andric (__v16qi)_mm_abs_epi8(__A), 4940b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 4950b57cec5SDimitry Andric } 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4980b57cec5SDimitry Andric _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) 4990b57cec5SDimitry Andric { 5000b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 5010b57cec5SDimitry Andric (__v32qi)_mm256_abs_epi8(__A), 5020b57cec5SDimitry Andric (__v32qi)__W); 5030b57cec5SDimitry Andric } 5040b57cec5SDimitry Andric 5050b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5060b57cec5SDimitry Andric _mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A) 5070b57cec5SDimitry Andric { 5080b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 5090b57cec5SDimitry Andric (__v32qi)_mm256_abs_epi8(__A), 5100b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 5110b57cec5SDimitry Andric } 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5140b57cec5SDimitry Andric _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) 5150b57cec5SDimitry Andric { 5160b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 5170b57cec5SDimitry Andric (__v8hi)_mm_abs_epi16(__A), 
5180b57cec5SDimitry Andric (__v8hi)__W); 5190b57cec5SDimitry Andric } 5200b57cec5SDimitry Andric 5210b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm_maskz_abs_epi16: returns selectw_128(__U, _mm_abs_epi16(__A), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 5220b57cec5SDimitry Andric _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) 5230b57cec5SDimitry Andric { 5240b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 5250b57cec5SDimitry Andric (__v8hi)_mm_abs_epi16(__A), 5260b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric /* _mm256_mask_abs_epi16: returns selectw_256(__U, _mm256_abs_epi16(__A), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 5300b57cec5SDimitry Andric _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) 5310b57cec5SDimitry Andric { 5320b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 5330b57cec5SDimitry Andric (__v16hi)_mm256_abs_epi16(__A), 5340b57cec5SDimitry Andric (__v16hi)__W); 5350b57cec5SDimitry Andric } 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric /* _mm256_maskz_abs_epi16: returns selectw_256(__U, _mm256_abs_epi16(__A), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 5380b57cec5SDimitry Andric _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) 5390b57cec5SDimitry Andric { 5400b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 5410b57cec5SDimitry Andric (__v16hi)_mm256_abs_epi16(__A), 5420b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 5430b57cec5SDimitry Andric } 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric /* _mm_maskz_packs_epi32: returns selectw_128(__M, _mm_packs_epi32(__A, __B), _mm_setzero_si128()); the pack result is viewed as __v8hi. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 5460b57cec5SDimitry Andric _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 5470b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 5480b57cec5SDimitry Andric (__v8hi)_mm_packs_epi32(__A, __B), 5490b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 5500b57cec5SDimitry Andric } 5510b57cec5SDimitry Andric 5520b57cec5SDimitry Andric /* _mm_mask_packs_epi32 (definition continues past this line): returns selectw_128(__M, _mm_packs_epi32(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 5530b57cec5SDimitry Andric _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A,
__m128i __B) 5540b57cec5SDimitry Andric { 5550b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 5560b57cec5SDimitry Andric (__v8hi)_mm_packs_epi32(__A, __B), 5570b57cec5SDimitry Andric (__v8hi)__W); 5580b57cec5SDimitry Andric } 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_maskz_packs_epi32: returns selectw_256(__M, _mm256_packs_epi32(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 5610b57cec5SDimitry Andric _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) 5620b57cec5SDimitry Andric { 5630b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 5640b57cec5SDimitry Andric (__v16hi)_mm256_packs_epi32(__A, __B), 5650b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric 5680b57cec5SDimitry Andric /* _mm256_mask_packs_epi32: returns selectw_256(__M, _mm256_packs_epi32(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 5690b57cec5SDimitry Andric _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 5700b57cec5SDimitry Andric { 5710b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 5720b57cec5SDimitry Andric (__v16hi)_mm256_packs_epi32(__A, __B), 5730b57cec5SDimitry Andric (__v16hi)__W); 5740b57cec5SDimitry Andric } 5750b57cec5SDimitry Andric 5760b57cec5SDimitry Andric /* _mm_maskz_packs_epi16: returns selectb_128(__M, _mm_packs_epi16(__A, __B), _mm_setzero_si128()); the pack result is viewed as __v16qi. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 5770b57cec5SDimitry Andric _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) 5780b57cec5SDimitry Andric { 5790b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 5800b57cec5SDimitry Andric (__v16qi)_mm_packs_epi16(__A, __B), 5810b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 5820b57cec5SDimitry Andric } 5830b57cec5SDimitry Andric 5840b57cec5SDimitry Andric /* _mm_mask_packs_epi16 (definition continues past this line): returns selectb_128(__M, _mm_packs_epi16(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 5850b57cec5SDimitry Andric _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 5860b57cec5SDimitry Andric { 5870b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 5880b57cec5SDimitry Andric
(__v16qi)_mm_packs_epi16(__A, __B), 5890b57cec5SDimitry Andric (__v16qi)__W); 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_maskz_packs_epi16: returns selectb_256(__M, _mm256_packs_epi16(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 5930b57cec5SDimitry Andric _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) 5940b57cec5SDimitry Andric { 5950b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 5960b57cec5SDimitry Andric (__v32qi)_mm256_packs_epi16(__A, __B), 5970b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 5980b57cec5SDimitry Andric } 5990b57cec5SDimitry Andric 6000b57cec5SDimitry Andric /* _mm256_mask_packs_epi16: returns selectb_256(__M, _mm256_packs_epi16(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 6010b57cec5SDimitry Andric _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 6020b57cec5SDimitry Andric { 6030b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 6040b57cec5SDimitry Andric (__v32qi)_mm256_packs_epi16(__A, __B), 6050b57cec5SDimitry Andric (__v32qi)__W); 6060b57cec5SDimitry Andric } 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric /* _mm_maskz_packus_epi32: returns selectw_128(__M, _mm_packus_epi32(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 6090b57cec5SDimitry Andric _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) 6100b57cec5SDimitry Andric { 6110b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 6120b57cec5SDimitry Andric (__v8hi)_mm_packus_epi32(__A, __B), 6130b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 6140b57cec5SDimitry Andric } 6150b57cec5SDimitry Andric 6160b57cec5SDimitry Andric /* _mm_mask_packus_epi32: returns selectw_128(__M, _mm_packus_epi32(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 6170b57cec5SDimitry Andric _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 6180b57cec5SDimitry Andric { 6190b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 6200b57cec5SDimitry Andric (__v8hi)_mm_packus_epi32(__A, __B), 6210b57cec5SDimitry Andric (__v8hi)__W); 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric /* _mm256_maskz_packus_epi32 (definition continues past this line): returns selectw_256(__M, _mm256_packus_epi32(__A, __B), _mm256_setzero_si256()). */ static
__inline__ __m256i __DEFAULT_FN_ATTRS256 6250b57cec5SDimitry Andric _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) 6260b57cec5SDimitry Andric { 6270b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 6280b57cec5SDimitry Andric (__v16hi)_mm256_packus_epi32(__A, __B), 6290b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 6300b57cec5SDimitry Andric } 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_mask_packus_epi32: returns selectw_256(__M, _mm256_packus_epi32(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 6330b57cec5SDimitry Andric _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 6340b57cec5SDimitry Andric { 6350b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 6360b57cec5SDimitry Andric (__v16hi)_mm256_packus_epi32(__A, __B), 6370b57cec5SDimitry Andric (__v16hi)__W); 6380b57cec5SDimitry Andric } 6390b57cec5SDimitry Andric 6400b57cec5SDimitry Andric /* _mm_maskz_packus_epi16: returns selectb_128(__M, _mm_packus_epi16(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 6410b57cec5SDimitry Andric _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) 6420b57cec5SDimitry Andric { 6430b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 6440b57cec5SDimitry Andric (__v16qi)_mm_packus_epi16(__A, __B), 6450b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 6460b57cec5SDimitry Andric } 6470b57cec5SDimitry Andric 6480b57cec5SDimitry Andric /* _mm_mask_packus_epi16: returns selectb_128(__M, _mm_packus_epi16(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 6490b57cec5SDimitry Andric _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 6500b57cec5SDimitry Andric { 6510b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 6520b57cec5SDimitry Andric (__v16qi)_mm_packus_epi16(__A, __B), 6530b57cec5SDimitry Andric (__v16qi)__W); 6540b57cec5SDimitry Andric } 6550b57cec5SDimitry Andric 6560b57cec5SDimitry Andric /* _mm256_maskz_packus_epi16 (definition continues past this line): returns selectb_256(__M, _mm256_packus_epi16(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 6570b57cec5SDimitry Andric _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) 6580b57cec5SDimitry
Andric { 6590b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 6600b57cec5SDimitry Andric (__v32qi)_mm256_packus_epi16(__A, __B), 6610b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 6620b57cec5SDimitry Andric } 6630b57cec5SDimitry Andric 6640b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_mask_packus_epi16: returns selectb_256(__M, _mm256_packus_epi16(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 6650b57cec5SDimitry Andric _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 6660b57cec5SDimitry Andric { 6670b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 6680b57cec5SDimitry Andric (__v32qi)_mm256_packus_epi16(__A, __B), 6690b57cec5SDimitry Andric (__v32qi)__W); 6700b57cec5SDimitry Andric } 6710b57cec5SDimitry Andric 6720b57cec5SDimitry Andric /* _mm_mask_adds_epi8: returns selectb_128(__U, _mm_adds_epi8(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 6730b57cec5SDimitry Andric _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 6740b57cec5SDimitry Andric { 6750b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 6760b57cec5SDimitry Andric (__v16qi)_mm_adds_epi8(__A, __B), 6770b57cec5SDimitry Andric (__v16qi)__W); 6780b57cec5SDimitry Andric } 6790b57cec5SDimitry Andric 6800b57cec5SDimitry Andric /* _mm_maskz_adds_epi8: returns selectb_128(__U, _mm_adds_epi8(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 6810b57cec5SDimitry Andric _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) 6820b57cec5SDimitry Andric { 6830b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 6840b57cec5SDimitry Andric (__v16qi)_mm_adds_epi8(__A, __B), 6850b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 6860b57cec5SDimitry Andric } 6870b57cec5SDimitry Andric 6880b57cec5SDimitry Andric /* _mm256_mask_adds_epi8 (definition continues past this line): returns selectb_256(__U, _mm256_adds_epi8(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 6890b57cec5SDimitry Andric _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 6900b57cec5SDimitry Andric { 6910b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 6920b57cec5SDimitry Andric (__v32qi)_mm256_adds_epi8(__A, __B),
6930b57cec5SDimitry Andric (__v32qi)__W); 6940b57cec5SDimitry Andric } 6950b57cec5SDimitry Andric 6960b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_maskz_adds_epi8: returns selectb_256(__U, _mm256_adds_epi8(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 6970b57cec5SDimitry Andric _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) 6980b57cec5SDimitry Andric { 6990b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 7000b57cec5SDimitry Andric (__v32qi)_mm256_adds_epi8(__A, __B), 7010b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 7020b57cec5SDimitry Andric } 7030b57cec5SDimitry Andric 7040b57cec5SDimitry Andric /* _mm_mask_adds_epi16: returns selectw_128(__U, _mm_adds_epi16(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 7050b57cec5SDimitry Andric _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 7060b57cec5SDimitry Andric { 7070b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 7080b57cec5SDimitry Andric (__v8hi)_mm_adds_epi16(__A, __B), 7090b57cec5SDimitry Andric (__v8hi)__W); 7100b57cec5SDimitry Andric } 7110b57cec5SDimitry Andric 7120b57cec5SDimitry Andric /* _mm_maskz_adds_epi16: returns selectw_128(__U, _mm_adds_epi16(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 7130b57cec5SDimitry Andric _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) 7140b57cec5SDimitry Andric { 7150b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 7160b57cec5SDimitry Andric (__v8hi)_mm_adds_epi16(__A, __B), 7170b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 7180b57cec5SDimitry Andric } 7190b57cec5SDimitry Andric 7200b57cec5SDimitry Andric /* _mm256_mask_adds_epi16: returns selectw_256(__U, _mm256_adds_epi16(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 7210b57cec5SDimitry Andric _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 7220b57cec5SDimitry Andric { 7230b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 7240b57cec5SDimitry Andric (__v16hi)_mm256_adds_epi16(__A, __B), 7250b57cec5SDimitry Andric (__v16hi)__W); 7260b57cec5SDimitry Andric } 7270b57cec5SDimitry Andric 7280b57cec5SDimitry Andric /* _mm256_maskz_adds_epi16 (definition continues past this line): returns selectw_256(__U, _mm256_adds_epi16(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256
7290b57cec5SDimitry Andric _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) 7300b57cec5SDimitry Andric { 7310b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 7320b57cec5SDimitry Andric (__v16hi)_mm256_adds_epi16(__A, __B), 7330b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 7340b57cec5SDimitry Andric } 7350b57cec5SDimitry Andric 7360b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm_mask_adds_epu8: returns selectb_128(__U, _mm_adds_epu8(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 7370b57cec5SDimitry Andric _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 7380b57cec5SDimitry Andric { 7390b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 7400b57cec5SDimitry Andric (__v16qi)_mm_adds_epu8(__A, __B), 7410b57cec5SDimitry Andric (__v16qi)__W); 7420b57cec5SDimitry Andric } 7430b57cec5SDimitry Andric 7440b57cec5SDimitry Andric /* _mm_maskz_adds_epu8: returns selectb_128(__U, _mm_adds_epu8(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 7450b57cec5SDimitry Andric _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) 7460b57cec5SDimitry Andric { 7470b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 7480b57cec5SDimitry Andric (__v16qi)_mm_adds_epu8(__A, __B), 7490b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 7500b57cec5SDimitry Andric } 7510b57cec5SDimitry Andric 7520b57cec5SDimitry Andric /* _mm256_mask_adds_epu8: returns selectb_256(__U, _mm256_adds_epu8(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 7530b57cec5SDimitry Andric _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 7540b57cec5SDimitry Andric { 7550b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 7560b57cec5SDimitry Andric (__v32qi)_mm256_adds_epu8(__A, __B), 7570b57cec5SDimitry Andric (__v32qi)__W); 7580b57cec5SDimitry Andric } 7590b57cec5SDimitry Andric 7600b57cec5SDimitry Andric /* _mm256_maskz_adds_epu8 (definition continues past this line): returns selectb_256(__U, _mm256_adds_epu8(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 7610b57cec5SDimitry Andric _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) 7620b57cec5SDimitry Andric { 7630b57cec5SDimitry Andric return
(__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 7640b57cec5SDimitry Andric (__v32qi)_mm256_adds_epu8(__A, __B), 7650b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 7660b57cec5SDimitry Andric } 7670b57cec5SDimitry Andric 7680b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm_mask_adds_epu16: returns selectw_128(__U, _mm_adds_epu16(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 7690b57cec5SDimitry Andric _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 7700b57cec5SDimitry Andric { 7710b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 7720b57cec5SDimitry Andric (__v8hi)_mm_adds_epu16(__A, __B), 7730b57cec5SDimitry Andric (__v8hi)__W); 7740b57cec5SDimitry Andric } 7750b57cec5SDimitry Andric 7760b57cec5SDimitry Andric /* _mm_maskz_adds_epu16: returns selectw_128(__U, _mm_adds_epu16(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 7770b57cec5SDimitry Andric _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) 7780b57cec5SDimitry Andric { 7790b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 7800b57cec5SDimitry Andric (__v8hi)_mm_adds_epu16(__A, __B), 7810b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 7820b57cec5SDimitry Andric } 7830b57cec5SDimitry Andric 7840b57cec5SDimitry Andric /* _mm256_mask_adds_epu16: returns selectw_256(__U, _mm256_adds_epu16(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 7850b57cec5SDimitry Andric _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 7860b57cec5SDimitry Andric { 7870b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 7880b57cec5SDimitry Andric (__v16hi)_mm256_adds_epu16(__A, __B), 7890b57cec5SDimitry Andric (__v16hi)__W); 7900b57cec5SDimitry Andric } 7910b57cec5SDimitry Andric 7920b57cec5SDimitry Andric /* _mm256_maskz_adds_epu16 (closing brace is past this line): returns selectw_256(__U, _mm256_adds_epu16(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 7930b57cec5SDimitry Andric _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) 7940b57cec5SDimitry Andric { 7950b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 7960b57cec5SDimitry Andric (__v16hi)_mm256_adds_epu16(__A, __B), 7970b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256());
7980b57cec5SDimitry Andric } 7990b57cec5SDimitry Andric 8000b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm_mask_avg_epu8: returns selectb_128(__U, _mm_avg_epu8(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 8010b57cec5SDimitry Andric _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 8020b57cec5SDimitry Andric { 8030b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 8040b57cec5SDimitry Andric (__v16qi)_mm_avg_epu8(__A, __B), 8050b57cec5SDimitry Andric (__v16qi)__W); 8060b57cec5SDimitry Andric } 8070b57cec5SDimitry Andric 8080b57cec5SDimitry Andric /* _mm_maskz_avg_epu8: returns selectb_128(__U, _mm_avg_epu8(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 8090b57cec5SDimitry Andric _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) 8100b57cec5SDimitry Andric { 8110b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 8120b57cec5SDimitry Andric (__v16qi)_mm_avg_epu8(__A, __B), 8130b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 8140b57cec5SDimitry Andric } 8150b57cec5SDimitry Andric 8160b57cec5SDimitry Andric /* _mm256_mask_avg_epu8: returns selectb_256(__U, _mm256_avg_epu8(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 8170b57cec5SDimitry Andric _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 8180b57cec5SDimitry Andric { 8190b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 8200b57cec5SDimitry Andric (__v32qi)_mm256_avg_epu8(__A, __B), 8210b57cec5SDimitry Andric (__v32qi)__W); 8220b57cec5SDimitry Andric } 8230b57cec5SDimitry Andric 8240b57cec5SDimitry Andric /* _mm256_maskz_avg_epu8: returns selectb_256(__U, _mm256_avg_epu8(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 8250b57cec5SDimitry Andric _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) 8260b57cec5SDimitry Andric { 8270b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 8280b57cec5SDimitry Andric (__v32qi)_mm256_avg_epu8(__A, __B), 8290b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 8300b57cec5SDimitry Andric } 8310b57cec5SDimitry Andric 8320b57cec5SDimitry Andric /* _mm_mask_avg_epu16 (definition continues past this line): returns selectw_128(__U, _mm_avg_epu16(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 8330b57cec5SDimitry Andric _mm_mask_avg_epu16(__m128i
__W, __mmask8 __U, __m128i __A, __m128i __B) 8340b57cec5SDimitry Andric { 8350b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 8360b57cec5SDimitry Andric (__v8hi)_mm_avg_epu16(__A, __B), 8370b57cec5SDimitry Andric (__v8hi)__W); 8380b57cec5SDimitry Andric } 8390b57cec5SDimitry Andric 8400b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm_maskz_avg_epu16: returns selectw_128(__U, _mm_avg_epu16(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 8410b57cec5SDimitry Andric _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) 8420b57cec5SDimitry Andric { 8430b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 8440b57cec5SDimitry Andric (__v8hi)_mm_avg_epu16(__A, __B), 8450b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 8460b57cec5SDimitry Andric } 8470b57cec5SDimitry Andric 8480b57cec5SDimitry Andric /* _mm256_mask_avg_epu16: returns selectw_256(__U, _mm256_avg_epu16(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 8490b57cec5SDimitry Andric _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 8500b57cec5SDimitry Andric { 8510b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 8520b57cec5SDimitry Andric (__v16hi)_mm256_avg_epu16(__A, __B), 8530b57cec5SDimitry Andric (__v16hi)__W); 8540b57cec5SDimitry Andric } 8550b57cec5SDimitry Andric 8560b57cec5SDimitry Andric /* _mm256_maskz_avg_epu16: returns selectw_256(__U, _mm256_avg_epu16(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 8570b57cec5SDimitry Andric _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) 8580b57cec5SDimitry Andric { 8590b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 8600b57cec5SDimitry Andric (__v16hi)_mm256_avg_epu16(__A, __B), 8610b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 8620b57cec5SDimitry Andric } 8630b57cec5SDimitry Andric 8640b57cec5SDimitry Andric /* _mm_maskz_max_epi8 (definition continues past this line): returns selectb_128(__M, _mm_max_epi8(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 8650b57cec5SDimitry Andric _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) 8660b57cec5SDimitry Andric { 8670b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 8680b57cec5SDimitry Andric
(__v16qi)_mm_max_epi8(__A, __B), 8690b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 8700b57cec5SDimitry Andric } 8710b57cec5SDimitry Andric 8720b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm_mask_max_epi8: returns selectb_128(__M, _mm_max_epi8(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 8730b57cec5SDimitry Andric _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 8740b57cec5SDimitry Andric { 8750b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 8760b57cec5SDimitry Andric (__v16qi)_mm_max_epi8(__A, __B), 8770b57cec5SDimitry Andric (__v16qi)__W); 8780b57cec5SDimitry Andric } 8790b57cec5SDimitry Andric 8800b57cec5SDimitry Andric /* _mm256_maskz_max_epi8: returns selectb_256(__M, _mm256_max_epi8(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 8810b57cec5SDimitry Andric _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) 8820b57cec5SDimitry Andric { 8830b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 8840b57cec5SDimitry Andric (__v32qi)_mm256_max_epi8(__A, __B), 8850b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 8860b57cec5SDimitry Andric } 8870b57cec5SDimitry Andric 8880b57cec5SDimitry Andric /* _mm256_mask_max_epi8: returns selectb_256(__M, _mm256_max_epi8(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 8890b57cec5SDimitry Andric _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 8900b57cec5SDimitry Andric { 8910b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 8920b57cec5SDimitry Andric (__v32qi)_mm256_max_epi8(__A, __B), 8930b57cec5SDimitry Andric (__v32qi)__W); 8940b57cec5SDimitry Andric } 8950b57cec5SDimitry Andric 8960b57cec5SDimitry Andric /* _mm_maskz_max_epi16: returns selectw_128(__M, _mm_max_epi16(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 8970b57cec5SDimitry Andric _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) 8980b57cec5SDimitry Andric { 8990b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 9000b57cec5SDimitry Andric (__v8hi)_mm_max_epi16(__A, __B), 9010b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 9020b57cec5SDimitry Andric } 9030b57cec5SDimitry Andric 9040b57cec5SDimitry Andric /* _mm_mask_max_epi16 (definition continues past this line): returns selectw_128(__M, _mm_max_epi16(__A, __B), __W). */ static
__inline__ __m128i __DEFAULT_FN_ATTRS128 9050b57cec5SDimitry Andric _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 9060b57cec5SDimitry Andric { 9070b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 9080b57cec5SDimitry Andric (__v8hi)_mm_max_epi16(__A, __B), 9090b57cec5SDimitry Andric (__v8hi)__W); 9100b57cec5SDimitry Andric } 9110b57cec5SDimitry Andric 9120b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_maskz_max_epi16: returns selectw_256(__M, _mm256_max_epi16(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 9130b57cec5SDimitry Andric _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) 9140b57cec5SDimitry Andric { 9150b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 9160b57cec5SDimitry Andric (__v16hi)_mm256_max_epi16(__A, __B), 9170b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 9180b57cec5SDimitry Andric } 9190b57cec5SDimitry Andric 9200b57cec5SDimitry Andric /* _mm256_mask_max_epi16: returns selectw_256(__M, _mm256_max_epi16(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 9210b57cec5SDimitry Andric _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 9220b57cec5SDimitry Andric { 9230b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 9240b57cec5SDimitry Andric (__v16hi)_mm256_max_epi16(__A, __B), 9250b57cec5SDimitry Andric (__v16hi)__W); 9260b57cec5SDimitry Andric } 9270b57cec5SDimitry Andric 9280b57cec5SDimitry Andric /* _mm_maskz_max_epu8: returns selectb_128(__M, _mm_max_epu8(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 9290b57cec5SDimitry Andric _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) 9300b57cec5SDimitry Andric { 9310b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 9320b57cec5SDimitry Andric (__v16qi)_mm_max_epu8(__A, __B), 9330b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 9340b57cec5SDimitry Andric } 9350b57cec5SDimitry Andric 9360b57cec5SDimitry Andric /* _mm_mask_max_epu8 (definition continues past this line): returns selectb_128(__M, _mm_max_epu8(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 9370b57cec5SDimitry Andric _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 9380b57cec5SDimitry Andric {
9390b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 9400b57cec5SDimitry Andric (__v16qi)_mm_max_epu8(__A, __B), 9410b57cec5SDimitry Andric (__v16qi)__W); 9420b57cec5SDimitry Andric } 9430b57cec5SDimitry Andric 9440b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_maskz_max_epu8: returns selectb_256(__M, _mm256_max_epu8(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 9450b57cec5SDimitry Andric _mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B) 9460b57cec5SDimitry Andric { 9470b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 9480b57cec5SDimitry Andric (__v32qi)_mm256_max_epu8(__A, __B), 9490b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 9500b57cec5SDimitry Andric } 9510b57cec5SDimitry Andric 9520b57cec5SDimitry Andric /* _mm256_mask_max_epu8: returns selectb_256(__M, _mm256_max_epu8(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 9530b57cec5SDimitry Andric _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 9540b57cec5SDimitry Andric { 9550b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 9560b57cec5SDimitry Andric (__v32qi)_mm256_max_epu8(__A, __B), 9570b57cec5SDimitry Andric (__v32qi)__W); 9580b57cec5SDimitry Andric } 9590b57cec5SDimitry Andric 9600b57cec5SDimitry Andric /* _mm_maskz_max_epu16: returns selectw_128(__M, _mm_max_epu16(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 9610b57cec5SDimitry Andric _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) 9620b57cec5SDimitry Andric { 9630b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 9640b57cec5SDimitry Andric (__v8hi)_mm_max_epu16(__A, __B), 9650b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 9660b57cec5SDimitry Andric } 9670b57cec5SDimitry Andric 9680b57cec5SDimitry Andric /* _mm_mask_max_epu16 (closing brace is past this line): returns selectw_128(__M, _mm_max_epu16(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 9690b57cec5SDimitry Andric _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 9700b57cec5SDimitry Andric { 9710b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 9720b57cec5SDimitry Andric (__v8hi)_mm_max_epu16(__A, __B), 9730b57cec5SDimitry Andric (__v8hi)__W);
9740b57cec5SDimitry Andric } 9750b57cec5SDimitry Andric 9760b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_maskz_max_epu16: returns selectw_256(__M, _mm256_max_epu16(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 9770b57cec5SDimitry Andric _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) 9780b57cec5SDimitry Andric { 9790b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 9800b57cec5SDimitry Andric (__v16hi)_mm256_max_epu16(__A, __B), 9810b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 9820b57cec5SDimitry Andric } 9830b57cec5SDimitry Andric 9840b57cec5SDimitry Andric /* _mm256_mask_max_epu16: returns selectw_256(__M, _mm256_max_epu16(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 9850b57cec5SDimitry Andric _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 9860b57cec5SDimitry Andric { 9870b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 9880b57cec5SDimitry Andric (__v16hi)_mm256_max_epu16(__A, __B), 9890b57cec5SDimitry Andric (__v16hi)__W); 9900b57cec5SDimitry Andric } 9910b57cec5SDimitry Andric 9920b57cec5SDimitry Andric /* _mm_maskz_min_epi8: returns selectb_128(__M, _mm_min_epi8(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 9930b57cec5SDimitry Andric _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) 9940b57cec5SDimitry Andric { 9950b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 9960b57cec5SDimitry Andric (__v16qi)_mm_min_epi8(__A, __B), 9970b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 9980b57cec5SDimitry Andric } 9990b57cec5SDimitry Andric 10000b57cec5SDimitry Andric /* _mm_mask_min_epi8: returns selectb_128(__M, _mm_min_epi8(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 10010b57cec5SDimitry Andric _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 10020b57cec5SDimitry Andric { 10030b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 10040b57cec5SDimitry Andric (__v16qi)_mm_min_epi8(__A, __B), 10050b57cec5SDimitry Andric (__v16qi)__W); 10060b57cec5SDimitry Andric } 10070b57cec5SDimitry Andric 10080b57cec5SDimitry Andric /* _mm256_maskz_min_epi8 (definition continues past this line): returns selectb_256(__M, _mm256_min_epi8(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 10090b57cec5SDimitry Andric
_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) 10100b57cec5SDimitry Andric { 10110b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 10120b57cec5SDimitry Andric (__v32qi)_mm256_min_epi8(__A, __B), 10130b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 10140b57cec5SDimitry Andric } 10150b57cec5SDimitry Andric 10160b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_mask_min_epi8: returns selectb_256(__M, _mm256_min_epi8(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 10170b57cec5SDimitry Andric _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 10180b57cec5SDimitry Andric { 10190b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 10200b57cec5SDimitry Andric (__v32qi)_mm256_min_epi8(__A, __B), 10210b57cec5SDimitry Andric (__v32qi)__W); 10220b57cec5SDimitry Andric } 10230b57cec5SDimitry Andric 10240b57cec5SDimitry Andric /* _mm_maskz_min_epi16: returns selectw_128(__M, _mm_min_epi16(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 10250b57cec5SDimitry Andric _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) 10260b57cec5SDimitry Andric { 10270b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 10280b57cec5SDimitry Andric (__v8hi)_mm_min_epi16(__A, __B), 10290b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 10300b57cec5SDimitry Andric } 10310b57cec5SDimitry Andric 10320b57cec5SDimitry Andric /* _mm_mask_min_epi16: returns selectw_128(__M, _mm_min_epi16(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 10330b57cec5SDimitry Andric _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 10340b57cec5SDimitry Andric { 10350b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 10360b57cec5SDimitry Andric (__v8hi)_mm_min_epi16(__A, __B), 10370b57cec5SDimitry Andric (__v8hi)__W); 10380b57cec5SDimitry Andric } 10390b57cec5SDimitry Andric 10400b57cec5SDimitry Andric /* _mm256_maskz_min_epi16 (definition continues past this line): returns selectw_256(__M, _mm256_min_epi16(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 10410b57cec5SDimitry Andric _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) 10420b57cec5SDimitry Andric { 10430b57cec5SDimitry Andric return
(__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 10440b57cec5SDimitry Andric (__v16hi)_mm256_min_epi16(__A, __B), 10450b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 10460b57cec5SDimitry Andric } 10470b57cec5SDimitry Andric 10480b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_mask_min_epi16: returns selectw_256(__M, _mm256_min_epi16(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 10490b57cec5SDimitry Andric _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 10500b57cec5SDimitry Andric { 10510b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 10520b57cec5SDimitry Andric (__v16hi)_mm256_min_epi16(__A, __B), 10530b57cec5SDimitry Andric (__v16hi)__W); 10540b57cec5SDimitry Andric } 10550b57cec5SDimitry Andric 10560b57cec5SDimitry Andric /* _mm_maskz_min_epu8: returns selectb_128(__M, _mm_min_epu8(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 10570b57cec5SDimitry Andric _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) 10580b57cec5SDimitry Andric { 10590b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 10600b57cec5SDimitry Andric (__v16qi)_mm_min_epu8(__A, __B), 10610b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 10620b57cec5SDimitry Andric } 10630b57cec5SDimitry Andric 10640b57cec5SDimitry Andric /* _mm_mask_min_epu8: returns selectb_128(__M, _mm_min_epu8(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 10650b57cec5SDimitry Andric _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 10660b57cec5SDimitry Andric { 10670b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 10680b57cec5SDimitry Andric (__v16qi)_mm_min_epu8(__A, __B), 10690b57cec5SDimitry Andric (__v16qi)__W); 10700b57cec5SDimitry Andric } 10710b57cec5SDimitry Andric 10720b57cec5SDimitry Andric /* _mm256_maskz_min_epu8 (definition continues past this line): returns selectb_256(__M, _mm256_min_epu8(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 10730b57cec5SDimitry Andric _mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B) 10740b57cec5SDimitry Andric { 10750b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 10760b57cec5SDimitry Andric (__v32qi)_mm256_min_epu8(__A, __B), 10770b57cec5SDimitry Andric
(__v32qi)_mm256_setzero_si256()); 10780b57cec5SDimitry Andric } 10790b57cec5SDimitry Andric 10800b57cec5SDimitry Andric /* Comment shorthand on this line: selectb_N / selectw_N stand for __builtin_ia32_selectb_N / __builtin_ia32_selectw_N, and vector casts are omitted. NOTE(review): those builtins are defined outside this chunk; presumably a set mask bit takes the element of the second operand and a clear bit that of the third - confirm. */ /* _mm256_mask_min_epu8: returns selectb_256(__M, _mm256_min_epu8(__A, __B), __W). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 10810b57cec5SDimitry Andric _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 10820b57cec5SDimitry Andric { 10830b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 10840b57cec5SDimitry Andric (__v32qi)_mm256_min_epu8(__A, __B), 10850b57cec5SDimitry Andric (__v32qi)__W); 10860b57cec5SDimitry Andric } 10870b57cec5SDimitry Andric 10880b57cec5SDimitry Andric /* _mm_maskz_min_epu16: returns selectw_128(__M, _mm_min_epu16(__A, __B), _mm_setzero_si128()). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 10890b57cec5SDimitry Andric _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) 10900b57cec5SDimitry Andric { 10910b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 10920b57cec5SDimitry Andric (__v8hi)_mm_min_epu16(__A, __B), 10930b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 10940b57cec5SDimitry Andric } 10950b57cec5SDimitry Andric 10960b57cec5SDimitry Andric /* _mm_mask_min_epu16: returns selectw_128(__M, _mm_min_epu16(__A, __B), __W). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 10970b57cec5SDimitry Andric _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 10980b57cec5SDimitry Andric { 10990b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 11000b57cec5SDimitry Andric (__v8hi)_mm_min_epu16(__A, __B), 11010b57cec5SDimitry Andric (__v8hi)__W); 11020b57cec5SDimitry Andric } 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric /* _mm256_maskz_min_epu16: returns selectw_256(__M, _mm256_min_epu16(__A, __B), _mm256_setzero_si256()). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 11050b57cec5SDimitry Andric _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) 11060b57cec5SDimitry Andric { 11070b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 11080b57cec5SDimitry Andric (__v16hi)_mm256_min_epu16(__A, __B), 11090b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 11100b57cec5SDimitry Andric } 11110b57cec5SDimitry Andric 11120b57cec5SDimitry Andric /* _mm256_mask_min_epu16 (definition continues past this line): returns selectw_256(__M, _mm256_min_epu16(__A, __B), __W). */ static __inline__ __m256i
__DEFAULT_FN_ATTRS256 11130b57cec5SDimitry Andric _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 11140b57cec5SDimitry Andric { 11150b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 11160b57cec5SDimitry Andric (__v16hi)_mm256_min_epu16(__A, __B), 11170b57cec5SDimitry Andric (__v16hi)__W); 11180b57cec5SDimitry Andric } 11190b57cec5SDimitry Andric 11200b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 11210b57cec5SDimitry Andric _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 11220b57cec5SDimitry Andric { 11230b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 11240b57cec5SDimitry Andric (__v16qi)_mm_shuffle_epi8(__A, __B), 11250b57cec5SDimitry Andric (__v16qi)__W); 11260b57cec5SDimitry Andric } 11270b57cec5SDimitry Andric 11280b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 11290b57cec5SDimitry Andric _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) 11300b57cec5SDimitry Andric { 11310b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 11320b57cec5SDimitry Andric (__v16qi)_mm_shuffle_epi8(__A, __B), 11330b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 11340b57cec5SDimitry Andric } 11350b57cec5SDimitry Andric 11360b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 11370b57cec5SDimitry Andric _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 11380b57cec5SDimitry Andric { 11390b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 11400b57cec5SDimitry Andric (__v32qi)_mm256_shuffle_epi8(__A, __B), 11410b57cec5SDimitry Andric (__v32qi)__W); 11420b57cec5SDimitry Andric } 11430b57cec5SDimitry Andric 11440b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 11450b57cec5SDimitry Andric _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) 11460b57cec5SDimitry 
Andric { 11470b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 11480b57cec5SDimitry Andric (__v32qi)_mm256_shuffle_epi8(__A, __B), 11490b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 11500b57cec5SDimitry Andric } 11510b57cec5SDimitry Andric 11520b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 11530b57cec5SDimitry Andric _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 11540b57cec5SDimitry Andric { 11550b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 11560b57cec5SDimitry Andric (__v16qi)_mm_subs_epi8(__A, __B), 11570b57cec5SDimitry Andric (__v16qi)__W); 11580b57cec5SDimitry Andric } 11590b57cec5SDimitry Andric 11600b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 11610b57cec5SDimitry Andric _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) 11620b57cec5SDimitry Andric { 11630b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 11640b57cec5SDimitry Andric (__v16qi)_mm_subs_epi8(__A, __B), 11650b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 11660b57cec5SDimitry Andric } 11670b57cec5SDimitry Andric 11680b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 11690b57cec5SDimitry Andric _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 11700b57cec5SDimitry Andric { 11710b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 11720b57cec5SDimitry Andric (__v32qi)_mm256_subs_epi8(__A, __B), 11730b57cec5SDimitry Andric (__v32qi)__W); 11740b57cec5SDimitry Andric } 11750b57cec5SDimitry Andric 11760b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 11770b57cec5SDimitry Andric _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) 11780b57cec5SDimitry Andric { 11790b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 11800b57cec5SDimitry Andric 
(__v32qi)_mm256_subs_epi8(__A, __B), 11810b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 11820b57cec5SDimitry Andric } 11830b57cec5SDimitry Andric 11840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 11850b57cec5SDimitry Andric _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 11860b57cec5SDimitry Andric { 11870b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 11880b57cec5SDimitry Andric (__v8hi)_mm_subs_epi16(__A, __B), 11890b57cec5SDimitry Andric (__v8hi)__W); 11900b57cec5SDimitry Andric } 11910b57cec5SDimitry Andric 11920b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 11930b57cec5SDimitry Andric _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) 11940b57cec5SDimitry Andric { 11950b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 11960b57cec5SDimitry Andric (__v8hi)_mm_subs_epi16(__A, __B), 11970b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 11980b57cec5SDimitry Andric } 11990b57cec5SDimitry Andric 12000b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 12010b57cec5SDimitry Andric _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 12020b57cec5SDimitry Andric { 12030b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 12040b57cec5SDimitry Andric (__v16hi)_mm256_subs_epi16(__A, __B), 12050b57cec5SDimitry Andric (__v16hi)__W); 12060b57cec5SDimitry Andric } 12070b57cec5SDimitry Andric 12080b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 12090b57cec5SDimitry Andric _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) 12100b57cec5SDimitry Andric { 12110b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 12120b57cec5SDimitry Andric (__v16hi)_mm256_subs_epi16(__A, __B), 12130b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 12140b57cec5SDimitry Andric } 
12150b57cec5SDimitry Andric 12160b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 12170b57cec5SDimitry Andric _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 12180b57cec5SDimitry Andric { 12190b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 12200b57cec5SDimitry Andric (__v16qi)_mm_subs_epu8(__A, __B), 12210b57cec5SDimitry Andric (__v16qi)__W); 12220b57cec5SDimitry Andric } 12230b57cec5SDimitry Andric 12240b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 12250b57cec5SDimitry Andric _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) 12260b57cec5SDimitry Andric { 12270b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 12280b57cec5SDimitry Andric (__v16qi)_mm_subs_epu8(__A, __B), 12290b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 12300b57cec5SDimitry Andric } 12310b57cec5SDimitry Andric 12320b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 12330b57cec5SDimitry Andric _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 12340b57cec5SDimitry Andric { 12350b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 12360b57cec5SDimitry Andric (__v32qi)_mm256_subs_epu8(__A, __B), 12370b57cec5SDimitry Andric (__v32qi)__W); 12380b57cec5SDimitry Andric } 12390b57cec5SDimitry Andric 12400b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 12410b57cec5SDimitry Andric _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) 12420b57cec5SDimitry Andric { 12430b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 12440b57cec5SDimitry Andric (__v32qi)_mm256_subs_epu8(__A, __B), 12450b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 12460b57cec5SDimitry Andric } 12470b57cec5SDimitry Andric 12480b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 12490b57cec5SDimitry Andric 
_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 12500b57cec5SDimitry Andric { 12510b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 12520b57cec5SDimitry Andric (__v8hi)_mm_subs_epu16(__A, __B), 12530b57cec5SDimitry Andric (__v8hi)__W); 12540b57cec5SDimitry Andric } 12550b57cec5SDimitry Andric 12560b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 12570b57cec5SDimitry Andric _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) 12580b57cec5SDimitry Andric { 12590b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 12600b57cec5SDimitry Andric (__v8hi)_mm_subs_epu16(__A, __B), 12610b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 12620b57cec5SDimitry Andric } 12630b57cec5SDimitry Andric 12640b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 12650b57cec5SDimitry Andric _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, 12660b57cec5SDimitry Andric __m256i __B) { 12670b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 12680b57cec5SDimitry Andric (__v16hi)_mm256_subs_epu16(__A, __B), 12690b57cec5SDimitry Andric (__v16hi)__W); 12700b57cec5SDimitry Andric } 12710b57cec5SDimitry Andric 12720b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 12730b57cec5SDimitry Andric _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) 12740b57cec5SDimitry Andric { 12750b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 12760b57cec5SDimitry Andric (__v16hi)_mm256_subs_epu16(__A, __B), 12770b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 12780b57cec5SDimitry Andric } 12790b57cec5SDimitry Andric 12800b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 12810b57cec5SDimitry Andric _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) 12820b57cec5SDimitry Andric { 12830b57cec5SDimitry Andric return 
(__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, 12840b57cec5SDimitry Andric (__v8hi) __B); 12850b57cec5SDimitry Andric } 12860b57cec5SDimitry Andric 12870b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 12880b57cec5SDimitry Andric _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, 12890b57cec5SDimitry Andric __m128i __B) 12900b57cec5SDimitry Andric { 12910b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128(__U, 12920b57cec5SDimitry Andric (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), 12930b57cec5SDimitry Andric (__v8hi)__A); 12940b57cec5SDimitry Andric } 12950b57cec5SDimitry Andric 12960b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 12970b57cec5SDimitry Andric _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, 12980b57cec5SDimitry Andric __m128i __B) 12990b57cec5SDimitry Andric { 13000b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128(__U, 13010b57cec5SDimitry Andric (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), 13020b57cec5SDimitry Andric (__v8hi)__I); 13030b57cec5SDimitry Andric } 13040b57cec5SDimitry Andric 13050b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 13060b57cec5SDimitry Andric _mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I, 13070b57cec5SDimitry Andric __m128i __B) 13080b57cec5SDimitry Andric { 13090b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128(__U, 13100b57cec5SDimitry Andric (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), 13110b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 13120b57cec5SDimitry Andric } 13130b57cec5SDimitry Andric 13140b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 13150b57cec5SDimitry Andric _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) 13160b57cec5SDimitry Andric { 13170b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, 13180b57cec5SDimitry Andric 
(__v16hi)__B); 13190b57cec5SDimitry Andric } 13200b57cec5SDimitry Andric 13210b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 13220b57cec5SDimitry Andric _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, 13230b57cec5SDimitry Andric __m256i __B) 13240b57cec5SDimitry Andric { 13250b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256(__U, 13260b57cec5SDimitry Andric (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), 13270b57cec5SDimitry Andric (__v16hi)__A); 13280b57cec5SDimitry Andric } 13290b57cec5SDimitry Andric 13300b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 13310b57cec5SDimitry Andric _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, 13320b57cec5SDimitry Andric __m256i __B) 13330b57cec5SDimitry Andric { 13340b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256(__U, 13350b57cec5SDimitry Andric (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), 13360b57cec5SDimitry Andric (__v16hi)__I); 13370b57cec5SDimitry Andric } 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 13400b57cec5SDimitry Andric _mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I, 13410b57cec5SDimitry Andric __m256i __B) 13420b57cec5SDimitry Andric { 13430b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256(__U, 13440b57cec5SDimitry Andric (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), 13450b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 13460b57cec5SDimitry Andric } 13470b57cec5SDimitry Andric 13480b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 13490b57cec5SDimitry Andric _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { 13500b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 13510b57cec5SDimitry Andric (__v8hi)_mm_maddubs_epi16(__X, __Y), 13520b57cec5SDimitry Andric (__v8hi)__W); 13530b57cec5SDimitry 
Andric } 13540b57cec5SDimitry Andric 13550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 13560b57cec5SDimitry Andric _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { 13570b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 13580b57cec5SDimitry Andric (__v8hi)_mm_maddubs_epi16(__X, __Y), 13590b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 13600b57cec5SDimitry Andric } 13610b57cec5SDimitry Andric 13620b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 13630b57cec5SDimitry Andric _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, 13640b57cec5SDimitry Andric __m256i __Y) { 13650b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 13660b57cec5SDimitry Andric (__v16hi)_mm256_maddubs_epi16(__X, __Y), 13670b57cec5SDimitry Andric (__v16hi)__W); 13680b57cec5SDimitry Andric } 13690b57cec5SDimitry Andric 13700b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 13710b57cec5SDimitry Andric _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { 13720b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 13730b57cec5SDimitry Andric (__v16hi)_mm256_maddubs_epi16(__X, __Y), 13740b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 13750b57cec5SDimitry Andric } 13760b57cec5SDimitry Andric 13770b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 13780b57cec5SDimitry Andric _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 13790b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 13800b57cec5SDimitry Andric (__v4si)_mm_madd_epi16(__A, __B), 13810b57cec5SDimitry Andric (__v4si)__W); 13820b57cec5SDimitry Andric } 13830b57cec5SDimitry Andric 13840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 13850b57cec5SDimitry Andric _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 
13860b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 13870b57cec5SDimitry Andric (__v4si)_mm_madd_epi16(__A, __B), 13880b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 13890b57cec5SDimitry Andric } 13900b57cec5SDimitry Andric 13910b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 13920b57cec5SDimitry Andric _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 13930b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 13940b57cec5SDimitry Andric (__v8si)_mm256_madd_epi16(__A, __B), 13950b57cec5SDimitry Andric (__v8si)__W); 13960b57cec5SDimitry Andric } 13970b57cec5SDimitry Andric 13980b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 13990b57cec5SDimitry Andric _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) { 14000b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 14010b57cec5SDimitry Andric (__v8si)_mm256_madd_epi16(__A, __B), 14020b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 14030b57cec5SDimitry Andric } 14040b57cec5SDimitry Andric 14050b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 14060b57cec5SDimitry Andric _mm_cvtsepi16_epi8 (__m128i __A) { 14070b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, 14080b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 14090b57cec5SDimitry Andric (__mmask8) -1); 14100b57cec5SDimitry Andric } 14110b57cec5SDimitry Andric 14120b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 14130b57cec5SDimitry Andric _mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { 14140b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, 14150b57cec5SDimitry Andric (__v16qi) __O, 14160b57cec5SDimitry Andric __M); 14170b57cec5SDimitry Andric } 14180b57cec5SDimitry Andric 14190b57cec5SDimitry Andric static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 14200b57cec5SDimitry Andric _mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) { 14210b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, 14220b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 14230b57cec5SDimitry Andric __M); 14240b57cec5SDimitry Andric } 14250b57cec5SDimitry Andric 14260b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 14270b57cec5SDimitry Andric _mm256_cvtsepi16_epi8 (__m256i __A) { 14280b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, 14290b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 14300b57cec5SDimitry Andric (__mmask16) -1); 14310b57cec5SDimitry Andric } 14320b57cec5SDimitry Andric 14330b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 14340b57cec5SDimitry Andric _mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { 14350b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, 14360b57cec5SDimitry Andric (__v16qi) __O, 14370b57cec5SDimitry Andric __M); 14380b57cec5SDimitry Andric } 14390b57cec5SDimitry Andric 14400b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 14410b57cec5SDimitry Andric _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) { 14420b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, 14430b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 14440b57cec5SDimitry Andric __M); 14450b57cec5SDimitry Andric } 14460b57cec5SDimitry Andric 14470b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 14480b57cec5SDimitry Andric _mm_cvtusepi16_epi8 (__m128i __A) { 14490b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, 14500b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 14510b57cec5SDimitry Andric (__mmask8) -1); 14520b57cec5SDimitry Andric } 14530b57cec5SDimitry Andric 14540b57cec5SDimitry Andric static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 14550b57cec5SDimitry Andric _mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { 14560b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, 14570b57cec5SDimitry Andric (__v16qi) __O, 14580b57cec5SDimitry Andric __M); 14590b57cec5SDimitry Andric } 14600b57cec5SDimitry Andric 14610b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 14620b57cec5SDimitry Andric _mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) { 14630b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, 14640b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 14650b57cec5SDimitry Andric __M); 14660b57cec5SDimitry Andric } 14670b57cec5SDimitry Andric 14680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 14690b57cec5SDimitry Andric _mm256_cvtusepi16_epi8 (__m256i __A) { 14700b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, 14710b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 14720b57cec5SDimitry Andric (__mmask16) -1); 14730b57cec5SDimitry Andric } 14740b57cec5SDimitry Andric 14750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 14760b57cec5SDimitry Andric _mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { 14770b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, 14780b57cec5SDimitry Andric (__v16qi) __O, 14790b57cec5SDimitry Andric __M); 14800b57cec5SDimitry Andric } 14810b57cec5SDimitry Andric 14820b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 14830b57cec5SDimitry Andric _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) { 14840b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, 14850b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 14860b57cec5SDimitry Andric __M); 14870b57cec5SDimitry Andric } 14880b57cec5SDimitry Andric 14890b57cec5SDimitry Andric static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 14900b57cec5SDimitry Andric _mm_cvtepi16_epi8 (__m128i __A) { 14910b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector( 14920b57cec5SDimitry Andric __builtin_convertvector((__v8hi)__A, __v8qi), 14930b57cec5SDimitry Andric (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14940b57cec5SDimitry Andric 12, 13, 14, 15); 14950b57cec5SDimitry Andric } 14960b57cec5SDimitry Andric 14970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 14980b57cec5SDimitry Andric _mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { 14990b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, 15000b57cec5SDimitry Andric (__v16qi) __O, 15010b57cec5SDimitry Andric __M); 15020b57cec5SDimitry Andric } 15030b57cec5SDimitry Andric 15040b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 15050b57cec5SDimitry Andric _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) { 15060b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, 15070b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128(), 15080b57cec5SDimitry Andric __M); 15090b57cec5SDimitry Andric } 15100b57cec5SDimitry Andric 15110b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 15120b57cec5SDimitry Andric _mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 15130b57cec5SDimitry Andric { 15140b57cec5SDimitry Andric __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); 15150b57cec5SDimitry Andric } 15160b57cec5SDimitry Andric 15170b57cec5SDimitry Andric 15180b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 15190b57cec5SDimitry Andric _mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 15200b57cec5SDimitry Andric { 15210b57cec5SDimitry Andric __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); 15220b57cec5SDimitry Andric } 15230b57cec5SDimitry Andric 
15240b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 15250b57cec5SDimitry Andric _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 15260b57cec5SDimitry Andric { 15270b57cec5SDimitry Andric __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); 15280b57cec5SDimitry Andric } 15290b57cec5SDimitry Andric 15300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 15310b57cec5SDimitry Andric _mm256_cvtepi16_epi8 (__m256i __A) { 15320b57cec5SDimitry Andric return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi); 15330b57cec5SDimitry Andric } 15340b57cec5SDimitry Andric 15350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 15360b57cec5SDimitry Andric _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { 15370b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 15380b57cec5SDimitry Andric (__v16qi)_mm256_cvtepi16_epi8(__A), 15390b57cec5SDimitry Andric (__v16qi)__O); 15400b57cec5SDimitry Andric } 15410b57cec5SDimitry Andric 15420b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 15430b57cec5SDimitry Andric _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) { 15440b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 15450b57cec5SDimitry Andric (__v16qi)_mm256_cvtepi16_epi8(__A), 15460b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 15470b57cec5SDimitry Andric } 15480b57cec5SDimitry Andric 15490b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 15500b57cec5SDimitry Andric _mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) 15510b57cec5SDimitry Andric { 15520b57cec5SDimitry Andric __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); 15530b57cec5SDimitry Andric } 15540b57cec5SDimitry Andric 15550b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 15560b57cec5SDimitry Andric 
_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) 15570b57cec5SDimitry Andric { 15580b57cec5SDimitry Andric __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); 15590b57cec5SDimitry Andric } 15600b57cec5SDimitry Andric 15610b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 15620b57cec5SDimitry Andric _mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) 15630b57cec5SDimitry Andric { 15640b57cec5SDimitry Andric __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); 15650b57cec5SDimitry Andric } 15660b57cec5SDimitry Andric 15670b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 15680b57cec5SDimitry Andric _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { 15690b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 15700b57cec5SDimitry Andric (__v8hi)_mm_mulhrs_epi16(__X, __Y), 15710b57cec5SDimitry Andric (__v8hi)__W); 15720b57cec5SDimitry Andric } 15730b57cec5SDimitry Andric 15740b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 15750b57cec5SDimitry Andric _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { 15760b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 15770b57cec5SDimitry Andric (__v8hi)_mm_mulhrs_epi16(__X, __Y), 15780b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 15790b57cec5SDimitry Andric } 15800b57cec5SDimitry Andric 15810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 15820b57cec5SDimitry Andric _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { 15830b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 15840b57cec5SDimitry Andric (__v16hi)_mm256_mulhrs_epi16(__X, __Y), 15850b57cec5SDimitry Andric (__v16hi)__W); 15860b57cec5SDimitry Andric } 15870b57cec5SDimitry Andric 15880b57cec5SDimitry Andric static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 15890b57cec5SDimitry Andric _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { 15900b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 15910b57cec5SDimitry Andric (__v16hi)_mm256_mulhrs_epi16(__X, __Y), 15920b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 15930b57cec5SDimitry Andric } 15940b57cec5SDimitry Andric 15950b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 15960b57cec5SDimitry Andric _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 15970b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 15980b57cec5SDimitry Andric (__v8hi)_mm_mulhi_epu16(__A, __B), 15990b57cec5SDimitry Andric (__v8hi)__W); 16000b57cec5SDimitry Andric } 16010b57cec5SDimitry Andric 16020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 16030b57cec5SDimitry Andric _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) { 16040b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 16050b57cec5SDimitry Andric (__v8hi)_mm_mulhi_epu16(__A, __B), 16060b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 16070b57cec5SDimitry Andric } 16080b57cec5SDimitry Andric 16090b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 16100b57cec5SDimitry Andric _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 16110b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 16120b57cec5SDimitry Andric (__v16hi)_mm256_mulhi_epu16(__A, __B), 16130b57cec5SDimitry Andric (__v16hi)__W); 16140b57cec5SDimitry Andric } 16150b57cec5SDimitry Andric 16160b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 16170b57cec5SDimitry Andric _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) { 16180b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 16190b57cec5SDimitry Andric 
(__v16hi)_mm256_mulhi_epu16(__A, __B), 16200b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 16210b57cec5SDimitry Andric } 16220b57cec5SDimitry Andric 16230b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 16240b57cec5SDimitry Andric _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 16250b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 16260b57cec5SDimitry Andric (__v8hi)_mm_mulhi_epi16(__A, __B), 16270b57cec5SDimitry Andric (__v8hi)__W); 16280b57cec5SDimitry Andric } 16290b57cec5SDimitry Andric 16300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 16310b57cec5SDimitry Andric _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 16320b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 16330b57cec5SDimitry Andric (__v8hi)_mm_mulhi_epi16(__A, __B), 16340b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 16350b57cec5SDimitry Andric } 16360b57cec5SDimitry Andric 16370b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 16380b57cec5SDimitry Andric _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 16390b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 16400b57cec5SDimitry Andric (__v16hi)_mm256_mulhi_epi16(__A, __B), 16410b57cec5SDimitry Andric (__v16hi)__W); 16420b57cec5SDimitry Andric } 16430b57cec5SDimitry Andric 16440b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 16450b57cec5SDimitry Andric _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 16460b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 16470b57cec5SDimitry Andric (__v16hi)_mm256_mulhi_epi16(__A, __B), 16480b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 16490b57cec5SDimitry Andric } 16500b57cec5SDimitry Andric 16510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 
16520b57cec5SDimitry Andric _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { 16530b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 16540b57cec5SDimitry Andric (__v16qi)_mm_unpackhi_epi8(__A, __B), 16550b57cec5SDimitry Andric (__v16qi)__W); 16560b57cec5SDimitry Andric } 16570b57cec5SDimitry Andric 16580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 16590b57cec5SDimitry Andric _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) { 16600b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 16610b57cec5SDimitry Andric (__v16qi)_mm_unpackhi_epi8(__A, __B), 16620b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 16630b57cec5SDimitry Andric } 16640b57cec5SDimitry Andric 16650b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 16660b57cec5SDimitry Andric _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { 16670b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 16680b57cec5SDimitry Andric (__v32qi)_mm256_unpackhi_epi8(__A, __B), 16690b57cec5SDimitry Andric (__v32qi)__W); 16700b57cec5SDimitry Andric } 16710b57cec5SDimitry Andric 16720b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 16730b57cec5SDimitry Andric _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) { 16740b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 16750b57cec5SDimitry Andric (__v32qi)_mm256_unpackhi_epi8(__A, __B), 16760b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 16770b57cec5SDimitry Andric } 16780b57cec5SDimitry Andric 16790b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 16800b57cec5SDimitry Andric _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 16810b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 16820b57cec5SDimitry Andric 
(__v8hi)_mm_unpackhi_epi16(__A, __B), 16830b57cec5SDimitry Andric (__v8hi)__W); 16840b57cec5SDimitry Andric } 16850b57cec5SDimitry Andric 16860b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 16870b57cec5SDimitry Andric _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 16880b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 16890b57cec5SDimitry Andric (__v8hi)_mm_unpackhi_epi16(__A, __B), 16900b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128()); 16910b57cec5SDimitry Andric } 16920b57cec5SDimitry Andric 16930b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 16940b57cec5SDimitry Andric _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 16950b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 16960b57cec5SDimitry Andric (__v16hi)_mm256_unpackhi_epi16(__A, __B), 16970b57cec5SDimitry Andric (__v16hi)__W); 16980b57cec5SDimitry Andric } 16990b57cec5SDimitry Andric 17000b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 17010b57cec5SDimitry Andric _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 17020b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 17030b57cec5SDimitry Andric (__v16hi)_mm256_unpackhi_epi16(__A, __B), 17040b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 17050b57cec5SDimitry Andric } 17060b57cec5SDimitry Andric 17070b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 17080b57cec5SDimitry Andric _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { 17090b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 17100b57cec5SDimitry Andric (__v16qi)_mm_unpacklo_epi8(__A, __B), 17110b57cec5SDimitry Andric (__v16qi)__W); 17120b57cec5SDimitry Andric } 17130b57cec5SDimitry Andric 17140b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 
17150b57cec5SDimitry Andric _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) { 17160b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 17170b57cec5SDimitry Andric (__v16qi)_mm_unpacklo_epi8(__A, __B), 17180b57cec5SDimitry Andric (__v16qi)_mm_setzero_si128()); 17190b57cec5SDimitry Andric } 17200b57cec5SDimitry Andric 17210b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 17220b57cec5SDimitry Andric _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { 17230b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 17240b57cec5SDimitry Andric (__v32qi)_mm256_unpacklo_epi8(__A, __B), 17250b57cec5SDimitry Andric (__v32qi)__W); 17260b57cec5SDimitry Andric } 17270b57cec5SDimitry Andric 17280b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 17290b57cec5SDimitry Andric _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) { 17300b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 17310b57cec5SDimitry Andric (__v32qi)_mm256_unpacklo_epi8(__A, __B), 17320b57cec5SDimitry Andric (__v32qi)_mm256_setzero_si256()); 17330b57cec5SDimitry Andric } 17340b57cec5SDimitry Andric 17350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 17360b57cec5SDimitry Andric _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 17370b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 17380b57cec5SDimitry Andric (__v8hi)_mm_unpacklo_epi16(__A, __B), 17390b57cec5SDimitry Andric (__v8hi)__W); 17400b57cec5SDimitry Andric } 17410b57cec5SDimitry Andric 17420b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 17430b57cec5SDimitry Andric _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 17440b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 17450b57cec5SDimitry Andric 
(__v8hi)_mm_unpacklo_epi16(__A, __B), 17460b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128()); 17470b57cec5SDimitry Andric } 17480b57cec5SDimitry Andric 17490b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 17500b57cec5SDimitry Andric _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 17510b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 17520b57cec5SDimitry Andric (__v16hi)_mm256_unpacklo_epi16(__A, __B), 17530b57cec5SDimitry Andric (__v16hi)__W); 17540b57cec5SDimitry Andric } 17550b57cec5SDimitry Andric 17560b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 17570b57cec5SDimitry Andric _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 17580b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 17590b57cec5SDimitry Andric (__v16hi)_mm256_unpacklo_epi16(__A, __B), 17600b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 17610b57cec5SDimitry Andric } 17620b57cec5SDimitry Andric 17630b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 17640b57cec5SDimitry Andric _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) 17650b57cec5SDimitry Andric { 17660b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 17670b57cec5SDimitry Andric (__v8hi)_mm_cvtepi8_epi16(__A), 17680b57cec5SDimitry Andric (__v8hi)__W); 17690b57cec5SDimitry Andric } 17700b57cec5SDimitry Andric 17710b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 17720b57cec5SDimitry Andric _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) 17730b57cec5SDimitry Andric { 17740b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 17750b57cec5SDimitry Andric (__v8hi)_mm_cvtepi8_epi16(__A), 17760b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 17770b57cec5SDimitry Andric } 17780b57cec5SDimitry Andric 17790b57cec5SDimitry Andric static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 17800b57cec5SDimitry Andric _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) 17810b57cec5SDimitry Andric { 17820b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 17830b57cec5SDimitry Andric (__v16hi)_mm256_cvtepi8_epi16(__A), 17840b57cec5SDimitry Andric (__v16hi)__W); 17850b57cec5SDimitry Andric } 17860b57cec5SDimitry Andric 17870b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 17880b57cec5SDimitry Andric _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) 17890b57cec5SDimitry Andric { 17900b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 17910b57cec5SDimitry Andric (__v16hi)_mm256_cvtepi8_epi16(__A), 17920b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 17930b57cec5SDimitry Andric } 17940b57cec5SDimitry Andric 17950b57cec5SDimitry Andric 17960b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 17970b57cec5SDimitry Andric _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) 17980b57cec5SDimitry Andric { 17990b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 18000b57cec5SDimitry Andric (__v8hi)_mm_cvtepu8_epi16(__A), 18010b57cec5SDimitry Andric (__v8hi)__W); 18020b57cec5SDimitry Andric } 18030b57cec5SDimitry Andric 18040b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 18050b57cec5SDimitry Andric _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) 18060b57cec5SDimitry Andric { 18070b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 18080b57cec5SDimitry Andric (__v8hi)_mm_cvtepu8_epi16(__A), 18090b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 18100b57cec5SDimitry Andric } 18110b57cec5SDimitry Andric 18120b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 18130b57cec5SDimitry Andric _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) 18140b57cec5SDimitry Andric { 18150b57cec5SDimitry 
Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 18160b57cec5SDimitry Andric (__v16hi)_mm256_cvtepu8_epi16(__A), 18170b57cec5SDimitry Andric (__v16hi)__W); 18180b57cec5SDimitry Andric } 18190b57cec5SDimitry Andric 18200b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 18210b57cec5SDimitry Andric _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) 18220b57cec5SDimitry Andric { 18230b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 18240b57cec5SDimitry Andric (__v16hi)_mm256_cvtepu8_epi16(__A), 18250b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 18260b57cec5SDimitry Andric } 18270b57cec5SDimitry Andric 18280b57cec5SDimitry Andric 18290b57cec5SDimitry Andric #define _mm_mask_shufflehi_epi16(W, U, A, imm) \ 1830349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 18310b57cec5SDimitry Andric (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 1832349cc55cSDimitry Andric (__v8hi)(__m128i)(W))) 18330b57cec5SDimitry Andric 18340b57cec5SDimitry Andric #define _mm_maskz_shufflehi_epi16(U, A, imm) \ 1835349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 18360b57cec5SDimitry Andric (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 1837349cc55cSDimitry Andric (__v8hi)_mm_setzero_si128())) 18380b57cec5SDimitry Andric 18390b57cec5SDimitry Andric #define _mm256_mask_shufflehi_epi16(W, U, A, imm) \ 1840349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 18410b57cec5SDimitry Andric (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 1842349cc55cSDimitry Andric (__v16hi)(__m256i)(W))) 18430b57cec5SDimitry Andric 18440b57cec5SDimitry Andric #define _mm256_maskz_shufflehi_epi16(U, A, imm) \ 1845349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 18460b57cec5SDimitry Andric (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 1847349cc55cSDimitry Andric (__v16hi)_mm256_setzero_si256())) 18480b57cec5SDimitry Andric 
18490b57cec5SDimitry Andric #define _mm_mask_shufflelo_epi16(W, U, A, imm) \ 1850349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 18510b57cec5SDimitry Andric (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 1852349cc55cSDimitry Andric (__v8hi)(__m128i)(W))) 18530b57cec5SDimitry Andric 18540b57cec5SDimitry Andric #define _mm_maskz_shufflelo_epi16(U, A, imm) \ 1855349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 18560b57cec5SDimitry Andric (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 1857349cc55cSDimitry Andric (__v8hi)_mm_setzero_si128())) 18580b57cec5SDimitry Andric 18590b57cec5SDimitry Andric #define _mm256_mask_shufflelo_epi16(W, U, A, imm) \ 1860349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 18610b57cec5SDimitry Andric (__v16hi)_mm256_shufflelo_epi16((A), \ 18620b57cec5SDimitry Andric (imm)), \ 1863349cc55cSDimitry Andric (__v16hi)(__m256i)(W))) 18640b57cec5SDimitry Andric 18650b57cec5SDimitry Andric #define _mm256_maskz_shufflelo_epi16(U, A, imm) \ 1866349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 18670b57cec5SDimitry Andric (__v16hi)_mm256_shufflelo_epi16((A), \ 18680b57cec5SDimitry Andric (imm)), \ 1869349cc55cSDimitry Andric (__v16hi)_mm256_setzero_si256())) 18700b57cec5SDimitry Andric 18710b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 18720b57cec5SDimitry Andric _mm256_sllv_epi16(__m256i __A, __m256i __B) 18730b57cec5SDimitry Andric { 18740b57cec5SDimitry Andric return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B); 18750b57cec5SDimitry Andric } 18760b57cec5SDimitry Andric 18770b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 18780b57cec5SDimitry Andric _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 18790b57cec5SDimitry Andric { 18800b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 18810b57cec5SDimitry Andric 
(__v16hi)_mm256_sllv_epi16(__A, __B), 18820b57cec5SDimitry Andric (__v16hi)__W); 18830b57cec5SDimitry Andric } 18840b57cec5SDimitry Andric 18850b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 18860b57cec5SDimitry Andric _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) 18870b57cec5SDimitry Andric { 18880b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 18890b57cec5SDimitry Andric (__v16hi)_mm256_sllv_epi16(__A, __B), 18900b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 18910b57cec5SDimitry Andric } 18920b57cec5SDimitry Andric 18930b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 18940b57cec5SDimitry Andric _mm_sllv_epi16(__m128i __A, __m128i __B) 18950b57cec5SDimitry Andric { 18960b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B); 18970b57cec5SDimitry Andric } 18980b57cec5SDimitry Andric 18990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 19000b57cec5SDimitry Andric _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 19010b57cec5SDimitry Andric { 19020b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 19030b57cec5SDimitry Andric (__v8hi)_mm_sllv_epi16(__A, __B), 19040b57cec5SDimitry Andric (__v8hi)__W); 19050b57cec5SDimitry Andric } 19060b57cec5SDimitry Andric 19070b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 19080b57cec5SDimitry Andric _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) 19090b57cec5SDimitry Andric { 19100b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 19110b57cec5SDimitry Andric (__v8hi)_mm_sllv_epi16(__A, __B), 19120b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 19130b57cec5SDimitry Andric } 19140b57cec5SDimitry Andric 19150b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 19160b57cec5SDimitry Andric _mm_mask_sll_epi16(__m128i __W, __mmask8 
__U, __m128i __A, __m128i __B) 19170b57cec5SDimitry Andric { 19180b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 19190b57cec5SDimitry Andric (__v8hi)_mm_sll_epi16(__A, __B), 19200b57cec5SDimitry Andric (__v8hi)__W); 19210b57cec5SDimitry Andric } 19220b57cec5SDimitry Andric 19230b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 19240b57cec5SDimitry Andric _mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B) 19250b57cec5SDimitry Andric { 19260b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 19270b57cec5SDimitry Andric (__v8hi)_mm_sll_epi16(__A, __B), 19280b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 19290b57cec5SDimitry Andric } 19300b57cec5SDimitry Andric 19310b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 19320b57cec5SDimitry Andric _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) 19330b57cec5SDimitry Andric { 19340b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 19350b57cec5SDimitry Andric (__v16hi)_mm256_sll_epi16(__A, __B), 19360b57cec5SDimitry Andric (__v16hi)__W); 19370b57cec5SDimitry Andric } 19380b57cec5SDimitry Andric 19390b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 19400b57cec5SDimitry Andric _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) 19410b57cec5SDimitry Andric { 19420b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 19430b57cec5SDimitry Andric (__v16hi)_mm256_sll_epi16(__A, __B), 19440b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 19450b57cec5SDimitry Andric } 19460b57cec5SDimitry Andric 19470b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 19485ffd83dbSDimitry Andric _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 19490b57cec5SDimitry Andric { 19500b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 
195181ad6265SDimitry Andric (__v8hi)_mm_slli_epi16(__A, (int)__B), 19520b57cec5SDimitry Andric (__v8hi)__W); 19530b57cec5SDimitry Andric } 19540b57cec5SDimitry Andric 19550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 19565ffd83dbSDimitry Andric _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B) 19570b57cec5SDimitry Andric { 19580b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 195981ad6265SDimitry Andric (__v8hi)_mm_slli_epi16(__A, (int)__B), 19600b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 19610b57cec5SDimitry Andric } 19620b57cec5SDimitry Andric 19630b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 19645ffd83dbSDimitry Andric _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, 19655ffd83dbSDimitry Andric unsigned int __B) 19660b57cec5SDimitry Andric { 19670b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 196881ad6265SDimitry Andric (__v16hi)_mm256_slli_epi16(__A, (int)__B), 19690b57cec5SDimitry Andric (__v16hi)__W); 19700b57cec5SDimitry Andric } 19710b57cec5SDimitry Andric 19720b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 19735ffd83dbSDimitry Andric _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B) 19740b57cec5SDimitry Andric { 19750b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 197681ad6265SDimitry Andric (__v16hi)_mm256_slli_epi16(__A, (int)__B), 19770b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 19780b57cec5SDimitry Andric } 19790b57cec5SDimitry Andric 19800b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 19810b57cec5SDimitry Andric _mm256_srlv_epi16(__m256i __A, __m256i __B) 19820b57cec5SDimitry Andric { 19830b57cec5SDimitry Andric return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B); 19840b57cec5SDimitry Andric } 19850b57cec5SDimitry Andric 19860b57cec5SDimitry Andric static 
__inline__ __m256i __DEFAULT_FN_ATTRS256 19870b57cec5SDimitry Andric _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 19880b57cec5SDimitry Andric { 19890b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 19900b57cec5SDimitry Andric (__v16hi)_mm256_srlv_epi16(__A, __B), 19910b57cec5SDimitry Andric (__v16hi)__W); 19920b57cec5SDimitry Andric } 19930b57cec5SDimitry Andric 19940b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 19950b57cec5SDimitry Andric _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) 19960b57cec5SDimitry Andric { 19970b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 19980b57cec5SDimitry Andric (__v16hi)_mm256_srlv_epi16(__A, __B), 19990b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 20000b57cec5SDimitry Andric } 20010b57cec5SDimitry Andric 20020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20030b57cec5SDimitry Andric _mm_srlv_epi16(__m128i __A, __m128i __B) 20040b57cec5SDimitry Andric { 20050b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B); 20060b57cec5SDimitry Andric } 20070b57cec5SDimitry Andric 20080b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20090b57cec5SDimitry Andric _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 20100b57cec5SDimitry Andric { 20110b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 20120b57cec5SDimitry Andric (__v8hi)_mm_srlv_epi16(__A, __B), 20130b57cec5SDimitry Andric (__v8hi)__W); 20140b57cec5SDimitry Andric } 20150b57cec5SDimitry Andric 20160b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20170b57cec5SDimitry Andric _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) 20180b57cec5SDimitry Andric { 20190b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 20200b57cec5SDimitry Andric 
(__v8hi)_mm_srlv_epi16(__A, __B), 20210b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 20220b57cec5SDimitry Andric } 20230b57cec5SDimitry Andric 20240b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 20250b57cec5SDimitry Andric _mm256_srav_epi16(__m256i __A, __m256i __B) 20260b57cec5SDimitry Andric { 20270b57cec5SDimitry Andric return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B); 20280b57cec5SDimitry Andric } 20290b57cec5SDimitry Andric 20300b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 20310b57cec5SDimitry Andric _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 20320b57cec5SDimitry Andric { 20330b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 20340b57cec5SDimitry Andric (__v16hi)_mm256_srav_epi16(__A, __B), 20350b57cec5SDimitry Andric (__v16hi)__W); 20360b57cec5SDimitry Andric } 20370b57cec5SDimitry Andric 20380b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 20390b57cec5SDimitry Andric _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) 20400b57cec5SDimitry Andric { 20410b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 20420b57cec5SDimitry Andric (__v16hi)_mm256_srav_epi16(__A, __B), 20430b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 20440b57cec5SDimitry Andric } 20450b57cec5SDimitry Andric 20460b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20470b57cec5SDimitry Andric _mm_srav_epi16(__m128i __A, __m128i __B) 20480b57cec5SDimitry Andric { 20490b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B); 20500b57cec5SDimitry Andric } 20510b57cec5SDimitry Andric 20520b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20530b57cec5SDimitry Andric _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 20540b57cec5SDimitry Andric { 20550b57cec5SDimitry Andric return 
(__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 20560b57cec5SDimitry Andric (__v8hi)_mm_srav_epi16(__A, __B), 20570b57cec5SDimitry Andric (__v8hi)__W); 20580b57cec5SDimitry Andric } 20590b57cec5SDimitry Andric 20600b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20610b57cec5SDimitry Andric _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) 20620b57cec5SDimitry Andric { 20630b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 20640b57cec5SDimitry Andric (__v8hi)_mm_srav_epi16(__A, __B), 20650b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 20660b57cec5SDimitry Andric } 20670b57cec5SDimitry Andric 20680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20690b57cec5SDimitry Andric _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 20700b57cec5SDimitry Andric { 20710b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 20720b57cec5SDimitry Andric (__v8hi)_mm_sra_epi16(__A, __B), 20730b57cec5SDimitry Andric (__v8hi)__W); 20740b57cec5SDimitry Andric } 20750b57cec5SDimitry Andric 20760b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20770b57cec5SDimitry Andric _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B) 20780b57cec5SDimitry Andric { 20790b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 20800b57cec5SDimitry Andric (__v8hi)_mm_sra_epi16(__A, __B), 20810b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 20820b57cec5SDimitry Andric } 20830b57cec5SDimitry Andric 20840b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 20850b57cec5SDimitry Andric _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) 20860b57cec5SDimitry Andric { 20870b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 20880b57cec5SDimitry Andric (__v16hi)_mm256_sra_epi16(__A, __B), 20890b57cec5SDimitry Andric (__v16hi)__W); 
20900b57cec5SDimitry Andric } 20910b57cec5SDimitry Andric 20920b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 20930b57cec5SDimitry Andric _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B) 20940b57cec5SDimitry Andric { 20950b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 20960b57cec5SDimitry Andric (__v16hi)_mm256_sra_epi16(__A, __B), 20970b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 20980b57cec5SDimitry Andric } 20990b57cec5SDimitry Andric 21000b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21015ffd83dbSDimitry Andric _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 21020b57cec5SDimitry Andric { 21030b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 210481ad6265SDimitry Andric (__v8hi)_mm_srai_epi16(__A, (int)__B), 21050b57cec5SDimitry Andric (__v8hi)__W); 21060b57cec5SDimitry Andric } 21070b57cec5SDimitry Andric 21080b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21095ffd83dbSDimitry Andric _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B) 21100b57cec5SDimitry Andric { 21110b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 211281ad6265SDimitry Andric (__v8hi)_mm_srai_epi16(__A, (int)__B), 21130b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 21140b57cec5SDimitry Andric } 21150b57cec5SDimitry Andric 21160b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 21175ffd83dbSDimitry Andric _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, 21185ffd83dbSDimitry Andric unsigned int __B) 21190b57cec5SDimitry Andric { 21200b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 212181ad6265SDimitry Andric (__v16hi)_mm256_srai_epi16(__A, (int)__B), 21220b57cec5SDimitry Andric (__v16hi)__W); 21230b57cec5SDimitry Andric } 21240b57cec5SDimitry Andric 21250b57cec5SDimitry Andric static 
__inline__ __m256i __DEFAULT_FN_ATTRS256 21265ffd83dbSDimitry Andric _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B) 21270b57cec5SDimitry Andric { 21280b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 212981ad6265SDimitry Andric (__v16hi)_mm256_srai_epi16(__A, (int)__B), 21300b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 21310b57cec5SDimitry Andric } 21320b57cec5SDimitry Andric 21330b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21340b57cec5SDimitry Andric _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 21350b57cec5SDimitry Andric { 21360b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 21370b57cec5SDimitry Andric (__v8hi)_mm_srl_epi16(__A, __B), 21380b57cec5SDimitry Andric (__v8hi)__W); 21390b57cec5SDimitry Andric } 21400b57cec5SDimitry Andric 21410b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21420b57cec5SDimitry Andric _mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B) 21430b57cec5SDimitry Andric { 21440b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 21450b57cec5SDimitry Andric (__v8hi)_mm_srl_epi16(__A, __B), 21460b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 21470b57cec5SDimitry Andric } 21480b57cec5SDimitry Andric 21490b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 21500b57cec5SDimitry Andric _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) 21510b57cec5SDimitry Andric { 21520b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 21530b57cec5SDimitry Andric (__v16hi)_mm256_srl_epi16(__A, __B), 21540b57cec5SDimitry Andric (__v16hi)__W); 21550b57cec5SDimitry Andric } 21560b57cec5SDimitry Andric 21570b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 21580b57cec5SDimitry Andric _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B) 
21590b57cec5SDimitry Andric { 21600b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 21610b57cec5SDimitry Andric (__v16hi)_mm256_srl_epi16(__A, __B), 21620b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 21630b57cec5SDimitry Andric } 21640b57cec5SDimitry Andric 21650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21660b57cec5SDimitry Andric _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) 21670b57cec5SDimitry Andric { 21680b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 21690b57cec5SDimitry Andric (__v8hi)_mm_srli_epi16(__A, __B), 21700b57cec5SDimitry Andric (__v8hi)__W); 21710b57cec5SDimitry Andric } 21720b57cec5SDimitry Andric 21730b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21740b57cec5SDimitry Andric _mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B) 21750b57cec5SDimitry Andric { 21760b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 21770b57cec5SDimitry Andric (__v8hi)_mm_srli_epi16(__A, __B), 21780b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128()); 21790b57cec5SDimitry Andric } 21800b57cec5SDimitry Andric 21810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 21820b57cec5SDimitry Andric _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) 21830b57cec5SDimitry Andric { 21840b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 21850b57cec5SDimitry Andric (__v16hi)_mm256_srli_epi16(__A, __B), 21860b57cec5SDimitry Andric (__v16hi)__W); 21870b57cec5SDimitry Andric } 21880b57cec5SDimitry Andric 21890b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 21900b57cec5SDimitry Andric _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) 21910b57cec5SDimitry Andric { 21920b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 21930b57cec5SDimitry Andric 
(__v16hi)_mm256_srli_epi16(__A, __B), 21940b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 21950b57cec5SDimitry Andric } 21960b57cec5SDimitry Andric 21970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21980b57cec5SDimitry Andric _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) 21990b57cec5SDimitry Andric { 22000b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, 22010b57cec5SDimitry Andric (__v8hi) __A, 22020b57cec5SDimitry Andric (__v8hi) __W); 22030b57cec5SDimitry Andric } 22040b57cec5SDimitry Andric 22050b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22060b57cec5SDimitry Andric _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) 22070b57cec5SDimitry Andric { 22080b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, 22090b57cec5SDimitry Andric (__v8hi) __A, 22100b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 ()); 22110b57cec5SDimitry Andric } 22120b57cec5SDimitry Andric 22130b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 22140b57cec5SDimitry Andric _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) 22150b57cec5SDimitry Andric { 22160b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, 22170b57cec5SDimitry Andric (__v16hi) __A, 22180b57cec5SDimitry Andric (__v16hi) __W); 22190b57cec5SDimitry Andric } 22200b57cec5SDimitry Andric 22210b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 22220b57cec5SDimitry Andric _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) 22230b57cec5SDimitry Andric { 22240b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, 22250b57cec5SDimitry Andric (__v16hi) __A, 22260b57cec5SDimitry Andric (__v16hi) _mm256_setzero_si256 ()); 22270b57cec5SDimitry Andric } 22280b57cec5SDimitry Andric 22290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22300b57cec5SDimitry Andric 
_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) 22310b57cec5SDimitry Andric { 22320b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, 22330b57cec5SDimitry Andric (__v16qi) __A, 22340b57cec5SDimitry Andric (__v16qi) __W); 22350b57cec5SDimitry Andric } 22360b57cec5SDimitry Andric 22370b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22380b57cec5SDimitry Andric _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) 22390b57cec5SDimitry Andric { 22400b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, 22410b57cec5SDimitry Andric (__v16qi) __A, 22420b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 ()); 22430b57cec5SDimitry Andric } 22440b57cec5SDimitry Andric 22450b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 22460b57cec5SDimitry Andric _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) 22470b57cec5SDimitry Andric { 22480b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, 22490b57cec5SDimitry Andric (__v32qi) __A, 22500b57cec5SDimitry Andric (__v32qi) __W); 22510b57cec5SDimitry Andric } 22520b57cec5SDimitry Andric 22530b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 22540b57cec5SDimitry Andric _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) 22550b57cec5SDimitry Andric { 22560b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, 22570b57cec5SDimitry Andric (__v32qi) __A, 22580b57cec5SDimitry Andric (__v32qi) _mm256_setzero_si256 ()); 22590b57cec5SDimitry Andric } 22600b57cec5SDimitry Andric 22610b57cec5SDimitry Andric 22620b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22630b57cec5SDimitry Andric _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) 22640b57cec5SDimitry Andric { 22650b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectb_128(__M, 22660b57cec5SDimitry Andric (__v16qi) _mm_set1_epi8(__A), 
22670b57cec5SDimitry Andric (__v16qi) __O); 22680b57cec5SDimitry Andric } 22690b57cec5SDimitry Andric 22700b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22710b57cec5SDimitry Andric _mm_maskz_set1_epi8 (__mmask16 __M, char __A) 22720b57cec5SDimitry Andric { 22730b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectb_128(__M, 22740b57cec5SDimitry Andric (__v16qi) _mm_set1_epi8(__A), 22750b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128()); 22760b57cec5SDimitry Andric } 22770b57cec5SDimitry Andric 22780b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 22790b57cec5SDimitry Andric _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A) 22800b57cec5SDimitry Andric { 22810b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectb_256(__M, 22820b57cec5SDimitry Andric (__v32qi) _mm256_set1_epi8(__A), 22830b57cec5SDimitry Andric (__v32qi) __O); 22840b57cec5SDimitry Andric } 22850b57cec5SDimitry Andric 22860b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 22870b57cec5SDimitry Andric _mm256_maskz_set1_epi8 (__mmask32 __M, char __A) 22880b57cec5SDimitry Andric { 22890b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectb_256(__M, 22900b57cec5SDimitry Andric (__v32qi) _mm256_set1_epi8(__A), 22910b57cec5SDimitry Andric (__v32qi) _mm256_setzero_si256()); 22920b57cec5SDimitry Andric } 22930b57cec5SDimitry Andric 22940b57cec5SDimitry Andric static __inline __m128i __DEFAULT_FN_ATTRS128 22950b57cec5SDimitry Andric _mm_loadu_epi16 (void const *__P) 22960b57cec5SDimitry Andric { 22970b57cec5SDimitry Andric struct __loadu_epi16 { 22980b57cec5SDimitry Andric __m128i_u __v; 22990b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 2300480093f4SDimitry Andric return ((const struct __loadu_epi16*)__P)->__v; 23010b57cec5SDimitry Andric } 23020b57cec5SDimitry Andric 23030b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 23040b57cec5SDimitry Andric 
_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) 23050b57cec5SDimitry Andric { 2306480093f4SDimitry Andric return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P, 23070b57cec5SDimitry Andric (__v8hi) __W, 23080b57cec5SDimitry Andric (__mmask8) __U); 23090b57cec5SDimitry Andric } 23100b57cec5SDimitry Andric 23110b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 23120b57cec5SDimitry Andric _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) 23130b57cec5SDimitry Andric { 2314480093f4SDimitry Andric return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P, 23150b57cec5SDimitry Andric (__v8hi) 23160b57cec5SDimitry Andric _mm_setzero_si128 (), 23170b57cec5SDimitry Andric (__mmask8) __U); 23180b57cec5SDimitry Andric } 23190b57cec5SDimitry Andric 23200b57cec5SDimitry Andric static __inline __m256i __DEFAULT_FN_ATTRS256 23210b57cec5SDimitry Andric _mm256_loadu_epi16 (void const *__P) 23220b57cec5SDimitry Andric { 23230b57cec5SDimitry Andric struct __loadu_epi16 { 23240b57cec5SDimitry Andric __m256i_u __v; 23250b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 2326480093f4SDimitry Andric return ((const struct __loadu_epi16*)__P)->__v; 23270b57cec5SDimitry Andric } 23280b57cec5SDimitry Andric 23290b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 23300b57cec5SDimitry Andric _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) 23310b57cec5SDimitry Andric { 2332480093f4SDimitry Andric return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P, 23330b57cec5SDimitry Andric (__v16hi) __W, 23340b57cec5SDimitry Andric (__mmask16) __U); 23350b57cec5SDimitry Andric } 23360b57cec5SDimitry Andric 23370b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 23380b57cec5SDimitry Andric _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) 23390b57cec5SDimitry Andric { 2340480093f4SDimitry Andric return (__m256i) 
__builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P, 23410b57cec5SDimitry Andric (__v16hi) 23420b57cec5SDimitry Andric _mm256_setzero_si256 (), 23430b57cec5SDimitry Andric (__mmask16) __U); 23440b57cec5SDimitry Andric } 23450b57cec5SDimitry Andric 23460b57cec5SDimitry Andric static __inline __m128i __DEFAULT_FN_ATTRS128 23470b57cec5SDimitry Andric _mm_loadu_epi8 (void const *__P) 23480b57cec5SDimitry Andric { 23490b57cec5SDimitry Andric struct __loadu_epi8 { 23500b57cec5SDimitry Andric __m128i_u __v; 23510b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 2352480093f4SDimitry Andric return ((const struct __loadu_epi8*)__P)->__v; 23530b57cec5SDimitry Andric } 23540b57cec5SDimitry Andric 23550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 23560b57cec5SDimitry Andric _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) 23570b57cec5SDimitry Andric { 2358480093f4SDimitry Andric return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P, 23590b57cec5SDimitry Andric (__v16qi) __W, 23600b57cec5SDimitry Andric (__mmask16) __U); 23610b57cec5SDimitry Andric } 23620b57cec5SDimitry Andric 23630b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 23640b57cec5SDimitry Andric _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) 23650b57cec5SDimitry Andric { 2366480093f4SDimitry Andric return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P, 23670b57cec5SDimitry Andric (__v16qi) 23680b57cec5SDimitry Andric _mm_setzero_si128 (), 23690b57cec5SDimitry Andric (__mmask16) __U); 23700b57cec5SDimitry Andric } 23710b57cec5SDimitry Andric 23720b57cec5SDimitry Andric static __inline __m256i __DEFAULT_FN_ATTRS256 23730b57cec5SDimitry Andric _mm256_loadu_epi8 (void const *__P) 23740b57cec5SDimitry Andric { 23750b57cec5SDimitry Andric struct __loadu_epi8 { 23760b57cec5SDimitry Andric __m256i_u __v; 23770b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 
2378480093f4SDimitry Andric return ((const struct __loadu_epi8*)__P)->__v; 23790b57cec5SDimitry Andric } 23800b57cec5SDimitry Andric 23810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 23820b57cec5SDimitry Andric _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) 23830b57cec5SDimitry Andric { 2384480093f4SDimitry Andric return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P, 23850b57cec5SDimitry Andric (__v32qi) __W, 23860b57cec5SDimitry Andric (__mmask32) __U); 23870b57cec5SDimitry Andric } 23880b57cec5SDimitry Andric 23890b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 23900b57cec5SDimitry Andric _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) 23910b57cec5SDimitry Andric { 2392480093f4SDimitry Andric return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P, 23930b57cec5SDimitry Andric (__v32qi) 23940b57cec5SDimitry Andric _mm256_setzero_si256 (), 23950b57cec5SDimitry Andric (__mmask32) __U); 23960b57cec5SDimitry Andric } 23970b57cec5SDimitry Andric 23980b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS128 23990b57cec5SDimitry Andric _mm_storeu_epi16 (void *__P, __m128i __A) 24000b57cec5SDimitry Andric { 24010b57cec5SDimitry Andric struct __storeu_epi16 { 24020b57cec5SDimitry Andric __m128i_u __v; 24030b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 24040b57cec5SDimitry Andric ((struct __storeu_epi16*)__P)->__v = __A; 24050b57cec5SDimitry Andric } 24060b57cec5SDimitry Andric 24070b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 24080b57cec5SDimitry Andric _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A) 24090b57cec5SDimitry Andric { 24100b57cec5SDimitry Andric __builtin_ia32_storedquhi128_mask ((__v8hi *) __P, 24110b57cec5SDimitry Andric (__v8hi) __A, 24120b57cec5SDimitry Andric (__mmask8) __U); 24130b57cec5SDimitry Andric } 24140b57cec5SDimitry Andric 24150b57cec5SDimitry Andric static 
__inline void __DEFAULT_FN_ATTRS256 24160b57cec5SDimitry Andric _mm256_storeu_epi16 (void *__P, __m256i __A) 24170b57cec5SDimitry Andric { 24180b57cec5SDimitry Andric struct __storeu_epi16 { 24190b57cec5SDimitry Andric __m256i_u __v; 24200b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 24210b57cec5SDimitry Andric ((struct __storeu_epi16*)__P)->__v = __A; 24220b57cec5SDimitry Andric } 24230b57cec5SDimitry Andric 24240b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 24250b57cec5SDimitry Andric _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) 24260b57cec5SDimitry Andric { 24270b57cec5SDimitry Andric __builtin_ia32_storedquhi256_mask ((__v16hi *) __P, 24280b57cec5SDimitry Andric (__v16hi) __A, 24290b57cec5SDimitry Andric (__mmask16) __U); 24300b57cec5SDimitry Andric } 24310b57cec5SDimitry Andric 24320b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS128 24330b57cec5SDimitry Andric _mm_storeu_epi8 (void *__P, __m128i __A) 24340b57cec5SDimitry Andric { 24350b57cec5SDimitry Andric struct __storeu_epi8 { 24360b57cec5SDimitry Andric __m128i_u __v; 24370b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 24380b57cec5SDimitry Andric ((struct __storeu_epi8*)__P)->__v = __A; 24390b57cec5SDimitry Andric } 24400b57cec5SDimitry Andric 24410b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 24420b57cec5SDimitry Andric _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) 24430b57cec5SDimitry Andric { 24440b57cec5SDimitry Andric __builtin_ia32_storedquqi128_mask ((__v16qi *) __P, 24450b57cec5SDimitry Andric (__v16qi) __A, 24460b57cec5SDimitry Andric (__mmask16) __U); 24470b57cec5SDimitry Andric } 24480b57cec5SDimitry Andric 24490b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS256 24500b57cec5SDimitry Andric _mm256_storeu_epi8 (void *__P, __m256i __A) 24510b57cec5SDimitry Andric { 24520b57cec5SDimitry Andric struct __storeu_epi8 { 24530b57cec5SDimitry Andric 
__m256i_u __v; 24540b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 24550b57cec5SDimitry Andric ((struct __storeu_epi8*)__P)->__v = __A; 24560b57cec5SDimitry Andric } 24570b57cec5SDimitry Andric 24580b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 24590b57cec5SDimitry Andric _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) 24600b57cec5SDimitry Andric { 24610b57cec5SDimitry Andric __builtin_ia32_storedquqi256_mask ((__v32qi *) __P, 24620b57cec5SDimitry Andric (__v32qi) __A, 24630b57cec5SDimitry Andric (__mmask32) __U); 24640b57cec5SDimitry Andric } 24650b57cec5SDimitry Andric 24660b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 24670b57cec5SDimitry Andric _mm_test_epi8_mask (__m128i __A, __m128i __B) 24680b57cec5SDimitry Andric { 24690b57cec5SDimitry Andric return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128()); 24700b57cec5SDimitry Andric } 24710b57cec5SDimitry Andric 24720b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 24730b57cec5SDimitry Andric _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) 24740b57cec5SDimitry Andric { 24750b57cec5SDimitry Andric return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B), 24760b57cec5SDimitry Andric _mm_setzero_si128()); 24770b57cec5SDimitry Andric } 24780b57cec5SDimitry Andric 24790b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 24800b57cec5SDimitry Andric _mm256_test_epi8_mask (__m256i __A, __m256i __B) 24810b57cec5SDimitry Andric { 24820b57cec5SDimitry Andric return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B), 24830b57cec5SDimitry Andric _mm256_setzero_si256()); 24840b57cec5SDimitry Andric } 24850b57cec5SDimitry Andric 24860b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 24870b57cec5SDimitry Andric _mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) 24880b57cec5SDimitry Andric { 24890b57cec5SDimitry 
Andric return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B), 24900b57cec5SDimitry Andric _mm256_setzero_si256()); 24910b57cec5SDimitry Andric } 24920b57cec5SDimitry Andric 24930b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 24940b57cec5SDimitry Andric _mm_test_epi16_mask (__m128i __A, __m128i __B) 24950b57cec5SDimitry Andric { 24960b57cec5SDimitry Andric return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 24970b57cec5SDimitry Andric } 24980b57cec5SDimitry Andric 24990b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 25000b57cec5SDimitry Andric _mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) 25010b57cec5SDimitry Andric { 25020b57cec5SDimitry Andric return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B), 25030b57cec5SDimitry Andric _mm_setzero_si128()); 25040b57cec5SDimitry Andric } 25050b57cec5SDimitry Andric 25060b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 25070b57cec5SDimitry Andric _mm256_test_epi16_mask (__m256i __A, __m256i __B) 25080b57cec5SDimitry Andric { 25090b57cec5SDimitry Andric return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B), 25100b57cec5SDimitry Andric _mm256_setzero_si256 ()); 25110b57cec5SDimitry Andric } 25120b57cec5SDimitry Andric 25130b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 25140b57cec5SDimitry Andric _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) 25150b57cec5SDimitry Andric { 25160b57cec5SDimitry Andric return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B), 25170b57cec5SDimitry Andric _mm256_setzero_si256()); 25180b57cec5SDimitry Andric } 25190b57cec5SDimitry Andric 25200b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 25210b57cec5SDimitry Andric _mm_testn_epi8_mask (__m128i __A, __m128i __B) 25220b57cec5SDimitry Andric { 25230b57cec5SDimitry Andric return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, 
__B), _mm_setzero_si128()); 25240b57cec5SDimitry Andric } 25250b57cec5SDimitry Andric 25260b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 25270b57cec5SDimitry Andric _mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) 25280b57cec5SDimitry Andric { 25290b57cec5SDimitry Andric return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B), 25300b57cec5SDimitry Andric _mm_setzero_si128()); 25310b57cec5SDimitry Andric } 25320b57cec5SDimitry Andric 25330b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 25340b57cec5SDimitry Andric _mm256_testn_epi8_mask (__m256i __A, __m256i __B) 25350b57cec5SDimitry Andric { 25360b57cec5SDimitry Andric return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B), 25370b57cec5SDimitry Andric _mm256_setzero_si256()); 25380b57cec5SDimitry Andric } 25390b57cec5SDimitry Andric 25400b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 25410b57cec5SDimitry Andric _mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) 25420b57cec5SDimitry Andric { 25430b57cec5SDimitry Andric return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B), 25440b57cec5SDimitry Andric _mm256_setzero_si256()); 25450b57cec5SDimitry Andric } 25460b57cec5SDimitry Andric 25470b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 25480b57cec5SDimitry Andric _mm_testn_epi16_mask (__m128i __A, __m128i __B) 25490b57cec5SDimitry Andric { 25500b57cec5SDimitry Andric return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 25510b57cec5SDimitry Andric } 25520b57cec5SDimitry Andric 25530b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 25540b57cec5SDimitry Andric _mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) 25550b57cec5SDimitry Andric { 25560b57cec5SDimitry Andric return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128()); 25570b57cec5SDimitry Andric } 
25580b57cec5SDimitry Andric 25590b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 25600b57cec5SDimitry Andric _mm256_testn_epi16_mask (__m256i __A, __m256i __B) 25610b57cec5SDimitry Andric { 25620b57cec5SDimitry Andric return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B), 25630b57cec5SDimitry Andric _mm256_setzero_si256()); 25640b57cec5SDimitry Andric } 25650b57cec5SDimitry Andric 25660b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 25670b57cec5SDimitry Andric _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) 25680b57cec5SDimitry Andric { 25690b57cec5SDimitry Andric return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B), 25700b57cec5SDimitry Andric _mm256_setzero_si256()); 25710b57cec5SDimitry Andric } 25720b57cec5SDimitry Andric 25730b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 25740b57cec5SDimitry Andric _mm_movepi8_mask (__m128i __A) 25750b57cec5SDimitry Andric { 25760b57cec5SDimitry Andric return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A); 25770b57cec5SDimitry Andric } 25780b57cec5SDimitry Andric 25790b57cec5SDimitry Andric static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 25800b57cec5SDimitry Andric _mm256_movepi8_mask (__m256i __A) 25810b57cec5SDimitry Andric { 25820b57cec5SDimitry Andric return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); 25830b57cec5SDimitry Andric } 25840b57cec5SDimitry Andric 25850b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 25860b57cec5SDimitry Andric _mm_movepi16_mask (__m128i __A) 25870b57cec5SDimitry Andric { 25880b57cec5SDimitry Andric return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A); 25890b57cec5SDimitry Andric } 25900b57cec5SDimitry Andric 25910b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 25920b57cec5SDimitry Andric _mm256_movepi16_mask (__m256i __A) 25930b57cec5SDimitry Andric { 25940b57cec5SDimitry Andric return 
(__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A); 25950b57cec5SDimitry Andric } 25960b57cec5SDimitry Andric 25970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 25980b57cec5SDimitry Andric _mm_movm_epi8 (__mmask16 __A) 25990b57cec5SDimitry Andric { 26000b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtmask2b128 (__A); 26010b57cec5SDimitry Andric } 26020b57cec5SDimitry Andric 26030b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26040b57cec5SDimitry Andric _mm256_movm_epi8 (__mmask32 __A) 26050b57cec5SDimitry Andric { 26060b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtmask2b256 (__A); 26070b57cec5SDimitry Andric } 26080b57cec5SDimitry Andric 26090b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 26100b57cec5SDimitry Andric _mm_movm_epi16 (__mmask8 __A) 26110b57cec5SDimitry Andric { 26120b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtmask2w128 (__A); 26130b57cec5SDimitry Andric } 26140b57cec5SDimitry Andric 26150b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26160b57cec5SDimitry Andric _mm256_movm_epi16 (__mmask16 __A) 26170b57cec5SDimitry Andric { 26180b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtmask2w256 (__A); 26190b57cec5SDimitry Andric } 26200b57cec5SDimitry Andric 26210b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 26220b57cec5SDimitry Andric _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) 26230b57cec5SDimitry Andric { 26240b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectb_128(__M, 26250b57cec5SDimitry Andric (__v16qi) _mm_broadcastb_epi8(__A), 26260b57cec5SDimitry Andric (__v16qi) __O); 26270b57cec5SDimitry Andric } 26280b57cec5SDimitry Andric 26290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 26300b57cec5SDimitry Andric _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) 26310b57cec5SDimitry Andric { 26320b57cec5SDimitry Andric return 
(__m128i)__builtin_ia32_selectb_128(__M, 26330b57cec5SDimitry Andric (__v16qi) _mm_broadcastb_epi8(__A), 26340b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128()); 26350b57cec5SDimitry Andric } 26360b57cec5SDimitry Andric 26370b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26380b57cec5SDimitry Andric _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) 26390b57cec5SDimitry Andric { 26400b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256(__M, 26410b57cec5SDimitry Andric (__v32qi) _mm256_broadcastb_epi8(__A), 26420b57cec5SDimitry Andric (__v32qi) __O); 26430b57cec5SDimitry Andric } 26440b57cec5SDimitry Andric 26450b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26460b57cec5SDimitry Andric _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) 26470b57cec5SDimitry Andric { 26480b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectb_256(__M, 26490b57cec5SDimitry Andric (__v32qi) _mm256_broadcastb_epi8(__A), 26500b57cec5SDimitry Andric (__v32qi) _mm256_setzero_si256()); 26510b57cec5SDimitry Andric } 26520b57cec5SDimitry Andric 26530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 26540b57cec5SDimitry Andric _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 26550b57cec5SDimitry Andric { 26560b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128(__M, 26570b57cec5SDimitry Andric (__v8hi) _mm_broadcastw_epi16(__A), 26580b57cec5SDimitry Andric (__v8hi) __O); 26590b57cec5SDimitry Andric } 26600b57cec5SDimitry Andric 26610b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 26620b57cec5SDimitry Andric _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) 26630b57cec5SDimitry Andric { 26640b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128(__M, 26650b57cec5SDimitry Andric (__v8hi) _mm_broadcastw_epi16(__A), 26660b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128()); 26670b57cec5SDimitry Andric } 
26680b57cec5SDimitry Andric 26690b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26700b57cec5SDimitry Andric _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) 26710b57cec5SDimitry Andric { 26720b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256(__M, 26730b57cec5SDimitry Andric (__v16hi) _mm256_broadcastw_epi16(__A), 26740b57cec5SDimitry Andric (__v16hi) __O); 26750b57cec5SDimitry Andric } 26760b57cec5SDimitry Andric 26770b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26780b57cec5SDimitry Andric _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) 26790b57cec5SDimitry Andric { 26800b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256(__M, 26810b57cec5SDimitry Andric (__v16hi) _mm256_broadcastw_epi16(__A), 26820b57cec5SDimitry Andric (__v16hi) _mm256_setzero_si256()); 26830b57cec5SDimitry Andric } 26840b57cec5SDimitry Andric 26850b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26860b57cec5SDimitry Andric _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) 26870b57cec5SDimitry Andric { 26880b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectw_256 (__M, 26890b57cec5SDimitry Andric (__v16hi) _mm256_set1_epi16(__A), 26900b57cec5SDimitry Andric (__v16hi) __O); 26910b57cec5SDimitry Andric } 26920b57cec5SDimitry Andric 26930b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26940b57cec5SDimitry Andric _mm256_maskz_set1_epi16 (__mmask16 __M, short __A) 26950b57cec5SDimitry Andric { 26960b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectw_256(__M, 26970b57cec5SDimitry Andric (__v16hi)_mm256_set1_epi16(__A), 26980b57cec5SDimitry Andric (__v16hi) _mm256_setzero_si256()); 26990b57cec5SDimitry Andric } 27000b57cec5SDimitry Andric 27010b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 27020b57cec5SDimitry Andric _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) 
27030b57cec5SDimitry Andric { 27040b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectw_128(__M, 27050b57cec5SDimitry Andric (__v8hi) _mm_set1_epi16(__A), 27060b57cec5SDimitry Andric (__v8hi) __O); 27070b57cec5SDimitry Andric } 27080b57cec5SDimitry Andric 27090b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 27100b57cec5SDimitry Andric _mm_maskz_set1_epi16 (__mmask8 __M, short __A) 27110b57cec5SDimitry Andric { 27120b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectw_128(__M, 27130b57cec5SDimitry Andric (__v8hi) _mm_set1_epi16(__A), 27140b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128()); 27150b57cec5SDimitry Andric } 27160b57cec5SDimitry Andric 27170b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 27180b57cec5SDimitry Andric _mm_permutexvar_epi16 (__m128i __A, __m128i __B) 27190b57cec5SDimitry Andric { 27200b57cec5SDimitry Andric return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A); 27210b57cec5SDimitry Andric } 27220b57cec5SDimitry Andric 27230b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 27240b57cec5SDimitry Andric _mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B) 27250b57cec5SDimitry Andric { 27260b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 27270b57cec5SDimitry Andric (__v8hi)_mm_permutexvar_epi16(__A, __B), 27280b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128()); 27290b57cec5SDimitry Andric } 27300b57cec5SDimitry Andric 27310b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 27320b57cec5SDimitry Andric _mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A, 27330b57cec5SDimitry Andric __m128i __B) 27340b57cec5SDimitry Andric { 27350b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 27360b57cec5SDimitry Andric (__v8hi)_mm_permutexvar_epi16(__A, __B), 27370b57cec5SDimitry Andric (__v8hi)__W); 27380b57cec5SDimitry Andric } 
27390b57cec5SDimitry Andric 27400b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 27410b57cec5SDimitry Andric _mm256_permutexvar_epi16 (__m256i __A, __m256i __B) 27420b57cec5SDimitry Andric { 27430b57cec5SDimitry Andric return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A); 27440b57cec5SDimitry Andric } 27450b57cec5SDimitry Andric 27460b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 27470b57cec5SDimitry Andric _mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A, 27480b57cec5SDimitry Andric __m256i __B) 27490b57cec5SDimitry Andric { 27500b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 27510b57cec5SDimitry Andric (__v16hi)_mm256_permutexvar_epi16(__A, __B), 27520b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256()); 27530b57cec5SDimitry Andric } 27540b57cec5SDimitry Andric 27550b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 27560b57cec5SDimitry Andric _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, 27570b57cec5SDimitry Andric __m256i __B) 27580b57cec5SDimitry Andric { 27590b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 27600b57cec5SDimitry Andric (__v16hi)_mm256_permutexvar_epi16(__A, __B), 27610b57cec5SDimitry Andric (__v16hi)__W); 27620b57cec5SDimitry Andric } 27630b57cec5SDimitry Andric 27640b57cec5SDimitry Andric #define _mm_mask_alignr_epi8(W, U, A, B, N) \ 2765349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 27660b57cec5SDimitry Andric (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 2767349cc55cSDimitry Andric (__v16qi)(__m128i)(W))) 27680b57cec5SDimitry Andric 27690b57cec5SDimitry Andric #define _mm_maskz_alignr_epi8(U, A, B, N) \ 2770349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 27710b57cec5SDimitry Andric (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 2772349cc55cSDimitry Andric 
(__v16qi)_mm_setzero_si128())) 27730b57cec5SDimitry Andric 27740b57cec5SDimitry Andric #define _mm256_mask_alignr_epi8(W, U, A, B, N) \ 2775349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 27760b57cec5SDimitry Andric (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 2777349cc55cSDimitry Andric (__v32qi)(__m256i)(W))) 27780b57cec5SDimitry Andric 27790b57cec5SDimitry Andric #define _mm256_maskz_alignr_epi8(U, A, B, N) \ 2780349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 27810b57cec5SDimitry Andric (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 2782349cc55cSDimitry Andric (__v32qi)_mm256_setzero_si256())) 27830b57cec5SDimitry Andric 27840b57cec5SDimitry Andric #define _mm_dbsad_epu8(A, B, imm) \ 2785349cc55cSDimitry Andric ((__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \ 2786349cc55cSDimitry Andric (__v16qi)(__m128i)(B), (int)(imm))) 27870b57cec5SDimitry Andric 27880b57cec5SDimitry Andric #define _mm_mask_dbsad_epu8(W, U, A, B, imm) \ 2789349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 27900b57cec5SDimitry Andric (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ 2791349cc55cSDimitry Andric (__v8hi)(__m128i)(W))) 27920b57cec5SDimitry Andric 27930b57cec5SDimitry Andric #define _mm_maskz_dbsad_epu8(U, A, B, imm) \ 2794349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 27950b57cec5SDimitry Andric (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ 2796349cc55cSDimitry Andric (__v8hi)_mm_setzero_si128())) 27970b57cec5SDimitry Andric 27980b57cec5SDimitry Andric #define _mm256_dbsad_epu8(A, B, imm) \ 2799349cc55cSDimitry Andric ((__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \ 2800349cc55cSDimitry Andric (__v32qi)(__m256i)(B), (int)(imm))) 28010b57cec5SDimitry Andric 28020b57cec5SDimitry Andric #define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \ 2803349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 
28040b57cec5SDimitry Andric (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ 2805349cc55cSDimitry Andric (__v16hi)(__m256i)(W))) 28060b57cec5SDimitry Andric 28070b57cec5SDimitry Andric #define _mm256_maskz_dbsad_epu8(U, A, B, imm) \ 2808349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 28090b57cec5SDimitry Andric (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ 2810349cc55cSDimitry Andric (__v16hi)_mm256_setzero_si256())) 28110b57cec5SDimitry Andric 2812bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2813bdd1243dSDimitry Andric _mm_reduce_add_epi16(__m128i __W) { 2814bdd1243dSDimitry Andric return __builtin_reduce_add((__v8hi)__W); 2815bdd1243dSDimitry Andric } 2816bdd1243dSDimitry Andric 2817bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2818bdd1243dSDimitry Andric _mm_reduce_mul_epi16(__m128i __W) { 2819bdd1243dSDimitry Andric return __builtin_reduce_mul((__v8hi)__W); 2820bdd1243dSDimitry Andric } 2821bdd1243dSDimitry Andric 2822bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2823bdd1243dSDimitry Andric _mm_reduce_and_epi16(__m128i __W) { 2824bdd1243dSDimitry Andric return __builtin_reduce_and((__v8hi)__W); 2825bdd1243dSDimitry Andric } 2826bdd1243dSDimitry Andric 2827bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2828bdd1243dSDimitry Andric _mm_reduce_or_epi16(__m128i __W) { 2829bdd1243dSDimitry Andric return __builtin_reduce_or((__v8hi)__W); 2830bdd1243dSDimitry Andric } 2831bdd1243dSDimitry Andric 2832bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2833bdd1243dSDimitry Andric _mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) { 2834bdd1243dSDimitry Andric __W = _mm_maskz_mov_epi16(__M, __W); 2835bdd1243dSDimitry Andric return __builtin_reduce_add((__v8hi)__W); 2836bdd1243dSDimitry Andric } 2837bdd1243dSDimitry Andric 2838bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2839bdd1243dSDimitry Andric 
_mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) { 2840bdd1243dSDimitry Andric __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W); 2841bdd1243dSDimitry Andric return __builtin_reduce_mul((__v8hi)__W); 2842bdd1243dSDimitry Andric } 2843bdd1243dSDimitry Andric 2844bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2845bdd1243dSDimitry Andric _mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) { 2846bdd1243dSDimitry Andric __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W); 2847bdd1243dSDimitry Andric return __builtin_reduce_and((__v8hi)__W); 2848bdd1243dSDimitry Andric } 2849bdd1243dSDimitry Andric 2850bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2851bdd1243dSDimitry Andric _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) { 2852bdd1243dSDimitry Andric __W = _mm_maskz_mov_epi16(__M, __W); 2853bdd1243dSDimitry Andric return __builtin_reduce_or((__v8hi)__W); 2854bdd1243dSDimitry Andric } 2855bdd1243dSDimitry Andric 2856bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2857bdd1243dSDimitry Andric _mm_reduce_max_epi16(__m128i __V) { 2858bdd1243dSDimitry Andric return __builtin_reduce_max((__v8hi)__V); 2859bdd1243dSDimitry Andric } 2860bdd1243dSDimitry Andric 2861bdd1243dSDimitry Andric static __inline__ unsigned short __DEFAULT_FN_ATTRS128 2862bdd1243dSDimitry Andric _mm_reduce_max_epu16(__m128i __V) { 2863bdd1243dSDimitry Andric return __builtin_reduce_max((__v8hu)__V); 2864bdd1243dSDimitry Andric } 2865bdd1243dSDimitry Andric 2866bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2867bdd1243dSDimitry Andric _mm_reduce_min_epi16(__m128i __V) { 2868bdd1243dSDimitry Andric return __builtin_reduce_min((__v8hi)__V); 2869bdd1243dSDimitry Andric } 2870bdd1243dSDimitry Andric 2871bdd1243dSDimitry Andric static __inline__ unsigned short __DEFAULT_FN_ATTRS128 2872bdd1243dSDimitry Andric _mm_reduce_min_epu16(__m128i __V) { 2873bdd1243dSDimitry Andric return 
__builtin_reduce_min((__v8hu)__V); 2874bdd1243dSDimitry Andric } 2875bdd1243dSDimitry Andric 2876bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2877bdd1243dSDimitry Andric _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) { 2878bdd1243dSDimitry Andric __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V); 2879bdd1243dSDimitry Andric return __builtin_reduce_max((__v8hi)__V); 2880bdd1243dSDimitry Andric } 2881bdd1243dSDimitry Andric 2882bdd1243dSDimitry Andric static __inline__ unsigned short __DEFAULT_FN_ATTRS128 2883bdd1243dSDimitry Andric _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) { 2884bdd1243dSDimitry Andric __V = _mm_maskz_mov_epi16(__M, __V); 2885bdd1243dSDimitry Andric return __builtin_reduce_max((__v8hu)__V); 2886bdd1243dSDimitry Andric } 2887bdd1243dSDimitry Andric 2888bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS128 2889bdd1243dSDimitry Andric _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) { 2890bdd1243dSDimitry Andric __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V); 2891bdd1243dSDimitry Andric return __builtin_reduce_min((__v8hi)__V); 2892bdd1243dSDimitry Andric } 2893bdd1243dSDimitry Andric 2894bdd1243dSDimitry Andric static __inline__ unsigned short __DEFAULT_FN_ATTRS128 2895bdd1243dSDimitry Andric _mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) { 2896bdd1243dSDimitry Andric __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V); 2897bdd1243dSDimitry Andric return __builtin_reduce_min((__v8hu)__V); 2898bdd1243dSDimitry Andric } 2899bdd1243dSDimitry Andric 2900bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2901bdd1243dSDimitry Andric _mm256_reduce_add_epi16(__m256i __W) { 2902bdd1243dSDimitry Andric return __builtin_reduce_add((__v16hi)__W); 2903bdd1243dSDimitry Andric } 2904bdd1243dSDimitry Andric 2905bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2906bdd1243dSDimitry Andric _mm256_reduce_mul_epi16(__m256i __W) { 
2907bdd1243dSDimitry Andric return __builtin_reduce_mul((__v16hi)__W); 2908bdd1243dSDimitry Andric } 2909bdd1243dSDimitry Andric 2910bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2911bdd1243dSDimitry Andric _mm256_reduce_and_epi16(__m256i __W) { 2912bdd1243dSDimitry Andric return __builtin_reduce_and((__v16hi)__W); 2913bdd1243dSDimitry Andric } 2914bdd1243dSDimitry Andric 2915bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2916bdd1243dSDimitry Andric _mm256_reduce_or_epi16(__m256i __W) { 2917bdd1243dSDimitry Andric return __builtin_reduce_or((__v16hi)__W); 2918bdd1243dSDimitry Andric } 2919bdd1243dSDimitry Andric 2920bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2921bdd1243dSDimitry Andric _mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) { 2922bdd1243dSDimitry Andric __W = _mm256_maskz_mov_epi16(__M, __W); 2923bdd1243dSDimitry Andric return __builtin_reduce_add((__v16hi)__W); 2924bdd1243dSDimitry Andric } 2925bdd1243dSDimitry Andric 2926bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2927bdd1243dSDimitry Andric _mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) { 2928bdd1243dSDimitry Andric __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W); 2929bdd1243dSDimitry Andric return __builtin_reduce_mul((__v16hi)__W); 2930bdd1243dSDimitry Andric } 2931bdd1243dSDimitry Andric 2932bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2933bdd1243dSDimitry Andric _mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) { 2934bdd1243dSDimitry Andric __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W); 2935bdd1243dSDimitry Andric return __builtin_reduce_and((__v16hi)__W); 2936bdd1243dSDimitry Andric } 2937bdd1243dSDimitry Andric 2938bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2939bdd1243dSDimitry Andric _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) { 2940bdd1243dSDimitry Andric __W = 
_mm256_maskz_mov_epi16(__M, __W); 2941bdd1243dSDimitry Andric return __builtin_reduce_or((__v16hi)__W); 2942bdd1243dSDimitry Andric } 2943bdd1243dSDimitry Andric 2944bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2945bdd1243dSDimitry Andric _mm256_reduce_max_epi16(__m256i __V) { 2946bdd1243dSDimitry Andric return __builtin_reduce_max((__v16hi)__V); 2947bdd1243dSDimitry Andric } 2948bdd1243dSDimitry Andric 2949bdd1243dSDimitry Andric static __inline__ unsigned short __DEFAULT_FN_ATTRS256 2950bdd1243dSDimitry Andric _mm256_reduce_max_epu16(__m256i __V) { 2951bdd1243dSDimitry Andric return __builtin_reduce_max((__v16hu)__V); 2952bdd1243dSDimitry Andric } 2953bdd1243dSDimitry Andric 2954bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2955bdd1243dSDimitry Andric _mm256_reduce_min_epi16(__m256i __V) { 2956bdd1243dSDimitry Andric return __builtin_reduce_min((__v16hi)__V); 2957bdd1243dSDimitry Andric } 2958bdd1243dSDimitry Andric 2959bdd1243dSDimitry Andric static __inline__ unsigned short __DEFAULT_FN_ATTRS256 2960bdd1243dSDimitry Andric _mm256_reduce_min_epu16(__m256i __V) { 2961bdd1243dSDimitry Andric return __builtin_reduce_min((__v16hu)__V); 2962bdd1243dSDimitry Andric } 2963bdd1243dSDimitry Andric 2964bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2965bdd1243dSDimitry Andric _mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) { 2966bdd1243dSDimitry Andric __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V); 2967bdd1243dSDimitry Andric return __builtin_reduce_max((__v16hi)__V); 2968bdd1243dSDimitry Andric } 2969bdd1243dSDimitry Andric 2970bdd1243dSDimitry Andric static __inline__ unsigned short __DEFAULT_FN_ATTRS256 2971bdd1243dSDimitry Andric _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) { 2972bdd1243dSDimitry Andric __V = _mm256_maskz_mov_epi16(__M, __V); 2973bdd1243dSDimitry Andric return __builtin_reduce_max((__v16hu)__V); 2974bdd1243dSDimitry Andric } 
2975bdd1243dSDimitry Andric 2976bdd1243dSDimitry Andric static __inline__ short __DEFAULT_FN_ATTRS256 2977bdd1243dSDimitry Andric _mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) { 2978bdd1243dSDimitry Andric __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V); 2979bdd1243dSDimitry Andric return __builtin_reduce_min((__v16hi)__V); 2980bdd1243dSDimitry Andric } 2981bdd1243dSDimitry Andric 2982bdd1243dSDimitry Andric static __inline__ unsigned short __DEFAULT_FN_ATTRS256 2983bdd1243dSDimitry Andric _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) { 2984bdd1243dSDimitry Andric __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V); 2985bdd1243dSDimitry Andric return __builtin_reduce_min((__v16hu)__V); 2986bdd1243dSDimitry Andric } 2987bdd1243dSDimitry Andric 2988bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 2989bdd1243dSDimitry Andric _mm_reduce_add_epi8(__m128i __W) { 2990bdd1243dSDimitry Andric return __builtin_reduce_add((__v16qs)__W); 2991bdd1243dSDimitry Andric } 2992bdd1243dSDimitry Andric 2993bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 2994bdd1243dSDimitry Andric _mm_reduce_mul_epi8(__m128i __W) { 2995bdd1243dSDimitry Andric return __builtin_reduce_mul((__v16qs)__W); 2996bdd1243dSDimitry Andric } 2997bdd1243dSDimitry Andric 2998bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 2999bdd1243dSDimitry Andric _mm_reduce_and_epi8(__m128i __W) { 3000bdd1243dSDimitry Andric return __builtin_reduce_and((__v16qs)__W); 3001bdd1243dSDimitry Andric } 3002bdd1243dSDimitry Andric 3003bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 3004bdd1243dSDimitry Andric _mm_reduce_or_epi8(__m128i __W) { 3005bdd1243dSDimitry Andric return __builtin_reduce_or((__v16qs)__W); 3006bdd1243dSDimitry Andric } 3007bdd1243dSDimitry Andric 3008bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 3009bdd1243dSDimitry Andric 
_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) { 3010bdd1243dSDimitry Andric __W = _mm_maskz_mov_epi8(__M, __W); 3011bdd1243dSDimitry Andric return __builtin_reduce_add((__v16qs)__W); 3012bdd1243dSDimitry Andric } 3013bdd1243dSDimitry Andric 3014bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 3015bdd1243dSDimitry Andric _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) { 3016bdd1243dSDimitry Andric __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W); 3017bdd1243dSDimitry Andric return __builtin_reduce_mul((__v16qs)__W); 3018bdd1243dSDimitry Andric } 3019bdd1243dSDimitry Andric 3020bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 3021bdd1243dSDimitry Andric _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) { 3022bdd1243dSDimitry Andric __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W); 3023bdd1243dSDimitry Andric return __builtin_reduce_and((__v16qs)__W); 3024bdd1243dSDimitry Andric } 3025bdd1243dSDimitry Andric 3026bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 3027bdd1243dSDimitry Andric _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) { 3028bdd1243dSDimitry Andric __W = _mm_maskz_mov_epi8(__M, __W); 3029bdd1243dSDimitry Andric return __builtin_reduce_or((__v16qs)__W); 3030bdd1243dSDimitry Andric } 3031bdd1243dSDimitry Andric 3032bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 3033bdd1243dSDimitry Andric _mm_reduce_max_epi8(__m128i __V) { 3034bdd1243dSDimitry Andric return __builtin_reduce_max((__v16qs)__V); 3035bdd1243dSDimitry Andric } 3036bdd1243dSDimitry Andric 3037bdd1243dSDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS128 3038bdd1243dSDimitry Andric _mm_reduce_max_epu8(__m128i __V) { 3039bdd1243dSDimitry Andric return __builtin_reduce_max((__v16qu)__V); 3040bdd1243dSDimitry Andric } 3041bdd1243dSDimitry Andric 3042bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 3043bdd1243dSDimitry 
Andric _mm_reduce_min_epi8(__m128i __V) { 3044bdd1243dSDimitry Andric return __builtin_reduce_min((__v16qs)__V); 3045bdd1243dSDimitry Andric } 3046bdd1243dSDimitry Andric 3047bdd1243dSDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS128 3048bdd1243dSDimitry Andric _mm_reduce_min_epu8(__m128i __V) { 3049bdd1243dSDimitry Andric return __builtin_reduce_min((__v16qu)__V); 3050bdd1243dSDimitry Andric } 3051bdd1243dSDimitry Andric 3052bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 3053bdd1243dSDimitry Andric _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) { 3054bdd1243dSDimitry Andric __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V); 3055bdd1243dSDimitry Andric return __builtin_reduce_max((__v16qs)__V); 3056bdd1243dSDimitry Andric } 3057bdd1243dSDimitry Andric 3058bdd1243dSDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS128 3059bdd1243dSDimitry Andric _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) { 3060bdd1243dSDimitry Andric __V = _mm_maskz_mov_epi8(__M, __V); 3061bdd1243dSDimitry Andric return __builtin_reduce_max((__v16qu)__V); 3062bdd1243dSDimitry Andric } 3063bdd1243dSDimitry Andric 3064bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS128 3065bdd1243dSDimitry Andric _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) { 3066bdd1243dSDimitry Andric __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V); 3067bdd1243dSDimitry Andric return __builtin_reduce_min((__v16qs)__V); 3068bdd1243dSDimitry Andric } 3069bdd1243dSDimitry Andric 3070bdd1243dSDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS128 3071bdd1243dSDimitry Andric _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) { 3072bdd1243dSDimitry Andric __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V); 3073bdd1243dSDimitry Andric return __builtin_reduce_min((__v16qu)__V); 3074bdd1243dSDimitry Andric } 3075bdd1243dSDimitry Andric 3076bdd1243dSDimitry Andric static __inline__ signed char 
__DEFAULT_FN_ATTRS256 3077bdd1243dSDimitry Andric _mm256_reduce_add_epi8(__m256i __W) { 3078bdd1243dSDimitry Andric return __builtin_reduce_add((__v32qs)__W); 3079bdd1243dSDimitry Andric } 3080bdd1243dSDimitry Andric 3081bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3082bdd1243dSDimitry Andric _mm256_reduce_mul_epi8(__m256i __W) { 3083bdd1243dSDimitry Andric return __builtin_reduce_mul((__v32qs)__W); 3084bdd1243dSDimitry Andric } 3085bdd1243dSDimitry Andric 3086bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3087bdd1243dSDimitry Andric _mm256_reduce_and_epi8(__m256i __W) { 3088bdd1243dSDimitry Andric return __builtin_reduce_and((__v32qs)__W); 3089bdd1243dSDimitry Andric } 3090bdd1243dSDimitry Andric 3091bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3092bdd1243dSDimitry Andric _mm256_reduce_or_epi8(__m256i __W) { 3093bdd1243dSDimitry Andric return __builtin_reduce_or((__v32qs)__W); 3094bdd1243dSDimitry Andric } 3095bdd1243dSDimitry Andric 3096bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3097bdd1243dSDimitry Andric _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) { 3098bdd1243dSDimitry Andric __W = _mm256_maskz_mov_epi8(__M, __W); 3099bdd1243dSDimitry Andric return __builtin_reduce_add((__v32qs)__W); 3100bdd1243dSDimitry Andric } 3101bdd1243dSDimitry Andric 3102bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3103bdd1243dSDimitry Andric _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) { 3104bdd1243dSDimitry Andric __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W); 3105bdd1243dSDimitry Andric return __builtin_reduce_mul((__v32qs)__W); 3106bdd1243dSDimitry Andric } 3107bdd1243dSDimitry Andric 3108bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3109bdd1243dSDimitry Andric _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) { 3110bdd1243dSDimitry Andric __W = 
_mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W); 3111bdd1243dSDimitry Andric return __builtin_reduce_and((__v32qs)__W); 3112bdd1243dSDimitry Andric } 3113bdd1243dSDimitry Andric 3114bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3115bdd1243dSDimitry Andric _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) { 3116bdd1243dSDimitry Andric __W = _mm256_maskz_mov_epi8(__M, __W); 3117bdd1243dSDimitry Andric return __builtin_reduce_or((__v32qs)__W); 3118bdd1243dSDimitry Andric } 3119bdd1243dSDimitry Andric 3120bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3121bdd1243dSDimitry Andric _mm256_reduce_max_epi8(__m256i __V) { 3122bdd1243dSDimitry Andric return __builtin_reduce_max((__v32qs)__V); 3123bdd1243dSDimitry Andric } 3124bdd1243dSDimitry Andric 3125bdd1243dSDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS256 3126bdd1243dSDimitry Andric _mm256_reduce_max_epu8(__m256i __V) { 3127bdd1243dSDimitry Andric return __builtin_reduce_max((__v32qu)__V); 3128bdd1243dSDimitry Andric } 3129bdd1243dSDimitry Andric 3130bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3131bdd1243dSDimitry Andric _mm256_reduce_min_epi8(__m256i __V) { 3132bdd1243dSDimitry Andric return __builtin_reduce_min((__v32qs)__V); 3133bdd1243dSDimitry Andric } 3134bdd1243dSDimitry Andric 3135bdd1243dSDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS256 3136bdd1243dSDimitry Andric _mm256_reduce_min_epu8(__m256i __V) { 3137bdd1243dSDimitry Andric return __builtin_reduce_min((__v32qu)__V); 3138bdd1243dSDimitry Andric } 3139bdd1243dSDimitry Andric 3140bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3141bdd1243dSDimitry Andric _mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) { 3142bdd1243dSDimitry Andric __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V); 3143bdd1243dSDimitry Andric return __builtin_reduce_max((__v32qs)__V); 3144bdd1243dSDimitry 
Andric } 3145bdd1243dSDimitry Andric 3146bdd1243dSDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS256 3147bdd1243dSDimitry Andric _mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) { 3148bdd1243dSDimitry Andric __V = _mm256_maskz_mov_epi8(__M, __V); 3149bdd1243dSDimitry Andric return __builtin_reduce_max((__v32qu)__V); 3150bdd1243dSDimitry Andric } 3151bdd1243dSDimitry Andric 3152bdd1243dSDimitry Andric static __inline__ signed char __DEFAULT_FN_ATTRS256 3153bdd1243dSDimitry Andric _mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) { 3154bdd1243dSDimitry Andric __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V); 3155bdd1243dSDimitry Andric return __builtin_reduce_min((__v32qs)__V); 3156bdd1243dSDimitry Andric } 3157bdd1243dSDimitry Andric 3158bdd1243dSDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS256 3159bdd1243dSDimitry Andric _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) { 3160bdd1243dSDimitry Andric __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V); 3161bdd1243dSDimitry Andric return __builtin_reduce_min((__v32qu)__V); 3162bdd1243dSDimitry Andric } 3163bdd1243dSDimitry Andric 31640b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 31650b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256 31660b57cec5SDimitry Andric 31670b57cec5SDimitry Andric #endif /* __AVX512VLBWINTRIN_H */ 3168