10b57cec5SDimitry Andric /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __AVX512VLINTRIN_H 150b57cec5SDimitry Andric #define __AVX512VLINTRIN_H 160b57cec5SDimitry Andric 17*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS128 \ 18*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 19*5f757f3fSDimitry Andric __target__("avx512vl,no-evex512"), \ 20*5f757f3fSDimitry Andric __min_vector_width__(128))) 21*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS256 \ 22*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 23*5f757f3fSDimitry Andric __target__("avx512vl,no-evex512"), \ 24*5f757f3fSDimitry Andric __min_vector_width__(256))) 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric typedef short __v2hi __attribute__((__vector_size__(4))); 270b57cec5SDimitry Andric typedef char __v4qi __attribute__((__vector_size__(4))); 280b57cec5SDimitry Andric typedef char __v2qi __attribute__((__vector_size__(2))); 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric /* Integer compare */ 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric #define _mm_cmpeq_epi32_mask(A, B) \ 330b57cec5SDimitry Andric _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 340b57cec5SDimitry Andric #define 
_mm_mask_cmpeq_epi32_mask(k, A, B) \ 350b57cec5SDimitry Andric _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 360b57cec5SDimitry Andric #define _mm_cmpge_epi32_mask(A, B) \ 370b57cec5SDimitry Andric _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 380b57cec5SDimitry Andric #define _mm_mask_cmpge_epi32_mask(k, A, B) \ 390b57cec5SDimitry Andric _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 400b57cec5SDimitry Andric #define _mm_cmpgt_epi32_mask(A, B) \ 410b57cec5SDimitry Andric _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 420b57cec5SDimitry Andric #define _mm_mask_cmpgt_epi32_mask(k, A, B) \ 430b57cec5SDimitry Andric _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 440b57cec5SDimitry Andric #define _mm_cmple_epi32_mask(A, B) \ 450b57cec5SDimitry Andric _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 460b57cec5SDimitry Andric #define _mm_mask_cmple_epi32_mask(k, A, B) \ 470b57cec5SDimitry Andric _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 480b57cec5SDimitry Andric #define _mm_cmplt_epi32_mask(A, B) \ 490b57cec5SDimitry Andric _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 500b57cec5SDimitry Andric #define _mm_mask_cmplt_epi32_mask(k, A, B) \ 510b57cec5SDimitry Andric _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 520b57cec5SDimitry Andric #define _mm_cmpneq_epi32_mask(A, B) \ 530b57cec5SDimitry Andric _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 540b57cec5SDimitry Andric #define _mm_mask_cmpneq_epi32_mask(k, A, B) \ 550b57cec5SDimitry Andric _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric #define _mm256_cmpeq_epi32_mask(A, B) \ 580b57cec5SDimitry Andric _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 590b57cec5SDimitry Andric #define _mm256_mask_cmpeq_epi32_mask(k, A, B) \ 600b57cec5SDimitry Andric _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 610b57cec5SDimitry Andric #define _mm256_cmpge_epi32_mask(A, B) \ 620b57cec5SDimitry Andric _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 
/* Integer compare (continued): each named-predicate macro forwards to the
 * generic _mm[256]_cmp_*_mask / _mm[256]_mask_cmp_*_mask macro with the
 * matching _MM_CMPINT_* predicate; the _mask_ variants pass k through first.
 */

/* 256-bit, signed 32-bit elements (remaining predicates). */
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* 128-bit, unsigned 32-bit elements. */
#define _mm_cmpeq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* 256-bit, unsigned 32-bit elements. */
#define _mm256_cmpeq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* 128-bit, signed 64-bit elements. */
#define _mm_cmpeq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* 256-bit, signed 64-bit elements. */
#define _mm256_cmpeq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* 128-bit, unsigned 64-bit elements. */
#define _mm_cmpeq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)

/* 256-bit, unsigned 64-bit elements. */
#define _mm256_cmpeq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
Andric #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ 2300b57cec5SDimitry Andric _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2330b57cec5SDimitry Andric _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 2340b57cec5SDimitry Andric { 2350b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2360b57cec5SDimitry Andric (__v8si)_mm256_add_epi32(__A, __B), 2370b57cec5SDimitry Andric (__v8si)__W); 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2410b57cec5SDimitry Andric _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) 2420b57cec5SDimitry Andric { 2430b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2440b57cec5SDimitry Andric (__v8si)_mm256_add_epi32(__A, __B), 2450b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2490b57cec5SDimitry Andric _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 2500b57cec5SDimitry Andric { 2510b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 2520b57cec5SDimitry Andric (__v4di)_mm256_add_epi64(__A, __B), 2530b57cec5SDimitry Andric (__v4di)__W); 2540b57cec5SDimitry Andric } 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2570b57cec5SDimitry Andric _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) 2580b57cec5SDimitry Andric { 2590b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 2600b57cec5SDimitry Andric (__v4di)_mm256_add_epi64(__A, __B), 2610b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2650b57cec5SDimitry Andric _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 2660b57cec5SDimitry Andric { 2670b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2680b57cec5SDimitry Andric (__v8si)_mm256_sub_epi32(__A, __B), 2690b57cec5SDimitry Andric (__v8si)__W); 2700b57cec5SDimitry Andric } 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2730b57cec5SDimitry Andric _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) 2740b57cec5SDimitry Andric { 2750b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2760b57cec5SDimitry Andric (__v8si)_mm256_sub_epi32(__A, __B), 2770b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 2780b57cec5SDimitry Andric } 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2810b57cec5SDimitry Andric _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 2820b57cec5SDimitry Andric { 2830b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 2840b57cec5SDimitry Andric (__v4di)_mm256_sub_epi64(__A, __B), 2850b57cec5SDimitry Andric (__v4di)__W); 2860b57cec5SDimitry Andric } 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 2890b57cec5SDimitry Andric _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) 2900b57cec5SDimitry Andric { 2910b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 2920b57cec5SDimitry Andric (__v4di)_mm256_sub_epi64(__A, __B), 2930b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 2970b57cec5SDimitry Andric _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 
2980b57cec5SDimitry Andric { 2990b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3000b57cec5SDimitry Andric (__v4si)_mm_add_epi32(__A, __B), 3010b57cec5SDimitry Andric (__v4si)__W); 3020b57cec5SDimitry Andric } 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3050b57cec5SDimitry Andric _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) 3060b57cec5SDimitry Andric { 3070b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3080b57cec5SDimitry Andric (__v4si)_mm_add_epi32(__A, __B), 3090b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3130b57cec5SDimitry Andric _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 3140b57cec5SDimitry Andric { 3150b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 3160b57cec5SDimitry Andric (__v2di)_mm_add_epi64(__A, __B), 3170b57cec5SDimitry Andric (__v2di)__W); 3180b57cec5SDimitry Andric } 3190b57cec5SDimitry Andric 3200b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3210b57cec5SDimitry Andric _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) 3220b57cec5SDimitry Andric { 3230b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 3240b57cec5SDimitry Andric (__v2di)_mm_add_epi64(__A, __B), 3250b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 3260b57cec5SDimitry Andric } 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3290b57cec5SDimitry Andric _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 3300b57cec5SDimitry Andric { 3310b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3320b57cec5SDimitry Andric (__v4si)_mm_sub_epi32(__A, __B), 3330b57cec5SDimitry Andric 
(__v4si)__W); 3340b57cec5SDimitry Andric } 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3370b57cec5SDimitry Andric _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) 3380b57cec5SDimitry Andric { 3390b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3400b57cec5SDimitry Andric (__v4si)_mm_sub_epi32(__A, __B), 3410b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3450b57cec5SDimitry Andric _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 3460b57cec5SDimitry Andric { 3470b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 3480b57cec5SDimitry Andric (__v2di)_mm_sub_epi64(__A, __B), 3490b57cec5SDimitry Andric (__v2di)__W); 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3530b57cec5SDimitry Andric _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) 3540b57cec5SDimitry Andric { 3550b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 3560b57cec5SDimitry Andric (__v2di)_mm_sub_epi64(__A, __B), 3570b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 3580b57cec5SDimitry Andric } 3590b57cec5SDimitry Andric 3600b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3610b57cec5SDimitry Andric _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 3620b57cec5SDimitry Andric { 3630b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3640b57cec5SDimitry Andric (__v4di)_mm256_mul_epi32(__X, __Y), 3650b57cec5SDimitry Andric (__v4di)__W); 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric 3680b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3690b57cec5SDimitry Andric _mm256_maskz_mul_epi32(__mmask8 
__M, __m256i __X, __m256i __Y) 3700b57cec5SDimitry Andric { 3710b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3720b57cec5SDimitry Andric (__v4di)_mm256_mul_epi32(__X, __Y), 3730b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 3740b57cec5SDimitry Andric } 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3770b57cec5SDimitry Andric _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 3780b57cec5SDimitry Andric { 3790b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3800b57cec5SDimitry Andric (__v2di)_mm_mul_epi32(__X, __Y), 3810b57cec5SDimitry Andric (__v2di)__W); 3820b57cec5SDimitry Andric } 3830b57cec5SDimitry Andric 3840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3850b57cec5SDimitry Andric _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) 3860b57cec5SDimitry Andric { 3870b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3880b57cec5SDimitry Andric (__v2di)_mm_mul_epi32(__X, __Y), 3890b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 3900b57cec5SDimitry Andric } 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3930b57cec5SDimitry Andric _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 3940b57cec5SDimitry Andric { 3950b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3960b57cec5SDimitry Andric (__v4di)_mm256_mul_epu32(__X, __Y), 3970b57cec5SDimitry Andric (__v4di)__W); 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4010b57cec5SDimitry Andric _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) 4020b57cec5SDimitry Andric { 4030b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 4040b57cec5SDimitry Andric 
(__v4di)_mm256_mul_epu32(__X, __Y), 4050b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4090b57cec5SDimitry Andric _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 4100b57cec5SDimitry Andric { 4110b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 4120b57cec5SDimitry Andric (__v2di)_mm_mul_epu32(__X, __Y), 4130b57cec5SDimitry Andric (__v2di)__W); 4140b57cec5SDimitry Andric } 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4170b57cec5SDimitry Andric _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) 4180b57cec5SDimitry Andric { 4190b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 4200b57cec5SDimitry Andric (__v2di)_mm_mul_epu32(__X, __Y), 4210b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 4220b57cec5SDimitry Andric } 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4250b57cec5SDimitry Andric _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) 4260b57cec5SDimitry Andric { 4270b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 4280b57cec5SDimitry Andric (__v8si)_mm256_mullo_epi32(__A, __B), 4290b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 4300b57cec5SDimitry Andric } 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4330b57cec5SDimitry Andric _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) 4340b57cec5SDimitry Andric { 4350b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 4360b57cec5SDimitry Andric (__v8si)_mm256_mullo_epi32(__A, __B), 4370b57cec5SDimitry Andric (__v8si)__W); 4380b57cec5SDimitry Andric } 4390b57cec5SDimitry Andric 4400b57cec5SDimitry Andric static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 4410b57cec5SDimitry Andric _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) 4420b57cec5SDimitry Andric { 4430b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 4440b57cec5SDimitry Andric (__v4si)_mm_mullo_epi32(__A, __B), 4450b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 4460b57cec5SDimitry Andric } 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4490b57cec5SDimitry Andric _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 4500b57cec5SDimitry Andric { 4510b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 4520b57cec5SDimitry Andric (__v4si)_mm_mullo_epi32(__A, __B), 4530b57cec5SDimitry Andric (__v4si)__W); 4540b57cec5SDimitry Andric } 4550b57cec5SDimitry Andric 4560b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4570b57cec5SDimitry Andric _mm256_and_epi32(__m256i __a, __m256i __b) 4580b57cec5SDimitry Andric { 4590b57cec5SDimitry Andric return (__m256i)((__v8su)__a & (__v8su)__b); 4600b57cec5SDimitry Andric } 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4630b57cec5SDimitry Andric _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4640b57cec5SDimitry Andric { 4650b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4660b57cec5SDimitry Andric (__v8si)_mm256_and_epi32(__A, __B), 4670b57cec5SDimitry Andric (__v8si)__W); 4680b57cec5SDimitry Andric } 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4710b57cec5SDimitry Andric _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) 4720b57cec5SDimitry Andric { 4730b57cec5SDimitry Andric return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); 4740b57cec5SDimitry Andric } 4750b57cec5SDimitry Andric 4760b57cec5SDimitry 
Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4770b57cec5SDimitry Andric _mm_and_epi32(__m128i __a, __m128i __b) 4780b57cec5SDimitry Andric { 4790b57cec5SDimitry Andric return (__m128i)((__v4su)__a & (__v4su)__b); 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4830b57cec5SDimitry Andric _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4840b57cec5SDimitry Andric { 4850b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4860b57cec5SDimitry Andric (__v4si)_mm_and_epi32(__A, __B), 4870b57cec5SDimitry Andric (__v4si)__W); 4880b57cec5SDimitry Andric } 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4910b57cec5SDimitry Andric _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4920b57cec5SDimitry Andric { 4930b57cec5SDimitry Andric return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); 4940b57cec5SDimitry Andric } 4950b57cec5SDimitry Andric 4960b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4970b57cec5SDimitry Andric _mm256_andnot_epi32(__m256i __A, __m256i __B) 4980b57cec5SDimitry Andric { 4990b57cec5SDimitry Andric return (__m256i)(~(__v8su)__A & (__v8su)__B); 5000b57cec5SDimitry Andric } 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5030b57cec5SDimitry Andric _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 5040b57cec5SDimitry Andric { 5050b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5060b57cec5SDimitry Andric (__v8si)_mm256_andnot_epi32(__A, __B), 5070b57cec5SDimitry Andric (__v8si)__W); 5080b57cec5SDimitry Andric } 5090b57cec5SDimitry Andric 5100b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5110b57cec5SDimitry Andric _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i 
__B) 5120b57cec5SDimitry Andric { 5130b57cec5SDimitry Andric return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), 5140b57cec5SDimitry Andric __U, __A, __B); 5150b57cec5SDimitry Andric } 5160b57cec5SDimitry Andric 5170b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5180b57cec5SDimitry Andric _mm_andnot_epi32(__m128i __A, __m128i __B) 5190b57cec5SDimitry Andric { 5200b57cec5SDimitry Andric return (__m128i)(~(__v4su)__A & (__v4su)__B); 5210b57cec5SDimitry Andric } 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5240b57cec5SDimitry Andric _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5250b57cec5SDimitry Andric { 5260b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5270b57cec5SDimitry Andric (__v4si)_mm_andnot_epi32(__A, __B), 5280b57cec5SDimitry Andric (__v4si)__W); 5290b57cec5SDimitry Andric } 5300b57cec5SDimitry Andric 5310b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5320b57cec5SDimitry Andric _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B) 5330b57cec5SDimitry Andric { 5340b57cec5SDimitry Andric return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); 5350b57cec5SDimitry Andric } 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5380b57cec5SDimitry Andric _mm256_or_epi32(__m256i __a, __m256i __b) 5390b57cec5SDimitry Andric { 5400b57cec5SDimitry Andric return (__m256i)((__v8su)__a | (__v8su)__b); 5410b57cec5SDimitry Andric } 5420b57cec5SDimitry Andric 5430b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5440b57cec5SDimitry Andric _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 5450b57cec5SDimitry Andric { 5460b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5470b57cec5SDimitry Andric (__v8si)_mm256_or_epi32(__A, __B), 
5480b57cec5SDimitry Andric (__v8si)__W); 5490b57cec5SDimitry Andric } 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5520b57cec5SDimitry Andric _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) 5530b57cec5SDimitry Andric { 5540b57cec5SDimitry Andric return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); 5550b57cec5SDimitry Andric } 5560b57cec5SDimitry Andric 5570b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5580b57cec5SDimitry Andric _mm_or_epi32(__m128i __a, __m128i __b) 5590b57cec5SDimitry Andric { 5600b57cec5SDimitry Andric return (__m128i)((__v4su)__a | (__v4su)__b); 5610b57cec5SDimitry Andric } 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5640b57cec5SDimitry Andric _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5650b57cec5SDimitry Andric { 5660b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5670b57cec5SDimitry Andric (__v4si)_mm_or_epi32(__A, __B), 5680b57cec5SDimitry Andric (__v4si)__W); 5690b57cec5SDimitry Andric } 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5720b57cec5SDimitry Andric _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) 5730b57cec5SDimitry Andric { 5740b57cec5SDimitry Andric return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); 5750b57cec5SDimitry Andric } 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5780b57cec5SDimitry Andric _mm256_xor_epi32(__m256i __a, __m256i __b) 5790b57cec5SDimitry Andric { 5800b57cec5SDimitry Andric return (__m256i)((__v8su)__a ^ (__v8su)__b); 5810b57cec5SDimitry Andric } 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5840b57cec5SDimitry Andric _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i 
__A, __m256i __B) 5850b57cec5SDimitry Andric { 5860b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5870b57cec5SDimitry Andric (__v8si)_mm256_xor_epi32(__A, __B), 5880b57cec5SDimitry Andric (__v8si)__W); 5890b57cec5SDimitry Andric } 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5920b57cec5SDimitry Andric _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) 5930b57cec5SDimitry Andric { 5940b57cec5SDimitry Andric return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); 5950b57cec5SDimitry Andric } 5960b57cec5SDimitry Andric 5970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5980b57cec5SDimitry Andric _mm_xor_epi32(__m128i __a, __m128i __b) 5990b57cec5SDimitry Andric { 6000b57cec5SDimitry Andric return (__m128i)((__v4su)__a ^ (__v4su)__b); 6010b57cec5SDimitry Andric } 6020b57cec5SDimitry Andric 6030b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6040b57cec5SDimitry Andric _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6050b57cec5SDimitry Andric { 6060b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6070b57cec5SDimitry Andric (__v4si)_mm_xor_epi32(__A, __B), 6080b57cec5SDimitry Andric (__v4si)__W); 6090b57cec5SDimitry Andric } 6100b57cec5SDimitry Andric 6110b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6120b57cec5SDimitry Andric _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6130b57cec5SDimitry Andric { 6140b57cec5SDimitry Andric return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); 6150b57cec5SDimitry Andric } 6160b57cec5SDimitry Andric 6170b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6180b57cec5SDimitry Andric _mm256_and_epi64(__m256i __a, __m256i __b) 6190b57cec5SDimitry Andric { 6200b57cec5SDimitry Andric return (__m256i)((__v4du)__a & (__v4du)__b); 
6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric 6230b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6240b57cec5SDimitry Andric _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6250b57cec5SDimitry Andric { 6260b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6270b57cec5SDimitry Andric (__v4di)_mm256_and_epi64(__A, __B), 6280b57cec5SDimitry Andric (__v4di)__W); 6290b57cec5SDimitry Andric } 6300b57cec5SDimitry Andric 6310b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6320b57cec5SDimitry Andric _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6330b57cec5SDimitry Andric { 6340b57cec5SDimitry Andric return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); 6350b57cec5SDimitry Andric } 6360b57cec5SDimitry Andric 6370b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6380b57cec5SDimitry Andric _mm_and_epi64(__m128i __a, __m128i __b) 6390b57cec5SDimitry Andric { 6400b57cec5SDimitry Andric return (__m128i)((__v2du)__a & (__v2du)__b); 6410b57cec5SDimitry Andric } 6420b57cec5SDimitry Andric 6430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6440b57cec5SDimitry Andric _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6450b57cec5SDimitry Andric { 6460b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6470b57cec5SDimitry Andric (__v2di)_mm_and_epi64(__A, __B), 6480b57cec5SDimitry Andric (__v2di)__W); 6490b57cec5SDimitry Andric } 6500b57cec5SDimitry Andric 6510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6520b57cec5SDimitry Andric _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6530b57cec5SDimitry Andric { 6540b57cec5SDimitry Andric return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); 6550b57cec5SDimitry Andric } 6560b57cec5SDimitry Andric 6570b57cec5SDimitry Andric static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 6580b57cec5SDimitry Andric _mm256_andnot_epi64(__m256i __A, __m256i __B) 6590b57cec5SDimitry Andric { 6600b57cec5SDimitry Andric return (__m256i)(~(__v4du)__A & (__v4du)__B); 6610b57cec5SDimitry Andric } 6620b57cec5SDimitry Andric 6630b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6640b57cec5SDimitry Andric _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6650b57cec5SDimitry Andric { 6660b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6670b57cec5SDimitry Andric (__v4di)_mm256_andnot_epi64(__A, __B), 6680b57cec5SDimitry Andric (__v4di)__W); 6690b57cec5SDimitry Andric } 6700b57cec5SDimitry Andric 6710b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6720b57cec5SDimitry Andric _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6730b57cec5SDimitry Andric { 6740b57cec5SDimitry Andric return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), 6750b57cec5SDimitry Andric __U, __A, __B); 6760b57cec5SDimitry Andric } 6770b57cec5SDimitry Andric 6780b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6790b57cec5SDimitry Andric _mm_andnot_epi64(__m128i __A, __m128i __B) 6800b57cec5SDimitry Andric { 6810b57cec5SDimitry Andric return (__m128i)(~(__v2du)__A & (__v2du)__B); 6820b57cec5SDimitry Andric } 6830b57cec5SDimitry Andric 6840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6850b57cec5SDimitry Andric _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6860b57cec5SDimitry Andric { 6870b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6880b57cec5SDimitry Andric (__v2di)_mm_andnot_epi64(__A, __B), 6890b57cec5SDimitry Andric (__v2di)__W); 6900b57cec5SDimitry Andric } 6910b57cec5SDimitry Andric 6920b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6930b57cec5SDimitry Andric _mm_maskz_andnot_epi64(__mmask8 __U, 
__m128i __A, __m128i __B) 6940b57cec5SDimitry Andric { 6950b57cec5SDimitry Andric return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); 6960b57cec5SDimitry Andric } 6970b57cec5SDimitry Andric 6980b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6990b57cec5SDimitry Andric _mm256_or_epi64(__m256i __a, __m256i __b) 7000b57cec5SDimitry Andric { 7010b57cec5SDimitry Andric return (__m256i)((__v4du)__a | (__v4du)__b); 7020b57cec5SDimitry Andric } 7030b57cec5SDimitry Andric 7040b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 7050b57cec5SDimitry Andric _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 7060b57cec5SDimitry Andric { 7070b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 7080b57cec5SDimitry Andric (__v4di)_mm256_or_epi64(__A, __B), 7090b57cec5SDimitry Andric (__v4di)__W); 7100b57cec5SDimitry Andric } 7110b57cec5SDimitry Andric 7120b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 7130b57cec5SDimitry Andric _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) 7140b57cec5SDimitry Andric { 7150b57cec5SDimitry Andric return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); 7160b57cec5SDimitry Andric } 7170b57cec5SDimitry Andric 7180b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 7190b57cec5SDimitry Andric _mm_or_epi64(__m128i __a, __m128i __b) 7200b57cec5SDimitry Andric { 7210b57cec5SDimitry Andric return (__m128i)((__v2du)__a | (__v2du)__b); 7220b57cec5SDimitry Andric } 7230b57cec5SDimitry Andric 7240b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 7250b57cec5SDimitry Andric _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 7260b57cec5SDimitry Andric { 7270b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 7280b57cec5SDimitry Andric (__v2di)_mm_or_epi64(__A, __B), 7290b57cec5SDimitry Andric 
(__v2di)__W); 7300b57cec5SDimitry Andric } 7310b57cec5SDimitry Andric 7320b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 7330b57cec5SDimitry Andric _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) 7340b57cec5SDimitry Andric { 7350b57cec5SDimitry Andric return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); 7360b57cec5SDimitry Andric } 7370b57cec5SDimitry Andric 7380b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 7390b57cec5SDimitry Andric _mm256_xor_epi64(__m256i __a, __m256i __b) 7400b57cec5SDimitry Andric { 7410b57cec5SDimitry Andric return (__m256i)((__v4du)__a ^ (__v4du)__b); 7420b57cec5SDimitry Andric } 7430b57cec5SDimitry Andric 7440b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 7450b57cec5SDimitry Andric _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 7460b57cec5SDimitry Andric { 7470b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 7480b57cec5SDimitry Andric (__v4di)_mm256_xor_epi64(__A, __B), 7490b57cec5SDimitry Andric (__v4di)__W); 7500b57cec5SDimitry Andric } 7510b57cec5SDimitry Andric 7520b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 7530b57cec5SDimitry Andric _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) 7540b57cec5SDimitry Andric { 7550b57cec5SDimitry Andric return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); 7560b57cec5SDimitry Andric } 7570b57cec5SDimitry Andric 7580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 7590b57cec5SDimitry Andric _mm_xor_epi64(__m128i __a, __m128i __b) 7600b57cec5SDimitry Andric { 7610b57cec5SDimitry Andric return (__m128i)((__v2du)__a ^ (__v2du)__b); 7620b57cec5SDimitry Andric } 7630b57cec5SDimitry Andric 7640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 7650b57cec5SDimitry Andric _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, 
7660b57cec5SDimitry Andric __m128i __B) 7670b57cec5SDimitry Andric { 7680b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 7690b57cec5SDimitry Andric (__v2di)_mm_xor_epi64(__A, __B), 7700b57cec5SDimitry Andric (__v2di)__W); 7710b57cec5SDimitry Andric } 7720b57cec5SDimitry Andric 7730b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 7740b57cec5SDimitry Andric _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) 7750b57cec5SDimitry Andric { 7760b57cec5SDimitry Andric return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); 7770b57cec5SDimitry Andric } 7780b57cec5SDimitry Andric 7790b57cec5SDimitry Andric #define _mm_cmp_epi32_mask(a, b, p) \ 780349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 7810b57cec5SDimitry Andric (__v4si)(__m128i)(b), (int)(p), \ 782349cc55cSDimitry Andric (__mmask8)-1)) 7830b57cec5SDimitry Andric 7840b57cec5SDimitry Andric #define _mm_mask_cmp_epi32_mask(m, a, b, p) \ 785349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 7860b57cec5SDimitry Andric (__v4si)(__m128i)(b), (int)(p), \ 787349cc55cSDimitry Andric (__mmask8)(m))) 7880b57cec5SDimitry Andric 7890b57cec5SDimitry Andric #define _mm_cmp_epu32_mask(a, b, p) \ 790349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 7910b57cec5SDimitry Andric (__v4si)(__m128i)(b), (int)(p), \ 792349cc55cSDimitry Andric (__mmask8)-1)) 7930b57cec5SDimitry Andric 7940b57cec5SDimitry Andric #define _mm_mask_cmp_epu32_mask(m, a, b, p) \ 795349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 7960b57cec5SDimitry Andric (__v4si)(__m128i)(b), (int)(p), \ 797349cc55cSDimitry Andric (__mmask8)(m))) 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric #define _mm256_cmp_epi32_mask(a, b, p) \ 800349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 8010b57cec5SDimitry 
Andric (__v8si)(__m256i)(b), (int)(p), \ 802349cc55cSDimitry Andric (__mmask8)-1)) 8030b57cec5SDimitry Andric 8040b57cec5SDimitry Andric #define _mm256_mask_cmp_epi32_mask(m, a, b, p) \ 805349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 8060b57cec5SDimitry Andric (__v8si)(__m256i)(b), (int)(p), \ 807349cc55cSDimitry Andric (__mmask8)(m))) 8080b57cec5SDimitry Andric 8090b57cec5SDimitry Andric #define _mm256_cmp_epu32_mask(a, b, p) \ 810349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 8110b57cec5SDimitry Andric (__v8si)(__m256i)(b), (int)(p), \ 812349cc55cSDimitry Andric (__mmask8)-1)) 8130b57cec5SDimitry Andric 8140b57cec5SDimitry Andric #define _mm256_mask_cmp_epu32_mask(m, a, b, p) \ 815349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 8160b57cec5SDimitry Andric (__v8si)(__m256i)(b), (int)(p), \ 817349cc55cSDimitry Andric (__mmask8)(m))) 8180b57cec5SDimitry Andric 8190b57cec5SDimitry Andric #define _mm_cmp_epi64_mask(a, b, p) \ 820349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 8210b57cec5SDimitry Andric (__v2di)(__m128i)(b), (int)(p), \ 822349cc55cSDimitry Andric (__mmask8)-1)) 8230b57cec5SDimitry Andric 8240b57cec5SDimitry Andric #define _mm_mask_cmp_epi64_mask(m, a, b, p) \ 825349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 8260b57cec5SDimitry Andric (__v2di)(__m128i)(b), (int)(p), \ 827349cc55cSDimitry Andric (__mmask8)(m))) 8280b57cec5SDimitry Andric 8290b57cec5SDimitry Andric #define _mm_cmp_epu64_mask(a, b, p) \ 830349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 8310b57cec5SDimitry Andric (__v2di)(__m128i)(b), (int)(p), \ 832349cc55cSDimitry Andric (__mmask8)-1)) 8330b57cec5SDimitry Andric 8340b57cec5SDimitry Andric #define _mm_mask_cmp_epu64_mask(m, a, b, p) \ 835349cc55cSDimitry Andric 
((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 8360b57cec5SDimitry Andric (__v2di)(__m128i)(b), (int)(p), \ 837349cc55cSDimitry Andric (__mmask8)(m))) 8380b57cec5SDimitry Andric 8390b57cec5SDimitry Andric #define _mm256_cmp_epi64_mask(a, b, p) \ 840349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 8410b57cec5SDimitry Andric (__v4di)(__m256i)(b), (int)(p), \ 842349cc55cSDimitry Andric (__mmask8)-1)) 8430b57cec5SDimitry Andric 8440b57cec5SDimitry Andric #define _mm256_mask_cmp_epi64_mask(m, a, b, p) \ 845349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 8460b57cec5SDimitry Andric (__v4di)(__m256i)(b), (int)(p), \ 847349cc55cSDimitry Andric (__mmask8)(m))) 8480b57cec5SDimitry Andric 8490b57cec5SDimitry Andric #define _mm256_cmp_epu64_mask(a, b, p) \ 850349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 8510b57cec5SDimitry Andric (__v4di)(__m256i)(b), (int)(p), \ 852349cc55cSDimitry Andric (__mmask8)-1)) 8530b57cec5SDimitry Andric 8540b57cec5SDimitry Andric #define _mm256_mask_cmp_epu64_mask(m, a, b, p) \ 855349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 8560b57cec5SDimitry Andric (__v4di)(__m256i)(b), (int)(p), \ 857349cc55cSDimitry Andric (__mmask8)(m))) 8580b57cec5SDimitry Andric 8590b57cec5SDimitry Andric #define _mm256_cmp_ps_mask(a, b, p) \ 860349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 8610b57cec5SDimitry Andric (__v8sf)(__m256)(b), (int)(p), \ 862349cc55cSDimitry Andric (__mmask8)-1)) 8630b57cec5SDimitry Andric 8640b57cec5SDimitry Andric #define _mm256_mask_cmp_ps_mask(m, a, b, p) \ 865349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 8660b57cec5SDimitry Andric (__v8sf)(__m256)(b), (int)(p), \ 867349cc55cSDimitry Andric (__mmask8)(m))) 8680b57cec5SDimitry Andric 8690b57cec5SDimitry Andric #define 
_mm256_cmp_pd_mask(a, b, p) \ 870349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 8710b57cec5SDimitry Andric (__v4df)(__m256d)(b), (int)(p), \ 872349cc55cSDimitry Andric (__mmask8)-1)) 8730b57cec5SDimitry Andric 8740b57cec5SDimitry Andric #define _mm256_mask_cmp_pd_mask(m, a, b, p) \ 875349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 8760b57cec5SDimitry Andric (__v4df)(__m256d)(b), (int)(p), \ 877349cc55cSDimitry Andric (__mmask8)(m))) 8780b57cec5SDimitry Andric 8790b57cec5SDimitry Andric #define _mm_cmp_ps_mask(a, b, p) \ 880349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 8810b57cec5SDimitry Andric (__v4sf)(__m128)(b), (int)(p), \ 882349cc55cSDimitry Andric (__mmask8)-1)) 8830b57cec5SDimitry Andric 8840b57cec5SDimitry Andric #define _mm_mask_cmp_ps_mask(m, a, b, p) \ 885349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 8860b57cec5SDimitry Andric (__v4sf)(__m128)(b), (int)(p), \ 887349cc55cSDimitry Andric (__mmask8)(m))) 8880b57cec5SDimitry Andric 8890b57cec5SDimitry Andric #define _mm_cmp_pd_mask(a, b, p) \ 890349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 8910b57cec5SDimitry Andric (__v2df)(__m128d)(b), (int)(p), \ 892349cc55cSDimitry Andric (__mmask8)-1)) 8930b57cec5SDimitry Andric 8940b57cec5SDimitry Andric #define _mm_mask_cmp_pd_mask(m, a, b, p) \ 895349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 8960b57cec5SDimitry Andric (__v2df)(__m128d)(b), (int)(p), \ 897349cc55cSDimitry Andric (__mmask8)(m))) 8980b57cec5SDimitry Andric 8990b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 9000b57cec5SDimitry Andric _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 9010b57cec5SDimitry Andric { 9020b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 
9030b57cec5SDimitry Andric __builtin_ia32_vfmaddpd ((__v2df) __A, 9040b57cec5SDimitry Andric (__v2df) __B, 9050b57cec5SDimitry Andric (__v2df) __C), 9060b57cec5SDimitry Andric (__v2df) __A); 9070b57cec5SDimitry Andric } 9080b57cec5SDimitry Andric 9090b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 9100b57cec5SDimitry Andric _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 9110b57cec5SDimitry Andric { 9120b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 9130b57cec5SDimitry Andric __builtin_ia32_vfmaddpd ((__v2df) __A, 9140b57cec5SDimitry Andric (__v2df) __B, 9150b57cec5SDimitry Andric (__v2df) __C), 9160b57cec5SDimitry Andric (__v2df) __C); 9170b57cec5SDimitry Andric } 9180b57cec5SDimitry Andric 9190b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 9200b57cec5SDimitry Andric _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 9210b57cec5SDimitry Andric { 9220b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 9230b57cec5SDimitry Andric __builtin_ia32_vfmaddpd ((__v2df) __A, 9240b57cec5SDimitry Andric (__v2df) __B, 9250b57cec5SDimitry Andric (__v2df) __C), 9260b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 9270b57cec5SDimitry Andric } 9280b57cec5SDimitry Andric 9290b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 9300b57cec5SDimitry Andric _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 9310b57cec5SDimitry Andric { 9320b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 9330b57cec5SDimitry Andric __builtin_ia32_vfmaddpd ((__v2df) __A, 9340b57cec5SDimitry Andric (__v2df) __B, 9350b57cec5SDimitry Andric -(__v2df) __C), 9360b57cec5SDimitry Andric (__v2df) __A); 9370b57cec5SDimitry Andric } 9380b57cec5SDimitry Andric 9390b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 9400b57cec5SDimitry Andric 
_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 9410b57cec5SDimitry Andric { 9420b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 9430b57cec5SDimitry Andric __builtin_ia32_vfmaddpd ((__v2df) __A, 9440b57cec5SDimitry Andric (__v2df) __B, 9450b57cec5SDimitry Andric -(__v2df) __C), 9460b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 9470b57cec5SDimitry Andric } 9480b57cec5SDimitry Andric 9490b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 9500b57cec5SDimitry Andric _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 9510b57cec5SDimitry Andric { 9520b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 9530b57cec5SDimitry Andric __builtin_ia32_vfmaddpd (-(__v2df) __A, 9540b57cec5SDimitry Andric (__v2df) __B, 9550b57cec5SDimitry Andric (__v2df) __C), 9560b57cec5SDimitry Andric (__v2df) __C); 9570b57cec5SDimitry Andric } 9580b57cec5SDimitry Andric 9590b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 9600b57cec5SDimitry Andric _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 9610b57cec5SDimitry Andric { 9620b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 9630b57cec5SDimitry Andric __builtin_ia32_vfmaddpd (-(__v2df) __A, 9640b57cec5SDimitry Andric (__v2df) __B, 9650b57cec5SDimitry Andric (__v2df) __C), 9660b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 9670b57cec5SDimitry Andric } 9680b57cec5SDimitry Andric 9690b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 9700b57cec5SDimitry Andric _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 9710b57cec5SDimitry Andric { 9720b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 9730b57cec5SDimitry Andric __builtin_ia32_vfmaddpd (-(__v2df) __A, 9740b57cec5SDimitry Andric (__v2df) __B, 9750b57cec5SDimitry Andric -(__v2df) __C), 
9760b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 9770b57cec5SDimitry Andric } 9780b57cec5SDimitry Andric 9790b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 9800b57cec5SDimitry Andric _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 9810b57cec5SDimitry Andric { 9820b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 9830b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 ((__v4df) __A, 9840b57cec5SDimitry Andric (__v4df) __B, 9850b57cec5SDimitry Andric (__v4df) __C), 9860b57cec5SDimitry Andric (__v4df) __A); 9870b57cec5SDimitry Andric } 9880b57cec5SDimitry Andric 9890b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 9900b57cec5SDimitry Andric _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 9910b57cec5SDimitry Andric { 9920b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 9930b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 ((__v4df) __A, 9940b57cec5SDimitry Andric (__v4df) __B, 9950b57cec5SDimitry Andric (__v4df) __C), 9960b57cec5SDimitry Andric (__v4df) __C); 9970b57cec5SDimitry Andric } 9980b57cec5SDimitry Andric 9990b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 10000b57cec5SDimitry Andric _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 10010b57cec5SDimitry Andric { 10020b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 10030b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 ((__v4df) __A, 10040b57cec5SDimitry Andric (__v4df) __B, 10050b57cec5SDimitry Andric (__v4df) __C), 10060b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 10070b57cec5SDimitry Andric } 10080b57cec5SDimitry Andric 10090b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 10100b57cec5SDimitry Andric _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 10110b57cec5SDimitry Andric { 10120b57cec5SDimitry 
Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 10130b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 ((__v4df) __A, 10140b57cec5SDimitry Andric (__v4df) __B, 10150b57cec5SDimitry Andric -(__v4df) __C), 10160b57cec5SDimitry Andric (__v4df) __A); 10170b57cec5SDimitry Andric } 10180b57cec5SDimitry Andric 10190b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 10200b57cec5SDimitry Andric _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 10210b57cec5SDimitry Andric { 10220b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 10230b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 ((__v4df) __A, 10240b57cec5SDimitry Andric (__v4df) __B, 10250b57cec5SDimitry Andric -(__v4df) __C), 10260b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 10270b57cec5SDimitry Andric } 10280b57cec5SDimitry Andric 10290b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 10300b57cec5SDimitry Andric _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 10310b57cec5SDimitry Andric { 10320b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 10330b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 10340b57cec5SDimitry Andric (__v4df) __B, 10350b57cec5SDimitry Andric (__v4df) __C), 10360b57cec5SDimitry Andric (__v4df) __C); 10370b57cec5SDimitry Andric } 10380b57cec5SDimitry Andric 10390b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 10400b57cec5SDimitry Andric _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 10410b57cec5SDimitry Andric { 10420b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 10430b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 10440b57cec5SDimitry Andric (__v4df) __B, 10450b57cec5SDimitry Andric (__v4df) __C), 10460b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 10470b57cec5SDimitry Andric } 
10480b57cec5SDimitry Andric 10490b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 10500b57cec5SDimitry Andric _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 10510b57cec5SDimitry Andric { 10520b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 10530b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 10540b57cec5SDimitry Andric (__v4df) __B, 10550b57cec5SDimitry Andric -(__v4df) __C), 10560b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 10570b57cec5SDimitry Andric } 10580b57cec5SDimitry Andric 10590b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 10600b57cec5SDimitry Andric _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 10610b57cec5SDimitry Andric { 10620b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 10630b57cec5SDimitry Andric __builtin_ia32_vfmaddps ((__v4sf) __A, 10640b57cec5SDimitry Andric (__v4sf) __B, 10650b57cec5SDimitry Andric (__v4sf) __C), 10660b57cec5SDimitry Andric (__v4sf) __A); 10670b57cec5SDimitry Andric } 10680b57cec5SDimitry Andric 10690b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 10700b57cec5SDimitry Andric _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 10710b57cec5SDimitry Andric { 10720b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 10730b57cec5SDimitry Andric __builtin_ia32_vfmaddps ((__v4sf) __A, 10740b57cec5SDimitry Andric (__v4sf) __B, 10750b57cec5SDimitry Andric (__v4sf) __C), 10760b57cec5SDimitry Andric (__v4sf) __C); 10770b57cec5SDimitry Andric } 10780b57cec5SDimitry Andric 10790b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 10800b57cec5SDimitry Andric _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 10810b57cec5SDimitry Andric { 10820b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 10830b57cec5SDimitry 
Andric __builtin_ia32_vfmaddps ((__v4sf) __A, 10840b57cec5SDimitry Andric (__v4sf) __B, 10850b57cec5SDimitry Andric (__v4sf) __C), 10860b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 10870b57cec5SDimitry Andric } 10880b57cec5SDimitry Andric 10890b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 10900b57cec5SDimitry Andric _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 10910b57cec5SDimitry Andric { 10920b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 10930b57cec5SDimitry Andric __builtin_ia32_vfmaddps ((__v4sf) __A, 10940b57cec5SDimitry Andric (__v4sf) __B, 10950b57cec5SDimitry Andric -(__v4sf) __C), 10960b57cec5SDimitry Andric (__v4sf) __A); 10970b57cec5SDimitry Andric } 10980b57cec5SDimitry Andric 10990b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 11000b57cec5SDimitry Andric _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 11010b57cec5SDimitry Andric { 11020b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 11030b57cec5SDimitry Andric __builtin_ia32_vfmaddps ((__v4sf) __A, 11040b57cec5SDimitry Andric (__v4sf) __B, 11050b57cec5SDimitry Andric -(__v4sf) __C), 11060b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 11070b57cec5SDimitry Andric } 11080b57cec5SDimitry Andric 11090b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 11100b57cec5SDimitry Andric _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 11110b57cec5SDimitry Andric { 11120b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 11130b57cec5SDimitry Andric __builtin_ia32_vfmaddps (-(__v4sf) __A, 11140b57cec5SDimitry Andric (__v4sf) __B, 11150b57cec5SDimitry Andric (__v4sf) __C), 11160b57cec5SDimitry Andric (__v4sf) __C); 11170b57cec5SDimitry Andric } 11180b57cec5SDimitry Andric 11190b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 11200b57cec5SDimitry Andric 
_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 11210b57cec5SDimitry Andric { 11220b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 11230b57cec5SDimitry Andric __builtin_ia32_vfmaddps (-(__v4sf) __A, 11240b57cec5SDimitry Andric (__v4sf) __B, 11250b57cec5SDimitry Andric (__v4sf) __C), 11260b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 11270b57cec5SDimitry Andric } 11280b57cec5SDimitry Andric 11290b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 11300b57cec5SDimitry Andric _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 11310b57cec5SDimitry Andric { 11320b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 11330b57cec5SDimitry Andric __builtin_ia32_vfmaddps (-(__v4sf) __A, 11340b57cec5SDimitry Andric (__v4sf) __B, 11350b57cec5SDimitry Andric -(__v4sf) __C), 11360b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 11370b57cec5SDimitry Andric } 11380b57cec5SDimitry Andric 11390b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 11400b57cec5SDimitry Andric _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 11410b57cec5SDimitry Andric { 11420b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 11430b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 ((__v8sf) __A, 11440b57cec5SDimitry Andric (__v8sf) __B, 11450b57cec5SDimitry Andric (__v8sf) __C), 11460b57cec5SDimitry Andric (__v8sf) __A); 11470b57cec5SDimitry Andric } 11480b57cec5SDimitry Andric 11490b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 11500b57cec5SDimitry Andric _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 11510b57cec5SDimitry Andric { 11520b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 11530b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 ((__v8sf) __A, 11540b57cec5SDimitry Andric (__v8sf) __B, 11550b57cec5SDimitry Andric 
(__v8sf) __C), 11560b57cec5SDimitry Andric (__v8sf) __C); 11570b57cec5SDimitry Andric } 11580b57cec5SDimitry Andric 11590b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 11600b57cec5SDimitry Andric _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 11610b57cec5SDimitry Andric { 11620b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 11630b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 ((__v8sf) __A, 11640b57cec5SDimitry Andric (__v8sf) __B, 11650b57cec5SDimitry Andric (__v8sf) __C), 11660b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 11670b57cec5SDimitry Andric } 11680b57cec5SDimitry Andric 11690b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 11700b57cec5SDimitry Andric _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 11710b57cec5SDimitry Andric { 11720b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 11730b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 ((__v8sf) __A, 11740b57cec5SDimitry Andric (__v8sf) __B, 11750b57cec5SDimitry Andric -(__v8sf) __C), 11760b57cec5SDimitry Andric (__v8sf) __A); 11770b57cec5SDimitry Andric } 11780b57cec5SDimitry Andric 11790b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 11800b57cec5SDimitry Andric _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 11810b57cec5SDimitry Andric { 11820b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 11830b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 ((__v8sf) __A, 11840b57cec5SDimitry Andric (__v8sf) __B, 11850b57cec5SDimitry Andric -(__v8sf) __C), 11860b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 11870b57cec5SDimitry Andric } 11880b57cec5SDimitry Andric 11890b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 11900b57cec5SDimitry Andric _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 11910b57cec5SDimitry Andric { 
11920b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 11930b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 11940b57cec5SDimitry Andric (__v8sf) __B, 11950b57cec5SDimitry Andric (__v8sf) __C), 11960b57cec5SDimitry Andric (__v8sf) __C); 11970b57cec5SDimitry Andric } 11980b57cec5SDimitry Andric 11990b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 12000b57cec5SDimitry Andric _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 12010b57cec5SDimitry Andric { 12020b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 12030b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 12040b57cec5SDimitry Andric (__v8sf) __B, 12050b57cec5SDimitry Andric (__v8sf) __C), 12060b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 12070b57cec5SDimitry Andric } 12080b57cec5SDimitry Andric 12090b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 12100b57cec5SDimitry Andric _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 12110b57cec5SDimitry Andric { 12120b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 12130b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 12140b57cec5SDimitry Andric (__v8sf) __B, 12150b57cec5SDimitry Andric -(__v8sf) __C), 12160b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 12170b57cec5SDimitry Andric } 12180b57cec5SDimitry Andric 12190b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 12200b57cec5SDimitry Andric _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 12210b57cec5SDimitry Andric { 12220b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 12230b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd ((__v2df) __A, 12240b57cec5SDimitry Andric (__v2df) __B, 12250b57cec5SDimitry Andric (__v2df) __C), 12260b57cec5SDimitry Andric (__v2df) __A); 12270b57cec5SDimitry 
Andric } 12280b57cec5SDimitry Andric 12290b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 12300b57cec5SDimitry Andric _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 12310b57cec5SDimitry Andric { 12320b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 12330b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd ((__v2df) __A, 12340b57cec5SDimitry Andric (__v2df) __B, 12350b57cec5SDimitry Andric (__v2df) __C), 12360b57cec5SDimitry Andric (__v2df) __C); 12370b57cec5SDimitry Andric } 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 12400b57cec5SDimitry Andric _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 12410b57cec5SDimitry Andric { 12420b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 12430b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd ((__v2df) __A, 12440b57cec5SDimitry Andric (__v2df) __B, 12450b57cec5SDimitry Andric (__v2df) __C), 12460b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 12470b57cec5SDimitry Andric } 12480b57cec5SDimitry Andric 12490b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 12500b57cec5SDimitry Andric _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 12510b57cec5SDimitry Andric { 12520b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 12530b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd ((__v2df) __A, 12540b57cec5SDimitry Andric (__v2df) __B, 12550b57cec5SDimitry Andric -(__v2df) __C), 12560b57cec5SDimitry Andric (__v2df) __A); 12570b57cec5SDimitry Andric } 12580b57cec5SDimitry Andric 12590b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 12600b57cec5SDimitry Andric _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 12610b57cec5SDimitry Andric { 12620b57cec5SDimitry Andric return (__m128d) 
__builtin_ia32_selectpd_128((__mmask8) __U, 12630b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd ((__v2df) __A, 12640b57cec5SDimitry Andric (__v2df) __B, 12650b57cec5SDimitry Andric -(__v2df) __C), 12660b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 12670b57cec5SDimitry Andric } 12680b57cec5SDimitry Andric 12690b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 12700b57cec5SDimitry Andric _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 12710b57cec5SDimitry Andric { 12720b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 12730b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 12740b57cec5SDimitry Andric (__v4df) __B, 12750b57cec5SDimitry Andric (__v4df) __C), 12760b57cec5SDimitry Andric (__v4df) __A); 12770b57cec5SDimitry Andric } 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 12800b57cec5SDimitry Andric _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 12810b57cec5SDimitry Andric { 12820b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 12830b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 12840b57cec5SDimitry Andric (__v4df) __B, 12850b57cec5SDimitry Andric (__v4df) __C), 12860b57cec5SDimitry Andric (__v4df) __C); 12870b57cec5SDimitry Andric } 12880b57cec5SDimitry Andric 12890b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 12900b57cec5SDimitry Andric _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 12910b57cec5SDimitry Andric { 12920b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 12930b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 12940b57cec5SDimitry Andric (__v4df) __B, 12950b57cec5SDimitry Andric (__v4df) __C), 12960b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 12970b57cec5SDimitry Andric } 12980b57cec5SDimitry 
Andric 12990b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 13000b57cec5SDimitry Andric _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 13010b57cec5SDimitry Andric { 13020b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 13030b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 13040b57cec5SDimitry Andric (__v4df) __B, 13050b57cec5SDimitry Andric -(__v4df) __C), 13060b57cec5SDimitry Andric (__v4df) __A); 13070b57cec5SDimitry Andric } 13080b57cec5SDimitry Andric 13090b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 13100b57cec5SDimitry Andric _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 13110b57cec5SDimitry Andric { 13120b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 13130b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 13140b57cec5SDimitry Andric (__v4df) __B, 13150b57cec5SDimitry Andric -(__v4df) __C), 13160b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 13170b57cec5SDimitry Andric } 13180b57cec5SDimitry Andric 13190b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 13200b57cec5SDimitry Andric _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 13210b57cec5SDimitry Andric { 13220b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 13230b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps ((__v4sf) __A, 13240b57cec5SDimitry Andric (__v4sf) __B, 13250b57cec5SDimitry Andric (__v4sf) __C), 13260b57cec5SDimitry Andric (__v4sf) __A); 13270b57cec5SDimitry Andric } 13280b57cec5SDimitry Andric 13290b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 13300b57cec5SDimitry Andric _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 13310b57cec5SDimitry Andric { 13320b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 
13330b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps ((__v4sf) __A, 13340b57cec5SDimitry Andric (__v4sf) __B, 13350b57cec5SDimitry Andric (__v4sf) __C), 13360b57cec5SDimitry Andric (__v4sf) __C); 13370b57cec5SDimitry Andric } 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 13400b57cec5SDimitry Andric _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 13410b57cec5SDimitry Andric { 13420b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 13430b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps ((__v4sf) __A, 13440b57cec5SDimitry Andric (__v4sf) __B, 13450b57cec5SDimitry Andric (__v4sf) __C), 13460b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 13470b57cec5SDimitry Andric } 13480b57cec5SDimitry Andric 13490b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 13500b57cec5SDimitry Andric _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 13510b57cec5SDimitry Andric { 13520b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 13530b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps ((__v4sf) __A, 13540b57cec5SDimitry Andric (__v4sf) __B, 13550b57cec5SDimitry Andric -(__v4sf) __C), 13560b57cec5SDimitry Andric (__v4sf) __A); 13570b57cec5SDimitry Andric } 13580b57cec5SDimitry Andric 13590b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 13600b57cec5SDimitry Andric _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 13610b57cec5SDimitry Andric { 13620b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 13630b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps ((__v4sf) __A, 13640b57cec5SDimitry Andric (__v4sf) __B, 13650b57cec5SDimitry Andric -(__v4sf) __C), 13660b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 13670b57cec5SDimitry Andric } 13680b57cec5SDimitry Andric 13690b57cec5SDimitry Andric static __inline__ __m256 
__DEFAULT_FN_ATTRS256 13700b57cec5SDimitry Andric _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, 13710b57cec5SDimitry Andric __m256 __C) 13720b57cec5SDimitry Andric { 13730b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 13740b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 13750b57cec5SDimitry Andric (__v8sf) __B, 13760b57cec5SDimitry Andric (__v8sf) __C), 13770b57cec5SDimitry Andric (__v8sf) __A); 13780b57cec5SDimitry Andric } 13790b57cec5SDimitry Andric 13800b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 13810b57cec5SDimitry Andric _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 13820b57cec5SDimitry Andric { 13830b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 13840b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 13850b57cec5SDimitry Andric (__v8sf) __B, 13860b57cec5SDimitry Andric (__v8sf) __C), 13870b57cec5SDimitry Andric (__v8sf) __C); 13880b57cec5SDimitry Andric } 13890b57cec5SDimitry Andric 13900b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 13910b57cec5SDimitry Andric _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 13920b57cec5SDimitry Andric { 13930b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 13940b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 13950b57cec5SDimitry Andric (__v8sf) __B, 13960b57cec5SDimitry Andric (__v8sf) __C), 13970b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 13980b57cec5SDimitry Andric } 13990b57cec5SDimitry Andric 14000b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 14010b57cec5SDimitry Andric _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 14020b57cec5SDimitry Andric { 14030b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 14040b57cec5SDimitry Andric 
__builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 14050b57cec5SDimitry Andric (__v8sf) __B, 14060b57cec5SDimitry Andric -(__v8sf) __C), 14070b57cec5SDimitry Andric (__v8sf) __A); 14080b57cec5SDimitry Andric } 14090b57cec5SDimitry Andric 14100b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 14110b57cec5SDimitry Andric _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 14120b57cec5SDimitry Andric { 14130b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 14140b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 14150b57cec5SDimitry Andric (__v8sf) __B, 14160b57cec5SDimitry Andric -(__v8sf) __C), 14170b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 14180b57cec5SDimitry Andric } 14190b57cec5SDimitry Andric 14200b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 14210b57cec5SDimitry Andric _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 14220b57cec5SDimitry Andric { 14230b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 14240b57cec5SDimitry Andric __builtin_ia32_vfmaddpd ((__v2df) __A, 14250b57cec5SDimitry Andric (__v2df) __B, 14260b57cec5SDimitry Andric -(__v2df) __C), 14270b57cec5SDimitry Andric (__v2df) __C); 14280b57cec5SDimitry Andric } 14290b57cec5SDimitry Andric 14300b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 14310b57cec5SDimitry Andric _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 14320b57cec5SDimitry Andric { 14330b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 14340b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 ((__v4df) __A, 14350b57cec5SDimitry Andric (__v4df) __B, 14360b57cec5SDimitry Andric -(__v4df) __C), 14370b57cec5SDimitry Andric (__v4df) __C); 14380b57cec5SDimitry Andric } 14390b57cec5SDimitry Andric 14400b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 
14410b57cec5SDimitry Andric _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 14420b57cec5SDimitry Andric { 14430b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 14440b57cec5SDimitry Andric __builtin_ia32_vfmaddps ((__v4sf) __A, 14450b57cec5SDimitry Andric (__v4sf) __B, 14460b57cec5SDimitry Andric -(__v4sf) __C), 14470b57cec5SDimitry Andric (__v4sf) __C); 14480b57cec5SDimitry Andric } 14490b57cec5SDimitry Andric 14500b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 14510b57cec5SDimitry Andric _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 14520b57cec5SDimitry Andric { 14530b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 14540b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 ((__v8sf) __A, 14550b57cec5SDimitry Andric (__v8sf) __B, 14560b57cec5SDimitry Andric -(__v8sf) __C), 14570b57cec5SDimitry Andric (__v8sf) __C); 14580b57cec5SDimitry Andric } 14590b57cec5SDimitry Andric 14600b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 14610b57cec5SDimitry Andric _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 14620b57cec5SDimitry Andric { 14630b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 14640b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd ((__v2df) __A, 14650b57cec5SDimitry Andric (__v2df) __B, 14660b57cec5SDimitry Andric -(__v2df) __C), 14670b57cec5SDimitry Andric (__v2df) __C); 14680b57cec5SDimitry Andric } 14690b57cec5SDimitry Andric 14700b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 14710b57cec5SDimitry Andric _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 14720b57cec5SDimitry Andric { 14730b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 14740b57cec5SDimitry Andric __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 14750b57cec5SDimitry Andric (__v4df) __B, 
14760b57cec5SDimitry Andric -(__v4df) __C), 14770b57cec5SDimitry Andric (__v4df) __C); 14780b57cec5SDimitry Andric } 14790b57cec5SDimitry Andric 14800b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 14810b57cec5SDimitry Andric _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 14820b57cec5SDimitry Andric { 14830b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 14840b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps ((__v4sf) __A, 14850b57cec5SDimitry Andric (__v4sf) __B, 14860b57cec5SDimitry Andric -(__v4sf) __C), 14870b57cec5SDimitry Andric (__v4sf) __C); 14880b57cec5SDimitry Andric } 14890b57cec5SDimitry Andric 14900b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 14910b57cec5SDimitry Andric _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 14920b57cec5SDimitry Andric { 14930b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 14940b57cec5SDimitry Andric __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 14950b57cec5SDimitry Andric (__v8sf) __B, 14960b57cec5SDimitry Andric -(__v8sf) __C), 14970b57cec5SDimitry Andric (__v8sf) __C); 14980b57cec5SDimitry Andric } 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 15010b57cec5SDimitry Andric _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 15020b57cec5SDimitry Andric { 15030b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 15040b57cec5SDimitry Andric __builtin_ia32_vfmaddpd ((__v2df) __A, 15050b57cec5SDimitry Andric -(__v2df) __B, 15060b57cec5SDimitry Andric (__v2df) __C), 15070b57cec5SDimitry Andric (__v2df) __A); 15080b57cec5SDimitry Andric } 15090b57cec5SDimitry Andric 15100b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 15110b57cec5SDimitry Andric _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 15120b57cec5SDimitry 
Andric { 15130b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 15140b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 ((__v4df) __A, 15150b57cec5SDimitry Andric -(__v4df) __B, 15160b57cec5SDimitry Andric (__v4df) __C), 15170b57cec5SDimitry Andric (__v4df) __A); 15180b57cec5SDimitry Andric } 15190b57cec5SDimitry Andric 15200b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 15210b57cec5SDimitry Andric _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 15220b57cec5SDimitry Andric { 15230b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 15240b57cec5SDimitry Andric __builtin_ia32_vfmaddps ((__v4sf) __A, 15250b57cec5SDimitry Andric -(__v4sf) __B, 15260b57cec5SDimitry Andric (__v4sf) __C), 15270b57cec5SDimitry Andric (__v4sf) __A); 15280b57cec5SDimitry Andric } 15290b57cec5SDimitry Andric 15300b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 15310b57cec5SDimitry Andric _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 15320b57cec5SDimitry Andric { 15330b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 15340b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 ((__v8sf) __A, 15350b57cec5SDimitry Andric -(__v8sf) __B, 15360b57cec5SDimitry Andric (__v8sf) __C), 15370b57cec5SDimitry Andric (__v8sf) __A); 15380b57cec5SDimitry Andric } 15390b57cec5SDimitry Andric 15400b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 15410b57cec5SDimitry Andric _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 15420b57cec5SDimitry Andric { 15430b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 15440b57cec5SDimitry Andric __builtin_ia32_vfmaddpd ((__v2df) __A, 15450b57cec5SDimitry Andric -(__v2df) __B, 15460b57cec5SDimitry Andric -(__v2df) __C), 15470b57cec5SDimitry Andric (__v2df) __A); 15480b57cec5SDimitry Andric } 15490b57cec5SDimitry Andric 
15500b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 15510b57cec5SDimitry Andric _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 15520b57cec5SDimitry Andric { 15530b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 15540b57cec5SDimitry Andric __builtin_ia32_vfmaddpd ((__v2df) __A, 15550b57cec5SDimitry Andric -(__v2df) __B, 15560b57cec5SDimitry Andric -(__v2df) __C), 15570b57cec5SDimitry Andric (__v2df) __C); 15580b57cec5SDimitry Andric } 15590b57cec5SDimitry Andric 15600b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 15610b57cec5SDimitry Andric _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 15620b57cec5SDimitry Andric { 15630b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 15640b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 ((__v4df) __A, 15650b57cec5SDimitry Andric -(__v4df) __B, 15660b57cec5SDimitry Andric -(__v4df) __C), 15670b57cec5SDimitry Andric (__v4df) __A); 15680b57cec5SDimitry Andric } 15690b57cec5SDimitry Andric 15700b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 15710b57cec5SDimitry Andric _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 15720b57cec5SDimitry Andric { 15730b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 15740b57cec5SDimitry Andric __builtin_ia32_vfmaddpd256 ((__v4df) __A, 15750b57cec5SDimitry Andric -(__v4df) __B, 15760b57cec5SDimitry Andric -(__v4df) __C), 15770b57cec5SDimitry Andric (__v4df) __C); 15780b57cec5SDimitry Andric } 15790b57cec5SDimitry Andric 15800b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 15810b57cec5SDimitry Andric _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 15820b57cec5SDimitry Andric { 15830b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 15840b57cec5SDimitry Andric 
__builtin_ia32_vfmaddps ((__v4sf) __A, 15850b57cec5SDimitry Andric -(__v4sf) __B, 15860b57cec5SDimitry Andric -(__v4sf) __C), 15870b57cec5SDimitry Andric (__v4sf) __A); 15880b57cec5SDimitry Andric } 15890b57cec5SDimitry Andric 15900b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 15910b57cec5SDimitry Andric _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 15920b57cec5SDimitry Andric { 15930b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 15940b57cec5SDimitry Andric __builtin_ia32_vfmaddps ((__v4sf) __A, 15950b57cec5SDimitry Andric -(__v4sf) __B, 15960b57cec5SDimitry Andric -(__v4sf) __C), 15970b57cec5SDimitry Andric (__v4sf) __C); 15980b57cec5SDimitry Andric } 15990b57cec5SDimitry Andric 16000b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 16010b57cec5SDimitry Andric _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 16020b57cec5SDimitry Andric { 16030b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 16040b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 ((__v8sf) __A, 16050b57cec5SDimitry Andric -(__v8sf) __B, 16060b57cec5SDimitry Andric -(__v8sf) __C), 16070b57cec5SDimitry Andric (__v8sf) __A); 16080b57cec5SDimitry Andric } 16090b57cec5SDimitry Andric 16100b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 16110b57cec5SDimitry Andric _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 16120b57cec5SDimitry Andric { 16130b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 16140b57cec5SDimitry Andric __builtin_ia32_vfmaddps256 ((__v8sf) __A, 16150b57cec5SDimitry Andric -(__v8sf) __B, 16160b57cec5SDimitry Andric -(__v8sf) __C), 16170b57cec5SDimitry Andric (__v8sf) __C); 16180b57cec5SDimitry Andric } 16190b57cec5SDimitry Andric 16200b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 16210b57cec5SDimitry Andric 
_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 16220b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 16230b57cec5SDimitry Andric (__v2df)_mm_add_pd(__A, __B), 16240b57cec5SDimitry Andric (__v2df)__W); 16250b57cec5SDimitry Andric } 16260b57cec5SDimitry Andric 16270b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 16280b57cec5SDimitry Andric _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { 16290b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 16300b57cec5SDimitry Andric (__v2df)_mm_add_pd(__A, __B), 16310b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 16320b57cec5SDimitry Andric } 16330b57cec5SDimitry Andric 16340b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 16350b57cec5SDimitry Andric _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 16360b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 16370b57cec5SDimitry Andric (__v4df)_mm256_add_pd(__A, __B), 16380b57cec5SDimitry Andric (__v4df)__W); 16390b57cec5SDimitry Andric } 16400b57cec5SDimitry Andric 16410b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 16420b57cec5SDimitry Andric _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { 16430b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 16440b57cec5SDimitry Andric (__v4df)_mm256_add_pd(__A, __B), 16450b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 16460b57cec5SDimitry Andric } 16470b57cec5SDimitry Andric 16480b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 16490b57cec5SDimitry Andric _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 16500b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 16510b57cec5SDimitry Andric (__v4sf)_mm_add_ps(__A, __B), 16520b57cec5SDimitry Andric (__v4sf)__W); 16530b57cec5SDimitry Andric } 
16540b57cec5SDimitry Andric 16550b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 16560b57cec5SDimitry Andric _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { 16570b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 16580b57cec5SDimitry Andric (__v4sf)_mm_add_ps(__A, __B), 16590b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 16600b57cec5SDimitry Andric } 16610b57cec5SDimitry Andric 16620b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 16630b57cec5SDimitry Andric _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 16640b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 16650b57cec5SDimitry Andric (__v8sf)_mm256_add_ps(__A, __B), 16660b57cec5SDimitry Andric (__v8sf)__W); 16670b57cec5SDimitry Andric } 16680b57cec5SDimitry Andric 16690b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 16700b57cec5SDimitry Andric _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { 16710b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 16720b57cec5SDimitry Andric (__v8sf)_mm256_add_ps(__A, __B), 16730b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 16740b57cec5SDimitry Andric } 16750b57cec5SDimitry Andric 16760b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 16770b57cec5SDimitry Andric _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { 16780b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 16790b57cec5SDimitry Andric (__v4si) __W, 16800b57cec5SDimitry Andric (__v4si) __A); 16810b57cec5SDimitry Andric } 16820b57cec5SDimitry Andric 16830b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 16840b57cec5SDimitry Andric _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { 16850b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 16860b57cec5SDimitry Andric (__v8si) __W, 
16870b57cec5SDimitry Andric (__v8si) __A); 16880b57cec5SDimitry Andric } 16890b57cec5SDimitry Andric 16900b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 16910b57cec5SDimitry Andric _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { 16920b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 16930b57cec5SDimitry Andric (__v2df) __W, 16940b57cec5SDimitry Andric (__v2df) __A); 16950b57cec5SDimitry Andric } 16960b57cec5SDimitry Andric 16970b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 16980b57cec5SDimitry Andric _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { 16990b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 17000b57cec5SDimitry Andric (__v4df) __W, 17010b57cec5SDimitry Andric (__v4df) __A); 17020b57cec5SDimitry Andric } 17030b57cec5SDimitry Andric 17040b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 17050b57cec5SDimitry Andric _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { 17060b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 17070b57cec5SDimitry Andric (__v4sf) __W, 17080b57cec5SDimitry Andric (__v4sf) __A); 17090b57cec5SDimitry Andric } 17100b57cec5SDimitry Andric 17110b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 17120b57cec5SDimitry Andric _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { 17130b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 17140b57cec5SDimitry Andric (__v8sf) __W, 17150b57cec5SDimitry Andric (__v8sf) __A); 17160b57cec5SDimitry Andric } 17170b57cec5SDimitry Andric 17180b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 17190b57cec5SDimitry Andric _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { 17200b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 17210b57cec5SDimitry Andric (__v2di) __W, 
17220b57cec5SDimitry Andric (__v2di) __A); 17230b57cec5SDimitry Andric } 17240b57cec5SDimitry Andric 17250b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 17260b57cec5SDimitry Andric _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { 17270b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 17280b57cec5SDimitry Andric (__v4di) __W, 17290b57cec5SDimitry Andric (__v4di) __A); 17300b57cec5SDimitry Andric } 17310b57cec5SDimitry Andric 17320b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 17330b57cec5SDimitry Andric _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { 17340b57cec5SDimitry Andric return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 17350b57cec5SDimitry Andric (__v2df) __W, 17360b57cec5SDimitry Andric (__mmask8) __U); 17370b57cec5SDimitry Andric } 17380b57cec5SDimitry Andric 17390b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 17400b57cec5SDimitry Andric _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { 17410b57cec5SDimitry Andric return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 17420b57cec5SDimitry Andric (__v2df) 17430b57cec5SDimitry Andric _mm_setzero_pd (), 17440b57cec5SDimitry Andric (__mmask8) __U); 17450b57cec5SDimitry Andric } 17460b57cec5SDimitry Andric 17470b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 17480b57cec5SDimitry Andric _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { 17490b57cec5SDimitry Andric return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 17500b57cec5SDimitry Andric (__v4df) __W, 17510b57cec5SDimitry Andric (__mmask8) __U); 17520b57cec5SDimitry Andric } 17530b57cec5SDimitry Andric 17540b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 17550b57cec5SDimitry Andric _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { 17560b57cec5SDimitry Andric return (__m256d) __builtin_ia32_compressdf256_mask 
((__v4df) __A, 17570b57cec5SDimitry Andric (__v4df) 17580b57cec5SDimitry Andric _mm256_setzero_pd (), 17590b57cec5SDimitry Andric (__mmask8) __U); 17600b57cec5SDimitry Andric } 17610b57cec5SDimitry Andric 17620b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 17630b57cec5SDimitry Andric _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 17640b57cec5SDimitry Andric return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 17650b57cec5SDimitry Andric (__v2di) __W, 17660b57cec5SDimitry Andric (__mmask8) __U); 17670b57cec5SDimitry Andric } 17680b57cec5SDimitry Andric 17690b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 17700b57cec5SDimitry Andric _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { 17710b57cec5SDimitry Andric return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 17720b57cec5SDimitry Andric (__v2di) 17730b57cec5SDimitry Andric _mm_setzero_si128 (), 17740b57cec5SDimitry Andric (__mmask8) __U); 17750b57cec5SDimitry Andric } 17760b57cec5SDimitry Andric 17770b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 17780b57cec5SDimitry Andric _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 17790b57cec5SDimitry Andric return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 17800b57cec5SDimitry Andric (__v4di) __W, 17810b57cec5SDimitry Andric (__mmask8) __U); 17820b57cec5SDimitry Andric } 17830b57cec5SDimitry Andric 17840b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 17850b57cec5SDimitry Andric _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { 17860b57cec5SDimitry Andric return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 17870b57cec5SDimitry Andric (__v4di) 17880b57cec5SDimitry Andric _mm256_setzero_si256 (), 17890b57cec5SDimitry Andric (__mmask8) __U); 17900b57cec5SDimitry Andric } 17910b57cec5SDimitry Andric 17920b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 
17930b57cec5SDimitry Andric _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { 17940b57cec5SDimitry Andric return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 17950b57cec5SDimitry Andric (__v4sf) __W, 17960b57cec5SDimitry Andric (__mmask8) __U); 17970b57cec5SDimitry Andric } 17980b57cec5SDimitry Andric 17990b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 18000b57cec5SDimitry Andric _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { 18010b57cec5SDimitry Andric return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 18020b57cec5SDimitry Andric (__v4sf) 18030b57cec5SDimitry Andric _mm_setzero_ps (), 18040b57cec5SDimitry Andric (__mmask8) __U); 18050b57cec5SDimitry Andric } 18060b57cec5SDimitry Andric 18070b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 18080b57cec5SDimitry Andric _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { 18090b57cec5SDimitry Andric return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 18100b57cec5SDimitry Andric (__v8sf) __W, 18110b57cec5SDimitry Andric (__mmask8) __U); 18120b57cec5SDimitry Andric } 18130b57cec5SDimitry Andric 18140b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 18150b57cec5SDimitry Andric _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { 18160b57cec5SDimitry Andric return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 18170b57cec5SDimitry Andric (__v8sf) 18180b57cec5SDimitry Andric _mm256_setzero_ps (), 18190b57cec5SDimitry Andric (__mmask8) __U); 18200b57cec5SDimitry Andric } 18210b57cec5SDimitry Andric 18220b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 18230b57cec5SDimitry Andric _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 18240b57cec5SDimitry Andric return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 18250b57cec5SDimitry Andric (__v4si) __W, 18260b57cec5SDimitry Andric (__mmask8) __U); 18270b57cec5SDimitry Andric } 
18280b57cec5SDimitry Andric 18290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 18300b57cec5SDimitry Andric _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { 18310b57cec5SDimitry Andric return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 18320b57cec5SDimitry Andric (__v4si) 18330b57cec5SDimitry Andric _mm_setzero_si128 (), 18340b57cec5SDimitry Andric (__mmask8) __U); 18350b57cec5SDimitry Andric } 18360b57cec5SDimitry Andric 18370b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 18380b57cec5SDimitry Andric _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 18390b57cec5SDimitry Andric return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 18400b57cec5SDimitry Andric (__v8si) __W, 18410b57cec5SDimitry Andric (__mmask8) __U); 18420b57cec5SDimitry Andric } 18430b57cec5SDimitry Andric 18440b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 18450b57cec5SDimitry Andric _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { 18460b57cec5SDimitry Andric return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 18470b57cec5SDimitry Andric (__v8si) 18480b57cec5SDimitry Andric _mm256_setzero_si256 (), 18490b57cec5SDimitry Andric (__mmask8) __U); 18500b57cec5SDimitry Andric } 18510b57cec5SDimitry Andric 18520b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 18530b57cec5SDimitry Andric _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { 18540b57cec5SDimitry Andric __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 18550b57cec5SDimitry Andric (__v2df) __A, 18560b57cec5SDimitry Andric (__mmask8) __U); 18570b57cec5SDimitry Andric } 18580b57cec5SDimitry Andric 18590b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 18600b57cec5SDimitry Andric _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { 18610b57cec5SDimitry Andric __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 
18620b57cec5SDimitry Andric (__v4df) __A, 18630b57cec5SDimitry Andric (__mmask8) __U); 18640b57cec5SDimitry Andric } 18650b57cec5SDimitry Andric 18660b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 18670b57cec5SDimitry Andric _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { 18680b57cec5SDimitry Andric __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 18690b57cec5SDimitry Andric (__v2di) __A, 18700b57cec5SDimitry Andric (__mmask8) __U); 18710b57cec5SDimitry Andric } 18720b57cec5SDimitry Andric 18730b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 18740b57cec5SDimitry Andric _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { 18750b57cec5SDimitry Andric __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 18760b57cec5SDimitry Andric (__v4di) __A, 18770b57cec5SDimitry Andric (__mmask8) __U); 18780b57cec5SDimitry Andric } 18790b57cec5SDimitry Andric 18800b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 18810b57cec5SDimitry Andric _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { 18820b57cec5SDimitry Andric __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 18830b57cec5SDimitry Andric (__v4sf) __A, 18840b57cec5SDimitry Andric (__mmask8) __U); 18850b57cec5SDimitry Andric } 18860b57cec5SDimitry Andric 18870b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 18880b57cec5SDimitry Andric _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { 18890b57cec5SDimitry Andric __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 18900b57cec5SDimitry Andric (__v8sf) __A, 18910b57cec5SDimitry Andric (__mmask8) __U); 18920b57cec5SDimitry Andric } 18930b57cec5SDimitry Andric 18940b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 18950b57cec5SDimitry Andric _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { 18960b57cec5SDimitry Andric __builtin_ia32_compressstoresi128_mask ((__v4si *) 
__P, 18970b57cec5SDimitry Andric (__v4si) __A, 18980b57cec5SDimitry Andric (__mmask8) __U); 18990b57cec5SDimitry Andric } 19000b57cec5SDimitry Andric 19010b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 19020b57cec5SDimitry Andric _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { 19030b57cec5SDimitry Andric __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 19040b57cec5SDimitry Andric (__v8si) __A, 19050b57cec5SDimitry Andric (__mmask8) __U); 19060b57cec5SDimitry Andric } 19070b57cec5SDimitry Andric 19080b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 19090b57cec5SDimitry Andric _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 19100b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 19110b57cec5SDimitry Andric (__v2df)_mm_cvtepi32_pd(__A), 19120b57cec5SDimitry Andric (__v2df)__W); 19130b57cec5SDimitry Andric } 19140b57cec5SDimitry Andric 19150b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 19160b57cec5SDimitry Andric _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 19170b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 19180b57cec5SDimitry Andric (__v2df)_mm_cvtepi32_pd(__A), 19190b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 19200b57cec5SDimitry Andric } 19210b57cec5SDimitry Andric 19220b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 19230b57cec5SDimitry Andric _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 19240b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 19250b57cec5SDimitry Andric (__v4df)_mm256_cvtepi32_pd(__A), 19260b57cec5SDimitry Andric (__v4df)__W); 19270b57cec5SDimitry Andric } 19280b57cec5SDimitry Andric 19290b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 19300b57cec5SDimitry Andric _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 19310b57cec5SDimitry Andric return 
(__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 19320b57cec5SDimitry Andric (__v4df)_mm256_cvtepi32_pd(__A), 19330b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 19340b57cec5SDimitry Andric } 19350b57cec5SDimitry Andric 19360b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 19370b57cec5SDimitry Andric _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 19380b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 19390b57cec5SDimitry Andric (__v4sf)_mm_cvtepi32_ps(__A), 19400b57cec5SDimitry Andric (__v4sf)__W); 19410b57cec5SDimitry Andric } 19420b57cec5SDimitry Andric 19430b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 19440b57cec5SDimitry Andric _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) { 19450b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 19460b57cec5SDimitry Andric (__v4sf)_mm_cvtepi32_ps(__A), 19470b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 19480b57cec5SDimitry Andric } 19490b57cec5SDimitry Andric 19500b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 19510b57cec5SDimitry Andric _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 19520b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 19530b57cec5SDimitry Andric (__v8sf)_mm256_cvtepi32_ps(__A), 19540b57cec5SDimitry Andric (__v8sf)__W); 19550b57cec5SDimitry Andric } 19560b57cec5SDimitry Andric 19570b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 19580b57cec5SDimitry Andric _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) { 19590b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 19600b57cec5SDimitry Andric (__v8sf)_mm256_cvtepi32_ps(__A), 19610b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 19620b57cec5SDimitry Andric } 19630b57cec5SDimitry Andric 19640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 19650b57cec5SDimitry Andric 
_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 19660b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 19670b57cec5SDimitry Andric (__v4si) __W, 19680b57cec5SDimitry Andric (__mmask8) __U); 19690b57cec5SDimitry Andric } 19700b57cec5SDimitry Andric 19710b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 19720b57cec5SDimitry Andric _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { 19730b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 19740b57cec5SDimitry Andric (__v4si) 19750b57cec5SDimitry Andric _mm_setzero_si128 (), 19760b57cec5SDimitry Andric (__mmask8) __U); 19770b57cec5SDimitry Andric } 19780b57cec5SDimitry Andric 19790b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 19800b57cec5SDimitry Andric _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 19810b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 19820b57cec5SDimitry Andric (__v4si)_mm256_cvtpd_epi32(__A), 19830b57cec5SDimitry Andric (__v4si)__W); 19840b57cec5SDimitry Andric } 19850b57cec5SDimitry Andric 19860b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 19870b57cec5SDimitry Andric _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { 19880b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 19890b57cec5SDimitry Andric (__v4si)_mm256_cvtpd_epi32(__A), 19900b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 19910b57cec5SDimitry Andric } 19920b57cec5SDimitry Andric 19930b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 19940b57cec5SDimitry Andric _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { 19950b57cec5SDimitry Andric return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 19960b57cec5SDimitry Andric (__v4sf) __W, 19970b57cec5SDimitry Andric (__mmask8) __U); 19980b57cec5SDimitry Andric } 19990b57cec5SDimitry Andric 20000b57cec5SDimitry 
Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 20010b57cec5SDimitry Andric _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { 20020b57cec5SDimitry Andric return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 20030b57cec5SDimitry Andric (__v4sf) 20040b57cec5SDimitry Andric _mm_setzero_ps (), 20050b57cec5SDimitry Andric (__mmask8) __U); 20060b57cec5SDimitry Andric } 20070b57cec5SDimitry Andric 20080b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS256 20090b57cec5SDimitry Andric _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { 20100b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 20110b57cec5SDimitry Andric (__v4sf)_mm256_cvtpd_ps(__A), 20120b57cec5SDimitry Andric (__v4sf)__W); 20130b57cec5SDimitry Andric } 20140b57cec5SDimitry Andric 20150b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS256 20160b57cec5SDimitry Andric _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { 20170b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 20180b57cec5SDimitry Andric (__v4sf)_mm256_cvtpd_ps(__A), 20190b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 20200b57cec5SDimitry Andric } 20210b57cec5SDimitry Andric 20220b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20230b57cec5SDimitry Andric _mm_cvtpd_epu32 (__m128d __A) { 20240b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 20250b57cec5SDimitry Andric (__v4si) 20260b57cec5SDimitry Andric _mm_setzero_si128 (), 20270b57cec5SDimitry Andric (__mmask8) -1); 20280b57cec5SDimitry Andric } 20290b57cec5SDimitry Andric 20300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20310b57cec5SDimitry Andric _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 20320b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 20330b57cec5SDimitry Andric (__v4si) __W, 20340b57cec5SDimitry Andric (__mmask8) __U); 
20350b57cec5SDimitry Andric } 20360b57cec5SDimitry Andric 20370b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20380b57cec5SDimitry Andric _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { 20390b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 20400b57cec5SDimitry Andric (__v4si) 20410b57cec5SDimitry Andric _mm_setzero_si128 (), 20420b57cec5SDimitry Andric (__mmask8) __U); 20430b57cec5SDimitry Andric } 20440b57cec5SDimitry Andric 20450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 20460b57cec5SDimitry Andric _mm256_cvtpd_epu32 (__m256d __A) { 20470b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 20480b57cec5SDimitry Andric (__v4si) 20490b57cec5SDimitry Andric _mm_setzero_si128 (), 20500b57cec5SDimitry Andric (__mmask8) -1); 20510b57cec5SDimitry Andric } 20520b57cec5SDimitry Andric 20530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 20540b57cec5SDimitry Andric _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 20550b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 20560b57cec5SDimitry Andric (__v4si) __W, 20570b57cec5SDimitry Andric (__mmask8) __U); 20580b57cec5SDimitry Andric } 20590b57cec5SDimitry Andric 20600b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 20610b57cec5SDimitry Andric _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { 20620b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 20630b57cec5SDimitry Andric (__v4si) 20640b57cec5SDimitry Andric _mm_setzero_si128 (), 20650b57cec5SDimitry Andric (__mmask8) __U); 20660b57cec5SDimitry Andric } 20670b57cec5SDimitry Andric 20680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20690b57cec5SDimitry Andric _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 20700b57cec5SDimitry Andric return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 20710b57cec5SDimitry Andric (__v4si)_mm_cvtps_epi32(__A), 20720b57cec5SDimitry Andric (__v4si)__W); 20730b57cec5SDimitry Andric } 20740b57cec5SDimitry Andric 20750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 20760b57cec5SDimitry Andric _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { 20770b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 20780b57cec5SDimitry Andric (__v4si)_mm_cvtps_epi32(__A), 20790b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 20800b57cec5SDimitry Andric } 20810b57cec5SDimitry Andric 20820b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 20830b57cec5SDimitry Andric _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 20840b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 20850b57cec5SDimitry Andric (__v8si)_mm256_cvtps_epi32(__A), 20860b57cec5SDimitry Andric (__v8si)__W); 20870b57cec5SDimitry Andric } 20880b57cec5SDimitry Andric 20890b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 20900b57cec5SDimitry Andric _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { 20910b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 20920b57cec5SDimitry Andric (__v8si)_mm256_cvtps_epi32(__A), 20930b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 20940b57cec5SDimitry Andric } 20950b57cec5SDimitry Andric 20960b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 20970b57cec5SDimitry Andric _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { 20980b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 20990b57cec5SDimitry Andric (__v2df)_mm_cvtps_pd(__A), 21000b57cec5SDimitry Andric (__v2df)__W); 21010b57cec5SDimitry Andric } 21020b57cec5SDimitry Andric 21030b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 21040b57cec5SDimitry Andric _mm_maskz_cvtps_pd 
(__mmask8 __U, __m128 __A) { 21050b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 21060b57cec5SDimitry Andric (__v2df)_mm_cvtps_pd(__A), 21070b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 21080b57cec5SDimitry Andric } 21090b57cec5SDimitry Andric 21100b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 21110b57cec5SDimitry Andric _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { 21120b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 21130b57cec5SDimitry Andric (__v4df)_mm256_cvtps_pd(__A), 21140b57cec5SDimitry Andric (__v4df)__W); 21150b57cec5SDimitry Andric } 21160b57cec5SDimitry Andric 21170b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 21180b57cec5SDimitry Andric _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 21190b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 21200b57cec5SDimitry Andric (__v4df)_mm256_cvtps_pd(__A), 21210b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 21220b57cec5SDimitry Andric } 21230b57cec5SDimitry Andric 21240b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21250b57cec5SDimitry Andric _mm_cvtps_epu32 (__m128 __A) { 21260b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 21270b57cec5SDimitry Andric (__v4si) 21280b57cec5SDimitry Andric _mm_setzero_si128 (), 21290b57cec5SDimitry Andric (__mmask8) -1); 21300b57cec5SDimitry Andric } 21310b57cec5SDimitry Andric 21320b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21330b57cec5SDimitry Andric _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 21340b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 21350b57cec5SDimitry Andric (__v4si) __W, 21360b57cec5SDimitry Andric (__mmask8) __U); 21370b57cec5SDimitry Andric } 21380b57cec5SDimitry Andric 21390b57cec5SDimitry Andric static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 21400b57cec5SDimitry Andric _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { 21410b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 21420b57cec5SDimitry Andric (__v4si) 21430b57cec5SDimitry Andric _mm_setzero_si128 (), 21440b57cec5SDimitry Andric (__mmask8) __U); 21450b57cec5SDimitry Andric } 21460b57cec5SDimitry Andric 21470b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 21480b57cec5SDimitry Andric _mm256_cvtps_epu32 (__m256 __A) { 21490b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 21500b57cec5SDimitry Andric (__v8si) 21510b57cec5SDimitry Andric _mm256_setzero_si256 (), 21520b57cec5SDimitry Andric (__mmask8) -1); 21530b57cec5SDimitry Andric } 21540b57cec5SDimitry Andric 21550b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 21560b57cec5SDimitry Andric _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 21570b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 21580b57cec5SDimitry Andric (__v8si) __W, 21590b57cec5SDimitry Andric (__mmask8) __U); 21600b57cec5SDimitry Andric } 21610b57cec5SDimitry Andric 21620b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 21630b57cec5SDimitry Andric _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { 21640b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 21650b57cec5SDimitry Andric (__v8si) 21660b57cec5SDimitry Andric _mm256_setzero_si256 (), 21670b57cec5SDimitry Andric (__mmask8) __U); 21680b57cec5SDimitry Andric } 21690b57cec5SDimitry Andric 21700b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21710b57cec5SDimitry Andric _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 21720b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 21730b57cec5SDimitry Andric (__v4si) __W, 21740b57cec5SDimitry Andric 
(__mmask8) __U); 21750b57cec5SDimitry Andric } 21760b57cec5SDimitry Andric 21770b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 21780b57cec5SDimitry Andric _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { 21790b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 21800b57cec5SDimitry Andric (__v4si) 21810b57cec5SDimitry Andric _mm_setzero_si128 (), 21820b57cec5SDimitry Andric (__mmask8) __U); 21830b57cec5SDimitry Andric } 21840b57cec5SDimitry Andric 21850b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 21860b57cec5SDimitry Andric _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 21870b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 21880b57cec5SDimitry Andric (__v4si)_mm256_cvttpd_epi32(__A), 21890b57cec5SDimitry Andric (__v4si)__W); 21900b57cec5SDimitry Andric } 21910b57cec5SDimitry Andric 21920b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 21930b57cec5SDimitry Andric _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { 21940b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 21950b57cec5SDimitry Andric (__v4si)_mm256_cvttpd_epi32(__A), 21960b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 21970b57cec5SDimitry Andric } 21980b57cec5SDimitry Andric 21990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22000b57cec5SDimitry Andric _mm_cvttpd_epu32 (__m128d __A) { 22010b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 22020b57cec5SDimitry Andric (__v4si) 22030b57cec5SDimitry Andric _mm_setzero_si128 (), 22040b57cec5SDimitry Andric (__mmask8) -1); 22050b57cec5SDimitry Andric } 22060b57cec5SDimitry Andric 22070b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22080b57cec5SDimitry Andric _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 22090b57cec5SDimitry Andric return (__m128i) 
__builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 22100b57cec5SDimitry Andric (__v4si) __W, 22110b57cec5SDimitry Andric (__mmask8) __U); 22120b57cec5SDimitry Andric } 22130b57cec5SDimitry Andric 22140b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22150b57cec5SDimitry Andric _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { 22160b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 22170b57cec5SDimitry Andric (__v4si) 22180b57cec5SDimitry Andric _mm_setzero_si128 (), 22190b57cec5SDimitry Andric (__mmask8) __U); 22200b57cec5SDimitry Andric } 22210b57cec5SDimitry Andric 22220b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 22230b57cec5SDimitry Andric _mm256_cvttpd_epu32 (__m256d __A) { 22240b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 22250b57cec5SDimitry Andric (__v4si) 22260b57cec5SDimitry Andric _mm_setzero_si128 (), 22270b57cec5SDimitry Andric (__mmask8) -1); 22280b57cec5SDimitry Andric } 22290b57cec5SDimitry Andric 22300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 22310b57cec5SDimitry Andric _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 22320b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 22330b57cec5SDimitry Andric (__v4si) __W, 22340b57cec5SDimitry Andric (__mmask8) __U); 22350b57cec5SDimitry Andric } 22360b57cec5SDimitry Andric 22370b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 22380b57cec5SDimitry Andric _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { 22390b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 22400b57cec5SDimitry Andric (__v4si) 22410b57cec5SDimitry Andric _mm_setzero_si128 (), 22420b57cec5SDimitry Andric (__mmask8) __U); 22430b57cec5SDimitry Andric } 22440b57cec5SDimitry Andric 22450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 
22460b57cec5SDimitry Andric _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 22470b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 22480b57cec5SDimitry Andric (__v4si)_mm_cvttps_epi32(__A), 22490b57cec5SDimitry Andric (__v4si)__W); 22500b57cec5SDimitry Andric } 22510b57cec5SDimitry Andric 22520b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22530b57cec5SDimitry Andric _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { 22540b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 22550b57cec5SDimitry Andric (__v4si)_mm_cvttps_epi32(__A), 22560b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 22570b57cec5SDimitry Andric } 22580b57cec5SDimitry Andric 22590b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 22600b57cec5SDimitry Andric _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 22610b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 22620b57cec5SDimitry Andric (__v8si)_mm256_cvttps_epi32(__A), 22630b57cec5SDimitry Andric (__v8si)__W); 22640b57cec5SDimitry Andric } 22650b57cec5SDimitry Andric 22660b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 22670b57cec5SDimitry Andric _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { 22680b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 22690b57cec5SDimitry Andric (__v8si)_mm256_cvttps_epi32(__A), 22700b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 22710b57cec5SDimitry Andric } 22720b57cec5SDimitry Andric 22730b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22740b57cec5SDimitry Andric _mm_cvttps_epu32 (__m128 __A) { 22750b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 22760b57cec5SDimitry Andric (__v4si) 22770b57cec5SDimitry Andric _mm_setzero_si128 (), 22780b57cec5SDimitry Andric (__mmask8) -1); 22790b57cec5SDimitry Andric } 
22800b57cec5SDimitry Andric 22810b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22820b57cec5SDimitry Andric _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 22830b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 22840b57cec5SDimitry Andric (__v4si) __W, 22850b57cec5SDimitry Andric (__mmask8) __U); 22860b57cec5SDimitry Andric } 22870b57cec5SDimitry Andric 22880b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 22890b57cec5SDimitry Andric _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { 22900b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 22910b57cec5SDimitry Andric (__v4si) 22920b57cec5SDimitry Andric _mm_setzero_si128 (), 22930b57cec5SDimitry Andric (__mmask8) __U); 22940b57cec5SDimitry Andric } 22950b57cec5SDimitry Andric 22960b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 22970b57cec5SDimitry Andric _mm256_cvttps_epu32 (__m256 __A) { 22980b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 22990b57cec5SDimitry Andric (__v8si) 23000b57cec5SDimitry Andric _mm256_setzero_si256 (), 23010b57cec5SDimitry Andric (__mmask8) -1); 23020b57cec5SDimitry Andric } 23030b57cec5SDimitry Andric 23040b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 23050b57cec5SDimitry Andric _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 23060b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 23070b57cec5SDimitry Andric (__v8si) __W, 23080b57cec5SDimitry Andric (__mmask8) __U); 23090b57cec5SDimitry Andric } 23100b57cec5SDimitry Andric 23110b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 23120b57cec5SDimitry Andric _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { 23130b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 23140b57cec5SDimitry Andric (__v8si) 
23150b57cec5SDimitry Andric _mm256_setzero_si256 (), 23160b57cec5SDimitry Andric (__mmask8) __U); 23170b57cec5SDimitry Andric } 23180b57cec5SDimitry Andric 23190b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 23200b57cec5SDimitry Andric _mm_cvtepu32_pd (__m128i __A) { 23210b57cec5SDimitry Andric return (__m128d) __builtin_convertvector( 23220b57cec5SDimitry Andric __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); 23230b57cec5SDimitry Andric } 23240b57cec5SDimitry Andric 23250b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 23260b57cec5SDimitry Andric _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 23270b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 23280b57cec5SDimitry Andric (__v2df)_mm_cvtepu32_pd(__A), 23290b57cec5SDimitry Andric (__v2df)__W); 23300b57cec5SDimitry Andric } 23310b57cec5SDimitry Andric 23320b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 23330b57cec5SDimitry Andric _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 23340b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 23350b57cec5SDimitry Andric (__v2df)_mm_cvtepu32_pd(__A), 23360b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 23370b57cec5SDimitry Andric } 23380b57cec5SDimitry Andric 23390b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 23400b57cec5SDimitry Andric _mm256_cvtepu32_pd (__m128i __A) { 23410b57cec5SDimitry Andric return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); 23420b57cec5SDimitry Andric } 23430b57cec5SDimitry Andric 23440b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 23450b57cec5SDimitry Andric _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 23460b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 23470b57cec5SDimitry Andric (__v4df)_mm256_cvtepu32_pd(__A), 23480b57cec5SDimitry Andric (__v4df)__W); 
23490b57cec5SDimitry Andric } 23500b57cec5SDimitry Andric 23510b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 23520b57cec5SDimitry Andric _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 23530b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 23540b57cec5SDimitry Andric (__v4df)_mm256_cvtepu32_pd(__A), 23550b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 23560b57cec5SDimitry Andric } 23570b57cec5SDimitry Andric 23580b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 23590b57cec5SDimitry Andric _mm_cvtepu32_ps (__m128i __A) { 23600b57cec5SDimitry Andric return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); 23610b57cec5SDimitry Andric } 23620b57cec5SDimitry Andric 23630b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 23640b57cec5SDimitry Andric _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 23650b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 23660b57cec5SDimitry Andric (__v4sf)_mm_cvtepu32_ps(__A), 23670b57cec5SDimitry Andric (__v4sf)__W); 23680b57cec5SDimitry Andric } 23690b57cec5SDimitry Andric 23700b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 23710b57cec5SDimitry Andric _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { 23720b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 23730b57cec5SDimitry Andric (__v4sf)_mm_cvtepu32_ps(__A), 23740b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 23750b57cec5SDimitry Andric } 23760b57cec5SDimitry Andric 23770b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 23780b57cec5SDimitry Andric _mm256_cvtepu32_ps (__m256i __A) { 23790b57cec5SDimitry Andric return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); 23800b57cec5SDimitry Andric } 23810b57cec5SDimitry Andric 23820b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 23830b57cec5SDimitry Andric _mm256_mask_cvtepu32_ps 
(__m256 __W, __mmask8 __U, __m256i __A) { 23840b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 23850b57cec5SDimitry Andric (__v8sf)_mm256_cvtepu32_ps(__A), 23860b57cec5SDimitry Andric (__v8sf)__W); 23870b57cec5SDimitry Andric } 23880b57cec5SDimitry Andric 23890b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 23900b57cec5SDimitry Andric _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { 23910b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 23920b57cec5SDimitry Andric (__v8sf)_mm256_cvtepu32_ps(__A), 23930b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 23940b57cec5SDimitry Andric } 23950b57cec5SDimitry Andric 23960b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 23970b57cec5SDimitry Andric _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 23980b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 23990b57cec5SDimitry Andric (__v2df)_mm_div_pd(__A, __B), 24000b57cec5SDimitry Andric (__v2df)__W); 24010b57cec5SDimitry Andric } 24020b57cec5SDimitry Andric 24030b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 24040b57cec5SDimitry Andric _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { 24050b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 24060b57cec5SDimitry Andric (__v2df)_mm_div_pd(__A, __B), 24070b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 24080b57cec5SDimitry Andric } 24090b57cec5SDimitry Andric 24100b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 24110b57cec5SDimitry Andric _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 24120b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 24130b57cec5SDimitry Andric (__v4df)_mm256_div_pd(__A, __B), 24140b57cec5SDimitry Andric (__v4df)__W); 24150b57cec5SDimitry Andric } 24160b57cec5SDimitry Andric 24170b57cec5SDimitry 
Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 24180b57cec5SDimitry Andric _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { 24190b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 24200b57cec5SDimitry Andric (__v4df)_mm256_div_pd(__A, __B), 24210b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 24220b57cec5SDimitry Andric } 24230b57cec5SDimitry Andric 24240b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 24250b57cec5SDimitry Andric _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 24260b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 24270b57cec5SDimitry Andric (__v4sf)_mm_div_ps(__A, __B), 24280b57cec5SDimitry Andric (__v4sf)__W); 24290b57cec5SDimitry Andric } 24300b57cec5SDimitry Andric 24310b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 24320b57cec5SDimitry Andric _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { 24330b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 24340b57cec5SDimitry Andric (__v4sf)_mm_div_ps(__A, __B), 24350b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 24360b57cec5SDimitry Andric } 24370b57cec5SDimitry Andric 24380b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 24390b57cec5SDimitry Andric _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 24400b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 24410b57cec5SDimitry Andric (__v8sf)_mm256_div_ps(__A, __B), 24420b57cec5SDimitry Andric (__v8sf)__W); 24430b57cec5SDimitry Andric } 24440b57cec5SDimitry Andric 24450b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 24460b57cec5SDimitry Andric _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { 24470b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 24480b57cec5SDimitry Andric (__v8sf)_mm256_div_ps(__A, __B), 24490b57cec5SDimitry Andric 
(__v8sf)_mm256_setzero_ps()); 24500b57cec5SDimitry Andric } 24510b57cec5SDimitry Andric 24520b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 24530b57cec5SDimitry Andric _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { 24540b57cec5SDimitry Andric return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 24550b57cec5SDimitry Andric (__v2df) __W, 24560b57cec5SDimitry Andric (__mmask8) __U); 24570b57cec5SDimitry Andric } 24580b57cec5SDimitry Andric 24590b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 24600b57cec5SDimitry Andric _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { 24610b57cec5SDimitry Andric return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 24620b57cec5SDimitry Andric (__v2df) 24630b57cec5SDimitry Andric _mm_setzero_pd (), 24640b57cec5SDimitry Andric (__mmask8) __U); 24650b57cec5SDimitry Andric } 24660b57cec5SDimitry Andric 24670b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 24680b57cec5SDimitry Andric _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { 24690b57cec5SDimitry Andric return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 24700b57cec5SDimitry Andric (__v4df) __W, 24710b57cec5SDimitry Andric (__mmask8) __U); 24720b57cec5SDimitry Andric } 24730b57cec5SDimitry Andric 24740b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 24750b57cec5SDimitry Andric _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { 24760b57cec5SDimitry Andric return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 24770b57cec5SDimitry Andric (__v4df) 24780b57cec5SDimitry Andric _mm256_setzero_pd (), 24790b57cec5SDimitry Andric (__mmask8) __U); 24800b57cec5SDimitry Andric } 24810b57cec5SDimitry Andric 24820b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 24830b57cec5SDimitry Andric _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 24840b57cec5SDimitry Andric return (__m128i) 
__builtin_ia32_expanddi128_mask ((__v2di) __A, 24850b57cec5SDimitry Andric (__v2di) __W, 24860b57cec5SDimitry Andric (__mmask8) __U); 24870b57cec5SDimitry Andric } 24880b57cec5SDimitry Andric 24890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 24900b57cec5SDimitry Andric _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { 24910b57cec5SDimitry Andric return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 24920b57cec5SDimitry Andric (__v2di) 24930b57cec5SDimitry Andric _mm_setzero_si128 (), 24940b57cec5SDimitry Andric (__mmask8) __U); 24950b57cec5SDimitry Andric } 24960b57cec5SDimitry Andric 24970b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 24980b57cec5SDimitry Andric _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 24990b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 25000b57cec5SDimitry Andric (__v4di) __W, 25010b57cec5SDimitry Andric (__mmask8) __U); 25020b57cec5SDimitry Andric } 25030b57cec5SDimitry Andric 25040b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 25050b57cec5SDimitry Andric _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { 25060b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 25070b57cec5SDimitry Andric (__v4di) 25080b57cec5SDimitry Andric _mm256_setzero_si256 (), 25090b57cec5SDimitry Andric (__mmask8) __U); 25100b57cec5SDimitry Andric } 25110b57cec5SDimitry Andric 25120b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 25130b57cec5SDimitry Andric _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { 2514480093f4SDimitry Andric return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, 25150b57cec5SDimitry Andric (__v2df) __W, 25160b57cec5SDimitry Andric (__mmask8) 25170b57cec5SDimitry Andric __U); 25180b57cec5SDimitry Andric } 25190b57cec5SDimitry Andric 25200b57cec5SDimitry Andric static __inline__ __m128d 
__DEFAULT_FN_ATTRS128 25210b57cec5SDimitry Andric _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2522480093f4SDimitry Andric return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, 25230b57cec5SDimitry Andric (__v2df) 25240b57cec5SDimitry Andric _mm_setzero_pd (), 25250b57cec5SDimitry Andric (__mmask8) 25260b57cec5SDimitry Andric __U); 25270b57cec5SDimitry Andric } 25280b57cec5SDimitry Andric 25290b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 25300b57cec5SDimitry Andric _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { 2531480093f4SDimitry Andric return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, 25320b57cec5SDimitry Andric (__v4df) __W, 25330b57cec5SDimitry Andric (__mmask8) 25340b57cec5SDimitry Andric __U); 25350b57cec5SDimitry Andric } 25360b57cec5SDimitry Andric 25370b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 25380b57cec5SDimitry Andric _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2539480093f4SDimitry Andric return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, 25400b57cec5SDimitry Andric (__v4df) 25410b57cec5SDimitry Andric _mm256_setzero_pd (), 25420b57cec5SDimitry Andric (__mmask8) 25430b57cec5SDimitry Andric __U); 25440b57cec5SDimitry Andric } 25450b57cec5SDimitry Andric 25460b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 25470b57cec5SDimitry Andric _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { 2548480093f4SDimitry Andric return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, 25490b57cec5SDimitry Andric (__v2di) __W, 25500b57cec5SDimitry Andric (__mmask8) 25510b57cec5SDimitry Andric __U); 25520b57cec5SDimitry Andric } 25530b57cec5SDimitry Andric 25540b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 25550b57cec5SDimitry Andric _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 
2556480093f4SDimitry Andric return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, 25570b57cec5SDimitry Andric (__v2di) 25580b57cec5SDimitry Andric _mm_setzero_si128 (), 25590b57cec5SDimitry Andric (__mmask8) 25600b57cec5SDimitry Andric __U); 25610b57cec5SDimitry Andric } 25620b57cec5SDimitry Andric 25630b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 25640b57cec5SDimitry Andric _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, 25650b57cec5SDimitry Andric void const *__P) { 2566480093f4SDimitry Andric return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, 25670b57cec5SDimitry Andric (__v4di) __W, 25680b57cec5SDimitry Andric (__mmask8) 25690b57cec5SDimitry Andric __U); 25700b57cec5SDimitry Andric } 25710b57cec5SDimitry Andric 25720b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 25730b57cec5SDimitry Andric _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2574480093f4SDimitry Andric return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, 25750b57cec5SDimitry Andric (__v4di) 25760b57cec5SDimitry Andric _mm256_setzero_si256 (), 25770b57cec5SDimitry Andric (__mmask8) 25780b57cec5SDimitry Andric __U); 25790b57cec5SDimitry Andric } 25800b57cec5SDimitry Andric 25810b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 25820b57cec5SDimitry Andric _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { 2583480093f4SDimitry Andric return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, 25840b57cec5SDimitry Andric (__v4sf) __W, 25850b57cec5SDimitry Andric (__mmask8) __U); 25860b57cec5SDimitry Andric } 25870b57cec5SDimitry Andric 25880b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 25890b57cec5SDimitry Andric _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2590480093f4SDimitry Andric return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, 
25910b57cec5SDimitry Andric (__v4sf) 25920b57cec5SDimitry Andric _mm_setzero_ps (), 25930b57cec5SDimitry Andric (__mmask8) 25940b57cec5SDimitry Andric __U); 25950b57cec5SDimitry Andric } 25960b57cec5SDimitry Andric 25970b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 25980b57cec5SDimitry Andric _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { 2599480093f4SDimitry Andric return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, 26000b57cec5SDimitry Andric (__v8sf) __W, 26010b57cec5SDimitry Andric (__mmask8) __U); 26020b57cec5SDimitry Andric } 26030b57cec5SDimitry Andric 26040b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 26050b57cec5SDimitry Andric _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2606480093f4SDimitry Andric return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, 26070b57cec5SDimitry Andric (__v8sf) 26080b57cec5SDimitry Andric _mm256_setzero_ps (), 26090b57cec5SDimitry Andric (__mmask8) 26100b57cec5SDimitry Andric __U); 26110b57cec5SDimitry Andric } 26120b57cec5SDimitry Andric 26130b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 26140b57cec5SDimitry Andric _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { 2615480093f4SDimitry Andric return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, 26160b57cec5SDimitry Andric (__v4si) __W, 26170b57cec5SDimitry Andric (__mmask8) 26180b57cec5SDimitry Andric __U); 26190b57cec5SDimitry Andric } 26200b57cec5SDimitry Andric 26210b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 26220b57cec5SDimitry Andric _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2623480093f4SDimitry Andric return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, 26240b57cec5SDimitry Andric (__v4si) 26250b57cec5SDimitry Andric _mm_setzero_si128 (), 26260b57cec5SDimitry Andric (__mmask8) __U); 
26270b57cec5SDimitry Andric } 26280b57cec5SDimitry Andric 26290b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26300b57cec5SDimitry Andric _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 26310b57cec5SDimitry Andric void const *__P) { 2632480093f4SDimitry Andric return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, 26330b57cec5SDimitry Andric (__v8si) __W, 26340b57cec5SDimitry Andric (__mmask8) 26350b57cec5SDimitry Andric __U); 26360b57cec5SDimitry Andric } 26370b57cec5SDimitry Andric 26380b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26390b57cec5SDimitry Andric _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2640480093f4SDimitry Andric return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, 26410b57cec5SDimitry Andric (__v8si) 26420b57cec5SDimitry Andric _mm256_setzero_si256 (), 26430b57cec5SDimitry Andric (__mmask8) 26440b57cec5SDimitry Andric __U); 26450b57cec5SDimitry Andric } 26460b57cec5SDimitry Andric 26470b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 26480b57cec5SDimitry Andric _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { 26490b57cec5SDimitry Andric return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 26500b57cec5SDimitry Andric (__v4sf) __W, 26510b57cec5SDimitry Andric (__mmask8) __U); 26520b57cec5SDimitry Andric } 26530b57cec5SDimitry Andric 26540b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 26550b57cec5SDimitry Andric _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { 26560b57cec5SDimitry Andric return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 26570b57cec5SDimitry Andric (__v4sf) 26580b57cec5SDimitry Andric _mm_setzero_ps (), 26590b57cec5SDimitry Andric (__mmask8) __U); 26600b57cec5SDimitry Andric } 26610b57cec5SDimitry Andric 26620b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 26630b57cec5SDimitry Andric _mm256_mask_expand_ps (__m256 
__W, __mmask8 __U, __m256 __A) { 26640b57cec5SDimitry Andric return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 26650b57cec5SDimitry Andric (__v8sf) __W, 26660b57cec5SDimitry Andric (__mmask8) __U); 26670b57cec5SDimitry Andric } 26680b57cec5SDimitry Andric 26690b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 26700b57cec5SDimitry Andric _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { 26710b57cec5SDimitry Andric return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 26720b57cec5SDimitry Andric (__v8sf) 26730b57cec5SDimitry Andric _mm256_setzero_ps (), 26740b57cec5SDimitry Andric (__mmask8) __U); 26750b57cec5SDimitry Andric } 26760b57cec5SDimitry Andric 26770b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 26780b57cec5SDimitry Andric _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 26790b57cec5SDimitry Andric return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 26800b57cec5SDimitry Andric (__v4si) __W, 26810b57cec5SDimitry Andric (__mmask8) __U); 26820b57cec5SDimitry Andric } 26830b57cec5SDimitry Andric 26840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 26850b57cec5SDimitry Andric _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { 26860b57cec5SDimitry Andric return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 26870b57cec5SDimitry Andric (__v4si) 26880b57cec5SDimitry Andric _mm_setzero_si128 (), 26890b57cec5SDimitry Andric (__mmask8) __U); 26900b57cec5SDimitry Andric } 26910b57cec5SDimitry Andric 26920b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 26930b57cec5SDimitry Andric _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 26940b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 26950b57cec5SDimitry Andric (__v8si) __W, 26960b57cec5SDimitry Andric (__mmask8) __U); 26970b57cec5SDimitry Andric } 26980b57cec5SDimitry Andric 26990b57cec5SDimitry Andric static 
__inline__ __m256i __DEFAULT_FN_ATTRS256 27000b57cec5SDimitry Andric _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { 27010b57cec5SDimitry Andric return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 27020b57cec5SDimitry Andric (__v8si) 27030b57cec5SDimitry Andric _mm256_setzero_si256 (), 27040b57cec5SDimitry Andric (__mmask8) __U); 27050b57cec5SDimitry Andric } 27060b57cec5SDimitry Andric 27070b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 27080b57cec5SDimitry Andric _mm_getexp_pd (__m128d __A) { 27090b57cec5SDimitry Andric return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 27100b57cec5SDimitry Andric (__v2df) 27110b57cec5SDimitry Andric _mm_setzero_pd (), 27120b57cec5SDimitry Andric (__mmask8) -1); 27130b57cec5SDimitry Andric } 27140b57cec5SDimitry Andric 27150b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 27160b57cec5SDimitry Andric _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { 27170b57cec5SDimitry Andric return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 27180b57cec5SDimitry Andric (__v2df) __W, 27190b57cec5SDimitry Andric (__mmask8) __U); 27200b57cec5SDimitry Andric } 27210b57cec5SDimitry Andric 27220b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 27230b57cec5SDimitry Andric _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { 27240b57cec5SDimitry Andric return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 27250b57cec5SDimitry Andric (__v2df) 27260b57cec5SDimitry Andric _mm_setzero_pd (), 27270b57cec5SDimitry Andric (__mmask8) __U); 27280b57cec5SDimitry Andric } 27290b57cec5SDimitry Andric 27300b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 27310b57cec5SDimitry Andric _mm256_getexp_pd (__m256d __A) { 27320b57cec5SDimitry Andric return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 27330b57cec5SDimitry Andric (__v4df) 27340b57cec5SDimitry Andric _mm256_setzero_pd (), 27350b57cec5SDimitry 
Andric (__mmask8) -1); 27360b57cec5SDimitry Andric } 27370b57cec5SDimitry Andric 27380b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 27390b57cec5SDimitry Andric _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { 27400b57cec5SDimitry Andric return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 27410b57cec5SDimitry Andric (__v4df) __W, 27420b57cec5SDimitry Andric (__mmask8) __U); 27430b57cec5SDimitry Andric } 27440b57cec5SDimitry Andric 27450b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 27460b57cec5SDimitry Andric _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { 27470b57cec5SDimitry Andric return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 27480b57cec5SDimitry Andric (__v4df) 27490b57cec5SDimitry Andric _mm256_setzero_pd (), 27500b57cec5SDimitry Andric (__mmask8) __U); 27510b57cec5SDimitry Andric } 27520b57cec5SDimitry Andric 27530b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 27540b57cec5SDimitry Andric _mm_getexp_ps (__m128 __A) { 27550b57cec5SDimitry Andric return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 27560b57cec5SDimitry Andric (__v4sf) 27570b57cec5SDimitry Andric _mm_setzero_ps (), 27580b57cec5SDimitry Andric (__mmask8) -1); 27590b57cec5SDimitry Andric } 27600b57cec5SDimitry Andric 27610b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 27620b57cec5SDimitry Andric _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { 27630b57cec5SDimitry Andric return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 27640b57cec5SDimitry Andric (__v4sf) __W, 27650b57cec5SDimitry Andric (__mmask8) __U); 27660b57cec5SDimitry Andric } 27670b57cec5SDimitry Andric 27680b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 27690b57cec5SDimitry Andric _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { 27700b57cec5SDimitry Andric return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 27710b57cec5SDimitry 
Andric (__v4sf) 27720b57cec5SDimitry Andric _mm_setzero_ps (), 27730b57cec5SDimitry Andric (__mmask8) __U); 27740b57cec5SDimitry Andric } 27750b57cec5SDimitry Andric 27760b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 27770b57cec5SDimitry Andric _mm256_getexp_ps (__m256 __A) { 27780b57cec5SDimitry Andric return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 27790b57cec5SDimitry Andric (__v8sf) 27800b57cec5SDimitry Andric _mm256_setzero_ps (), 27810b57cec5SDimitry Andric (__mmask8) -1); 27820b57cec5SDimitry Andric } 27830b57cec5SDimitry Andric 27840b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 27850b57cec5SDimitry Andric _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { 27860b57cec5SDimitry Andric return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 27870b57cec5SDimitry Andric (__v8sf) __W, 27880b57cec5SDimitry Andric (__mmask8) __U); 27890b57cec5SDimitry Andric } 27900b57cec5SDimitry Andric 27910b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 27920b57cec5SDimitry Andric _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { 27930b57cec5SDimitry Andric return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 27940b57cec5SDimitry Andric (__v8sf) 27950b57cec5SDimitry Andric _mm256_setzero_ps (), 27960b57cec5SDimitry Andric (__mmask8) __U); 27970b57cec5SDimitry Andric } 27980b57cec5SDimitry Andric 27990b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 28000b57cec5SDimitry Andric _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 28010b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 28020b57cec5SDimitry Andric (__v2df)_mm_max_pd(__A, __B), 28030b57cec5SDimitry Andric (__v2df)__W); 28040b57cec5SDimitry Andric } 28050b57cec5SDimitry Andric 28060b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 28070b57cec5SDimitry Andric _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) 
{ 28080b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 28090b57cec5SDimitry Andric (__v2df)_mm_max_pd(__A, __B), 28100b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 28110b57cec5SDimitry Andric } 28120b57cec5SDimitry Andric 28130b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 28140b57cec5SDimitry Andric _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 28150b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 28160b57cec5SDimitry Andric (__v4df)_mm256_max_pd(__A, __B), 28170b57cec5SDimitry Andric (__v4df)__W); 28180b57cec5SDimitry Andric } 28190b57cec5SDimitry Andric 28200b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 28210b57cec5SDimitry Andric _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { 28220b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 28230b57cec5SDimitry Andric (__v4df)_mm256_max_pd(__A, __B), 28240b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 28250b57cec5SDimitry Andric } 28260b57cec5SDimitry Andric 28270b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 28280b57cec5SDimitry Andric _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 28290b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 28300b57cec5SDimitry Andric (__v4sf)_mm_max_ps(__A, __B), 28310b57cec5SDimitry Andric (__v4sf)__W); 28320b57cec5SDimitry Andric } 28330b57cec5SDimitry Andric 28340b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 28350b57cec5SDimitry Andric _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { 28360b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 28370b57cec5SDimitry Andric (__v4sf)_mm_max_ps(__A, __B), 28380b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 28390b57cec5SDimitry Andric } 28400b57cec5SDimitry Andric 28410b57cec5SDimitry Andric static __inline__ 
__m256 __DEFAULT_FN_ATTRS256 28420b57cec5SDimitry Andric _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 28430b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 28440b57cec5SDimitry Andric (__v8sf)_mm256_max_ps(__A, __B), 28450b57cec5SDimitry Andric (__v8sf)__W); 28460b57cec5SDimitry Andric } 28470b57cec5SDimitry Andric 28480b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 28490b57cec5SDimitry Andric _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { 28500b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 28510b57cec5SDimitry Andric (__v8sf)_mm256_max_ps(__A, __B), 28520b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 28530b57cec5SDimitry Andric } 28540b57cec5SDimitry Andric 28550b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 28560b57cec5SDimitry Andric _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 28570b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 28580b57cec5SDimitry Andric (__v2df)_mm_min_pd(__A, __B), 28590b57cec5SDimitry Andric (__v2df)__W); 28600b57cec5SDimitry Andric } 28610b57cec5SDimitry Andric 28620b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 28630b57cec5SDimitry Andric _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { 28640b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 28650b57cec5SDimitry Andric (__v2df)_mm_min_pd(__A, __B), 28660b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 28670b57cec5SDimitry Andric } 28680b57cec5SDimitry Andric 28690b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 28700b57cec5SDimitry Andric _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 28710b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 28720b57cec5SDimitry Andric (__v4df)_mm256_min_pd(__A, __B), 28730b57cec5SDimitry Andric 
(__v4df)__W); 28740b57cec5SDimitry Andric } 28750b57cec5SDimitry Andric 28760b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 28770b57cec5SDimitry Andric _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { 28780b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 28790b57cec5SDimitry Andric (__v4df)_mm256_min_pd(__A, __B), 28800b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 28810b57cec5SDimitry Andric } 28820b57cec5SDimitry Andric 28830b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 28840b57cec5SDimitry Andric _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 28850b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 28860b57cec5SDimitry Andric (__v4sf)_mm_min_ps(__A, __B), 28870b57cec5SDimitry Andric (__v4sf)__W); 28880b57cec5SDimitry Andric } 28890b57cec5SDimitry Andric 28900b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 28910b57cec5SDimitry Andric _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { 28920b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 28930b57cec5SDimitry Andric (__v4sf)_mm_min_ps(__A, __B), 28940b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 28950b57cec5SDimitry Andric } 28960b57cec5SDimitry Andric 28970b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 28980b57cec5SDimitry Andric _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 28990b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 29000b57cec5SDimitry Andric (__v8sf)_mm256_min_ps(__A, __B), 29010b57cec5SDimitry Andric (__v8sf)__W); 29020b57cec5SDimitry Andric } 29030b57cec5SDimitry Andric 29040b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 29050b57cec5SDimitry Andric _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { 29060b57cec5SDimitry Andric return 
(__m256)__builtin_ia32_selectps_256((__mmask8)__U, 29070b57cec5SDimitry Andric (__v8sf)_mm256_min_ps(__A, __B), 29080b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 29090b57cec5SDimitry Andric } 29100b57cec5SDimitry Andric 29110b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 29120b57cec5SDimitry Andric _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 29130b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 29140b57cec5SDimitry Andric (__v2df)_mm_mul_pd(__A, __B), 29150b57cec5SDimitry Andric (__v2df)__W); 29160b57cec5SDimitry Andric } 29170b57cec5SDimitry Andric 29180b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 29190b57cec5SDimitry Andric _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { 29200b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 29210b57cec5SDimitry Andric (__v2df)_mm_mul_pd(__A, __B), 29220b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 29230b57cec5SDimitry Andric } 29240b57cec5SDimitry Andric 29250b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 29260b57cec5SDimitry Andric _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 29270b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 29280b57cec5SDimitry Andric (__v4df)_mm256_mul_pd(__A, __B), 29290b57cec5SDimitry Andric (__v4df)__W); 29300b57cec5SDimitry Andric } 29310b57cec5SDimitry Andric 29320b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 29330b57cec5SDimitry Andric _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { 29340b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 29350b57cec5SDimitry Andric (__v4df)_mm256_mul_pd(__A, __B), 29360b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 29370b57cec5SDimitry Andric } 29380b57cec5SDimitry Andric 29390b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 
29400b57cec5SDimitry Andric _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 29410b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 29420b57cec5SDimitry Andric (__v4sf)_mm_mul_ps(__A, __B), 29430b57cec5SDimitry Andric (__v4sf)__W); 29440b57cec5SDimitry Andric } 29450b57cec5SDimitry Andric 29460b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 29470b57cec5SDimitry Andric _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { 29480b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 29490b57cec5SDimitry Andric (__v4sf)_mm_mul_ps(__A, __B), 29500b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 29510b57cec5SDimitry Andric } 29520b57cec5SDimitry Andric 29530b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 29540b57cec5SDimitry Andric _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 29550b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 29560b57cec5SDimitry Andric (__v8sf)_mm256_mul_ps(__A, __B), 29570b57cec5SDimitry Andric (__v8sf)__W); 29580b57cec5SDimitry Andric } 29590b57cec5SDimitry Andric 29600b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 29610b57cec5SDimitry Andric _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { 29620b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 29630b57cec5SDimitry Andric (__v8sf)_mm256_mul_ps(__A, __B), 29640b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 29650b57cec5SDimitry Andric } 29660b57cec5SDimitry Andric 29670b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 29680b57cec5SDimitry Andric _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { 29690b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 29700b57cec5SDimitry Andric (__v4si)_mm_abs_epi32(__A), 29710b57cec5SDimitry Andric (__v4si)__W); 29720b57cec5SDimitry Andric } 
29730b57cec5SDimitry Andric 29740b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 29750b57cec5SDimitry Andric _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { 29760b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 29770b57cec5SDimitry Andric (__v4si)_mm_abs_epi32(__A), 29780b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 29790b57cec5SDimitry Andric } 29800b57cec5SDimitry Andric 29810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 29820b57cec5SDimitry Andric _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { 29830b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 29840b57cec5SDimitry Andric (__v8si)_mm256_abs_epi32(__A), 29850b57cec5SDimitry Andric (__v8si)__W); 29860b57cec5SDimitry Andric } 29870b57cec5SDimitry Andric 29880b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 29890b57cec5SDimitry Andric _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { 29900b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 29910b57cec5SDimitry Andric (__v8si)_mm256_abs_epi32(__A), 29920b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 29930b57cec5SDimitry Andric } 29940b57cec5SDimitry Andric 29950b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 29960b57cec5SDimitry Andric _mm_abs_epi64 (__m128i __A) { 299704eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_abs((__v2di)__A); 29980b57cec5SDimitry Andric } 29990b57cec5SDimitry Andric 30000b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 30010b57cec5SDimitry Andric _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 30020b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 30030b57cec5SDimitry Andric (__v2di)_mm_abs_epi64(__A), 30040b57cec5SDimitry Andric (__v2di)__W); 30050b57cec5SDimitry Andric } 30060b57cec5SDimitry Andric 30070b57cec5SDimitry Andric static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 30080b57cec5SDimitry Andric _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { 30090b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 30100b57cec5SDimitry Andric (__v2di)_mm_abs_epi64(__A), 30110b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 30120b57cec5SDimitry Andric } 30130b57cec5SDimitry Andric 30140b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 30150b57cec5SDimitry Andric _mm256_abs_epi64 (__m256i __A) { 301604eeddc0SDimitry Andric return (__m256i)__builtin_elementwise_abs((__v4di)__A); 30170b57cec5SDimitry Andric } 30180b57cec5SDimitry Andric 30190b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 30200b57cec5SDimitry Andric _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 30210b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 30220b57cec5SDimitry Andric (__v4di)_mm256_abs_epi64(__A), 30230b57cec5SDimitry Andric (__v4di)__W); 30240b57cec5SDimitry Andric } 30250b57cec5SDimitry Andric 30260b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 30270b57cec5SDimitry Andric _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { 30280b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 30290b57cec5SDimitry Andric (__v4di)_mm256_abs_epi64(__A), 30300b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 30310b57cec5SDimitry Andric } 30320b57cec5SDimitry Andric 30330b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 30340b57cec5SDimitry Andric _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 30350b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 30360b57cec5SDimitry Andric (__v4si)_mm_max_epi32(__A, __B), 30370b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 30380b57cec5SDimitry Andric } 30390b57cec5SDimitry Andric 30400b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 
30410b57cec5SDimitry Andric _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 30420b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 30430b57cec5SDimitry Andric (__v4si)_mm_max_epi32(__A, __B), 30440b57cec5SDimitry Andric (__v4si)__W); 30450b57cec5SDimitry Andric } 30460b57cec5SDimitry Andric 30470b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 30480b57cec5SDimitry Andric _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 30490b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 30500b57cec5SDimitry Andric (__v8si)_mm256_max_epi32(__A, __B), 30510b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 30520b57cec5SDimitry Andric } 30530b57cec5SDimitry Andric 30540b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 30550b57cec5SDimitry Andric _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 30560b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 30570b57cec5SDimitry Andric (__v8si)_mm256_max_epi32(__A, __B), 30580b57cec5SDimitry Andric (__v8si)__W); 30590b57cec5SDimitry Andric } 30600b57cec5SDimitry Andric 30610b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 30620b57cec5SDimitry Andric _mm_max_epi64 (__m128i __A, __m128i __B) { 306304eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B); 30640b57cec5SDimitry Andric } 30650b57cec5SDimitry Andric 30660b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 30670b57cec5SDimitry Andric _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 30680b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 30690b57cec5SDimitry Andric (__v2di)_mm_max_epi64(__A, __B), 30700b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 30710b57cec5SDimitry Andric } 30720b57cec5SDimitry Andric 30730b57cec5SDimitry Andric static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 30740b57cec5SDimitry Andric _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 30750b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 30760b57cec5SDimitry Andric (__v2di)_mm_max_epi64(__A, __B), 30770b57cec5SDimitry Andric (__v2di)__W); 30780b57cec5SDimitry Andric } 30790b57cec5SDimitry Andric 30800b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 30810b57cec5SDimitry Andric _mm256_max_epi64 (__m256i __A, __m256i __B) { 308204eeddc0SDimitry Andric return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B); 30830b57cec5SDimitry Andric } 30840b57cec5SDimitry Andric 30850b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 30860b57cec5SDimitry Andric _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 30870b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 30880b57cec5SDimitry Andric (__v4di)_mm256_max_epi64(__A, __B), 30890b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 30900b57cec5SDimitry Andric } 30910b57cec5SDimitry Andric 30920b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 30930b57cec5SDimitry Andric _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 30940b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 30950b57cec5SDimitry Andric (__v4di)_mm256_max_epi64(__A, __B), 30960b57cec5SDimitry Andric (__v4di)__W); 30970b57cec5SDimitry Andric } 30980b57cec5SDimitry Andric 30990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 31000b57cec5SDimitry Andric _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 31010b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 31020b57cec5SDimitry Andric (__v4si)_mm_max_epu32(__A, __B), 31030b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 31040b57cec5SDimitry Andric } 31050b57cec5SDimitry Andric 
31060b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 31070b57cec5SDimitry Andric _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 31080b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 31090b57cec5SDimitry Andric (__v4si)_mm_max_epu32(__A, __B), 31100b57cec5SDimitry Andric (__v4si)__W); 31110b57cec5SDimitry Andric } 31120b57cec5SDimitry Andric 31130b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 31140b57cec5SDimitry Andric _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 31150b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 31160b57cec5SDimitry Andric (__v8si)_mm256_max_epu32(__A, __B), 31170b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 31180b57cec5SDimitry Andric } 31190b57cec5SDimitry Andric 31200b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 31210b57cec5SDimitry Andric _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 31220b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 31230b57cec5SDimitry Andric (__v8si)_mm256_max_epu32(__A, __B), 31240b57cec5SDimitry Andric (__v8si)__W); 31250b57cec5SDimitry Andric } 31260b57cec5SDimitry Andric 31270b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 31280b57cec5SDimitry Andric _mm_max_epu64 (__m128i __A, __m128i __B) { 312904eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B); 31300b57cec5SDimitry Andric } 31310b57cec5SDimitry Andric 31320b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 31330b57cec5SDimitry Andric _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 31340b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 31350b57cec5SDimitry Andric (__v2di)_mm_max_epu64(__A, __B), 31360b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 31370b57cec5SDimitry Andric } 
31380b57cec5SDimitry Andric 31390b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 31400b57cec5SDimitry Andric _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 31410b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 31420b57cec5SDimitry Andric (__v2di)_mm_max_epu64(__A, __B), 31430b57cec5SDimitry Andric (__v2di)__W); 31440b57cec5SDimitry Andric } 31450b57cec5SDimitry Andric 31460b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 31470b57cec5SDimitry Andric _mm256_max_epu64 (__m256i __A, __m256i __B) { 314804eeddc0SDimitry Andric return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B); 31490b57cec5SDimitry Andric } 31500b57cec5SDimitry Andric 31510b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 31520b57cec5SDimitry Andric _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 31530b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 31540b57cec5SDimitry Andric (__v4di)_mm256_max_epu64(__A, __B), 31550b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 31560b57cec5SDimitry Andric } 31570b57cec5SDimitry Andric 31580b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 31590b57cec5SDimitry Andric _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 31600b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 31610b57cec5SDimitry Andric (__v4di)_mm256_max_epu64(__A, __B), 31620b57cec5SDimitry Andric (__v4di)__W); 31630b57cec5SDimitry Andric } 31640b57cec5SDimitry Andric 31650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 31660b57cec5SDimitry Andric _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 31670b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 31680b57cec5SDimitry Andric (__v4si)_mm_min_epi32(__A, __B), 31690b57cec5SDimitry Andric 
(__v4si)_mm_setzero_si128()); 31700b57cec5SDimitry Andric } 31710b57cec5SDimitry Andric 31720b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 31730b57cec5SDimitry Andric _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 31740b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 31750b57cec5SDimitry Andric (__v4si)_mm_min_epi32(__A, __B), 31760b57cec5SDimitry Andric (__v4si)__W); 31770b57cec5SDimitry Andric } 31780b57cec5SDimitry Andric 31790b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 31800b57cec5SDimitry Andric _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 31810b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 31820b57cec5SDimitry Andric (__v8si)_mm256_min_epi32(__A, __B), 31830b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 31840b57cec5SDimitry Andric } 31850b57cec5SDimitry Andric 31860b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 31870b57cec5SDimitry Andric _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 31880b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 31890b57cec5SDimitry Andric (__v8si)_mm256_min_epi32(__A, __B), 31900b57cec5SDimitry Andric (__v8si)__W); 31910b57cec5SDimitry Andric } 31920b57cec5SDimitry Andric 31930b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 31940b57cec5SDimitry Andric _mm_min_epi64 (__m128i __A, __m128i __B) { 319504eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B); 31960b57cec5SDimitry Andric } 31970b57cec5SDimitry Andric 31980b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 31990b57cec5SDimitry Andric _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 32000b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 32010b57cec5SDimitry Andric 
(__v2di)_mm_min_epi64(__A, __B), 32020b57cec5SDimitry Andric (__v2di)__W); 32030b57cec5SDimitry Andric } 32040b57cec5SDimitry Andric 32050b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 32060b57cec5SDimitry Andric _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 32070b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 32080b57cec5SDimitry Andric (__v2di)_mm_min_epi64(__A, __B), 32090b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 32100b57cec5SDimitry Andric } 32110b57cec5SDimitry Andric 32120b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 32130b57cec5SDimitry Andric _mm256_min_epi64 (__m256i __A, __m256i __B) { 321404eeddc0SDimitry Andric return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B); 32150b57cec5SDimitry Andric } 32160b57cec5SDimitry Andric 32170b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 32180b57cec5SDimitry Andric _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 32190b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 32200b57cec5SDimitry Andric (__v4di)_mm256_min_epi64(__A, __B), 32210b57cec5SDimitry Andric (__v4di)__W); 32220b57cec5SDimitry Andric } 32230b57cec5SDimitry Andric 32240b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 32250b57cec5SDimitry Andric _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 32260b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 32270b57cec5SDimitry Andric (__v4di)_mm256_min_epi64(__A, __B), 32280b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 32290b57cec5SDimitry Andric } 32300b57cec5SDimitry Andric 32310b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 32320b57cec5SDimitry Andric _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 32330b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 
32340b57cec5SDimitry Andric (__v4si)_mm_min_epu32(__A, __B), 32350b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 32360b57cec5SDimitry Andric } 32370b57cec5SDimitry Andric 32380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 32390b57cec5SDimitry Andric _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 32400b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 32410b57cec5SDimitry Andric (__v4si)_mm_min_epu32(__A, __B), 32420b57cec5SDimitry Andric (__v4si)__W); 32430b57cec5SDimitry Andric } 32440b57cec5SDimitry Andric 32450b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 32460b57cec5SDimitry Andric _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 32470b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 32480b57cec5SDimitry Andric (__v8si)_mm256_min_epu32(__A, __B), 32490b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 32500b57cec5SDimitry Andric } 32510b57cec5SDimitry Andric 32520b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 32530b57cec5SDimitry Andric _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 32540b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 32550b57cec5SDimitry Andric (__v8si)_mm256_min_epu32(__A, __B), 32560b57cec5SDimitry Andric (__v8si)__W); 32570b57cec5SDimitry Andric } 32580b57cec5SDimitry Andric 32590b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 32600b57cec5SDimitry Andric _mm_min_epu64 (__m128i __A, __m128i __B) { 326104eeddc0SDimitry Andric return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B); 32620b57cec5SDimitry Andric } 32630b57cec5SDimitry Andric 32640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 32650b57cec5SDimitry Andric _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 32660b57cec5SDimitry Andric return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 32670b57cec5SDimitry Andric (__v2di)_mm_min_epu64(__A, __B), 32680b57cec5SDimitry Andric (__v2di)__W); 32690b57cec5SDimitry Andric } 32700b57cec5SDimitry Andric 32710b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 32720b57cec5SDimitry Andric _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 32730b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 32740b57cec5SDimitry Andric (__v2di)_mm_min_epu64(__A, __B), 32750b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 32760b57cec5SDimitry Andric } 32770b57cec5SDimitry Andric 32780b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 32790b57cec5SDimitry Andric _mm256_min_epu64 (__m256i __A, __m256i __B) { 328004eeddc0SDimitry Andric return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B); 32810b57cec5SDimitry Andric } 32820b57cec5SDimitry Andric 32830b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 32840b57cec5SDimitry Andric _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 32850b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 32860b57cec5SDimitry Andric (__v4di)_mm256_min_epu64(__A, __B), 32870b57cec5SDimitry Andric (__v4di)__W); 32880b57cec5SDimitry Andric } 32890b57cec5SDimitry Andric 32900b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 32910b57cec5SDimitry Andric _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 32920b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 32930b57cec5SDimitry Andric (__v4di)_mm256_min_epu64(__A, __B), 32940b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 32950b57cec5SDimitry Andric } 32960b57cec5SDimitry Andric 32970b57cec5SDimitry Andric #define _mm_roundscale_pd(A, imm) \ 3298349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 32990b57cec5SDimitry 
Andric (int)(imm), \ 33000b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 3301349cc55cSDimitry Andric (__mmask8)-1)) 33020b57cec5SDimitry Andric 33030b57cec5SDimitry Andric 33040b57cec5SDimitry Andric #define _mm_mask_roundscale_pd(W, U, A, imm) \ 3305349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 33060b57cec5SDimitry Andric (int)(imm), \ 33070b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 3308349cc55cSDimitry Andric (__mmask8)(U))) 33090b57cec5SDimitry Andric 33100b57cec5SDimitry Andric 33110b57cec5SDimitry Andric #define _mm_maskz_roundscale_pd(U, A, imm) \ 3312349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 33130b57cec5SDimitry Andric (int)(imm), \ 33140b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 3315349cc55cSDimitry Andric (__mmask8)(U))) 33160b57cec5SDimitry Andric 33170b57cec5SDimitry Andric 33180b57cec5SDimitry Andric #define _mm256_roundscale_pd(A, imm) \ 3319349cc55cSDimitry Andric ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 33200b57cec5SDimitry Andric (int)(imm), \ 33210b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd(), \ 3322349cc55cSDimitry Andric (__mmask8)-1)) 33230b57cec5SDimitry Andric 33240b57cec5SDimitry Andric 33250b57cec5SDimitry Andric #define _mm256_mask_roundscale_pd(W, U, A, imm) \ 3326349cc55cSDimitry Andric ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 33270b57cec5SDimitry Andric (int)(imm), \ 33280b57cec5SDimitry Andric (__v4df)(__m256d)(W), \ 3329349cc55cSDimitry Andric (__mmask8)(U))) 33300b57cec5SDimitry Andric 33310b57cec5SDimitry Andric 33320b57cec5SDimitry Andric #define _mm256_maskz_roundscale_pd(U, A, imm) \ 3333349cc55cSDimitry Andric ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 33340b57cec5SDimitry Andric (int)(imm), \ 33350b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd(), \ 3336349cc55cSDimitry Andric (__mmask8)(U))) 33370b57cec5SDimitry Andric 
33380b57cec5SDimitry Andric #define _mm_roundscale_ps(A, imm) \ 3339349cc55cSDimitry Andric ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 33400b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 3341349cc55cSDimitry Andric (__mmask8)-1)) 33420b57cec5SDimitry Andric 33430b57cec5SDimitry Andric 33440b57cec5SDimitry Andric #define _mm_mask_roundscale_ps(W, U, A, imm) \ 3345349cc55cSDimitry Andric ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 33460b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 3347349cc55cSDimitry Andric (__mmask8)(U))) 33480b57cec5SDimitry Andric 33490b57cec5SDimitry Andric 33500b57cec5SDimitry Andric #define _mm_maskz_roundscale_ps(U, A, imm) \ 3351349cc55cSDimitry Andric ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 33520b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 3353349cc55cSDimitry Andric (__mmask8)(U))) 33540b57cec5SDimitry Andric 33550b57cec5SDimitry Andric #define _mm256_roundscale_ps(A, imm) \ 3356349cc55cSDimitry Andric ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 33570b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 3358349cc55cSDimitry Andric (__mmask8)-1)) 33590b57cec5SDimitry Andric 33600b57cec5SDimitry Andric #define _mm256_mask_roundscale_ps(W, U, A, imm) \ 3361349cc55cSDimitry Andric ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 33620b57cec5SDimitry Andric (__v8sf)(__m256)(W), \ 3363349cc55cSDimitry Andric (__mmask8)(U))) 33640b57cec5SDimitry Andric 33650b57cec5SDimitry Andric 33660b57cec5SDimitry Andric #define _mm256_maskz_roundscale_ps(U, A, imm) \ 3367349cc55cSDimitry Andric ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 33680b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 3369349cc55cSDimitry Andric (__mmask8)(U))) 33700b57cec5SDimitry Andric 33710b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 
33720b57cec5SDimitry Andric _mm_scalef_pd (__m128d __A, __m128d __B) { 33730b57cec5SDimitry Andric return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 33740b57cec5SDimitry Andric (__v2df) __B, 33750b57cec5SDimitry Andric (__v2df) 33760b57cec5SDimitry Andric _mm_setzero_pd (), 33770b57cec5SDimitry Andric (__mmask8) -1); 33780b57cec5SDimitry Andric } 33790b57cec5SDimitry Andric 33800b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 33810b57cec5SDimitry Andric _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 33820b57cec5SDimitry Andric __m128d __B) { 33830b57cec5SDimitry Andric return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 33840b57cec5SDimitry Andric (__v2df) __B, 33850b57cec5SDimitry Andric (__v2df) __W, 33860b57cec5SDimitry Andric (__mmask8) __U); 33870b57cec5SDimitry Andric } 33880b57cec5SDimitry Andric 33890b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 33900b57cec5SDimitry Andric _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { 33910b57cec5SDimitry Andric return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 33920b57cec5SDimitry Andric (__v2df) __B, 33930b57cec5SDimitry Andric (__v2df) 33940b57cec5SDimitry Andric _mm_setzero_pd (), 33950b57cec5SDimitry Andric (__mmask8) __U); 33960b57cec5SDimitry Andric } 33970b57cec5SDimitry Andric 33980b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 33990b57cec5SDimitry Andric _mm256_scalef_pd (__m256d __A, __m256d __B) { 34000b57cec5SDimitry Andric return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 34010b57cec5SDimitry Andric (__v4df) __B, 34020b57cec5SDimitry Andric (__v4df) 34030b57cec5SDimitry Andric _mm256_setzero_pd (), 34040b57cec5SDimitry Andric (__mmask8) -1); 34050b57cec5SDimitry Andric } 34060b57cec5SDimitry Andric 34070b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 34080b57cec5SDimitry Andric _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d 
__A, 34090b57cec5SDimitry Andric __m256d __B) { 34100b57cec5SDimitry Andric return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 34110b57cec5SDimitry Andric (__v4df) __B, 34120b57cec5SDimitry Andric (__v4df) __W, 34130b57cec5SDimitry Andric (__mmask8) __U); 34140b57cec5SDimitry Andric } 34150b57cec5SDimitry Andric 34160b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 34170b57cec5SDimitry Andric _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { 34180b57cec5SDimitry Andric return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 34190b57cec5SDimitry Andric (__v4df) __B, 34200b57cec5SDimitry Andric (__v4df) 34210b57cec5SDimitry Andric _mm256_setzero_pd (), 34220b57cec5SDimitry Andric (__mmask8) __U); 34230b57cec5SDimitry Andric } 34240b57cec5SDimitry Andric 34250b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 34260b57cec5SDimitry Andric _mm_scalef_ps (__m128 __A, __m128 __B) { 34270b57cec5SDimitry Andric return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 34280b57cec5SDimitry Andric (__v4sf) __B, 34290b57cec5SDimitry Andric (__v4sf) 34300b57cec5SDimitry Andric _mm_setzero_ps (), 34310b57cec5SDimitry Andric (__mmask8) -1); 34320b57cec5SDimitry Andric } 34330b57cec5SDimitry Andric 34340b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 34350b57cec5SDimitry Andric _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 34360b57cec5SDimitry Andric return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 34370b57cec5SDimitry Andric (__v4sf) __B, 34380b57cec5SDimitry Andric (__v4sf) __W, 34390b57cec5SDimitry Andric (__mmask8) __U); 34400b57cec5SDimitry Andric } 34410b57cec5SDimitry Andric 34420b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 34430b57cec5SDimitry Andric _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { 34440b57cec5SDimitry Andric return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 
34450b57cec5SDimitry Andric (__v4sf) __B, 34460b57cec5SDimitry Andric (__v4sf) 34470b57cec5SDimitry Andric _mm_setzero_ps (), 34480b57cec5SDimitry Andric (__mmask8) __U); 34490b57cec5SDimitry Andric } 34500b57cec5SDimitry Andric 34510b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 34520b57cec5SDimitry Andric _mm256_scalef_ps (__m256 __A, __m256 __B) { 34530b57cec5SDimitry Andric return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 34540b57cec5SDimitry Andric (__v8sf) __B, 34550b57cec5SDimitry Andric (__v8sf) 34560b57cec5SDimitry Andric _mm256_setzero_ps (), 34570b57cec5SDimitry Andric (__mmask8) -1); 34580b57cec5SDimitry Andric } 34590b57cec5SDimitry Andric 34600b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 34610b57cec5SDimitry Andric _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 34620b57cec5SDimitry Andric __m256 __B) { 34630b57cec5SDimitry Andric return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 34640b57cec5SDimitry Andric (__v8sf) __B, 34650b57cec5SDimitry Andric (__v8sf) __W, 34660b57cec5SDimitry Andric (__mmask8) __U); 34670b57cec5SDimitry Andric } 34680b57cec5SDimitry Andric 34690b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 34700b57cec5SDimitry Andric _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 34710b57cec5SDimitry Andric return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 34720b57cec5SDimitry Andric (__v8sf) __B, 34730b57cec5SDimitry Andric (__v8sf) 34740b57cec5SDimitry Andric _mm256_setzero_ps (), 34750b57cec5SDimitry Andric (__mmask8) __U); 34760b57cec5SDimitry Andric } 34770b57cec5SDimitry Andric 34780b57cec5SDimitry Andric #define _mm_i64scatter_pd(addr, index, v1, scale) \ 34790b57cec5SDimitry Andric __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \ 34800b57cec5SDimitry Andric (__v2di)(__m128i)(index), \ 34810b57cec5SDimitry Andric (__v2df)(__m128d)(v1), (int)(scale)) 34820b57cec5SDimitry Andric 
/* 64-bit-index scatter macros for 64-bit elements.
 *
 * Each macro forwards five arguments to a scatterdiv* builtin: addr (cast to
 * void *), a mask, the index vector (viewed as __v2di / __v4di), the value
 * vector v1, and scale (cast to int).  The unmasked forms pass an all-ones
 * mask ((__mmask8)-1); the mask_ forms pass the caller's mask.  The
 * expansions are bare builtin calls with no value to use.  These are macros
 * rather than functions, presumably because the builtin needs scale as a
 * compile-time constant. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 35140b57cec5SDimitry Andric __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \ 35150b57cec5SDimitry Andric (__v4di)(__m256i)(index), \ 35160b57cec5SDimitry Andric (__v4di)(__m256i)(v1), (int)(scale)) 35170b57cec5SDimitry Andric 35180b57cec5SDimitry Andric #define _mm_i64scatter_ps(addr, index, v1, scale) \ 35190b57cec5SDimitry Andric __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \ 35200b57cec5SDimitry Andric (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 35210b57cec5SDimitry Andric (int)(scale)) 35220b57cec5SDimitry Andric 35230b57cec5SDimitry Andric #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 35240b57cec5SDimitry Andric __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \ 35250b57cec5SDimitry Andric (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 35260b57cec5SDimitry Andric (int)(scale)) 35270b57cec5SDimitry Andric 35280b57cec5SDimitry Andric #define _mm_i64scatter_epi32(addr, index, v1, scale) \ 35290b57cec5SDimitry Andric __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \ 35300b57cec5SDimitry Andric (__v2di)(__m128i)(index), \ 35310b57cec5SDimitry Andric (__v4si)(__m128i)(v1), (int)(scale)) 35320b57cec5SDimitry Andric 35330b57cec5SDimitry Andric #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 35340b57cec5SDimitry Andric __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \ 35350b57cec5SDimitry Andric (__v2di)(__m128i)(index), \ 35360b57cec5SDimitry Andric (__v4si)(__m128i)(v1), (int)(scale)) 35370b57cec5SDimitry Andric 35380b57cec5SDimitry Andric #define _mm256_i64scatter_ps(addr, index, v1, scale) \ 35390b57cec5SDimitry Andric __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \ 35400b57cec5SDimitry Andric (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 35410b57cec5SDimitry Andric (int)(scale)) 35420b57cec5SDimitry Andric 35430b57cec5SDimitry Andric #define 
_mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 35440b57cec5SDimitry Andric __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \ 35450b57cec5SDimitry Andric (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 35460b57cec5SDimitry Andric (int)(scale)) 35470b57cec5SDimitry Andric 35480b57cec5SDimitry Andric #define _mm256_i64scatter_epi32(addr, index, v1, scale) \ 35490b57cec5SDimitry Andric __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \ 35500b57cec5SDimitry Andric (__v4di)(__m256i)(index), \ 35510b57cec5SDimitry Andric (__v4si)(__m128i)(v1), (int)(scale)) 35520b57cec5SDimitry Andric 35530b57cec5SDimitry Andric #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 35540b57cec5SDimitry Andric __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \ 35550b57cec5SDimitry Andric (__v4di)(__m256i)(index), \ 35560b57cec5SDimitry Andric (__v4si)(__m128i)(v1), (int)(scale)) 35570b57cec5SDimitry Andric 35580b57cec5SDimitry Andric #define _mm_i32scatter_pd(addr, index, v1, scale) \ 35590b57cec5SDimitry Andric __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \ 35600b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 35610b57cec5SDimitry Andric (__v2df)(__m128d)(v1), (int)(scale)) 35620b57cec5SDimitry Andric 35630b57cec5SDimitry Andric #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 35640b57cec5SDimitry Andric __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \ 35650b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 35660b57cec5SDimitry Andric (__v2df)(__m128d)(v1), (int)(scale)) 35670b57cec5SDimitry Andric 35680b57cec5SDimitry Andric #define _mm_i32scatter_epi64(addr, index, v1, scale) \ 35690b57cec5SDimitry Andric __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \ 35700b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 35710b57cec5SDimitry Andric (__v2di)(__m128i)(v1), (int)(scale)) 35720b57cec5SDimitry Andric 35730b57cec5SDimitry Andric #define 
_mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 35740b57cec5SDimitry Andric __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \ 35750b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 35760b57cec5SDimitry Andric (__v2di)(__m128i)(v1), (int)(scale)) 35770b57cec5SDimitry Andric 35780b57cec5SDimitry Andric #define _mm256_i32scatter_pd(addr, index, v1, scale) \ 35790b57cec5SDimitry Andric __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \ 35800b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 35810b57cec5SDimitry Andric (__v4df)(__m256d)(v1), (int)(scale)) 35820b57cec5SDimitry Andric 35830b57cec5SDimitry Andric #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 35840b57cec5SDimitry Andric __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \ 35850b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 35860b57cec5SDimitry Andric (__v4df)(__m256d)(v1), (int)(scale)) 35870b57cec5SDimitry Andric 35880b57cec5SDimitry Andric #define _mm256_i32scatter_epi64(addr, index, v1, scale) \ 35890b57cec5SDimitry Andric __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \ 35900b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 35910b57cec5SDimitry Andric (__v4di)(__m256i)(v1), (int)(scale)) 35920b57cec5SDimitry Andric 35930b57cec5SDimitry Andric #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 35940b57cec5SDimitry Andric __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \ 35950b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 35960b57cec5SDimitry Andric (__v4di)(__m256i)(v1), (int)(scale)) 35970b57cec5SDimitry Andric 35980b57cec5SDimitry Andric #define _mm_i32scatter_ps(addr, index, v1, scale) \ 35990b57cec5SDimitry Andric __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \ 36000b57cec5SDimitry Andric (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 36010b57cec5SDimitry Andric (int)(scale)) 36020b57cec5SDimitry Andric 36030b57cec5SDimitry Andric #define 
_mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 36040b57cec5SDimitry Andric __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \ 36050b57cec5SDimitry Andric (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 36060b57cec5SDimitry Andric (int)(scale)) 36070b57cec5SDimitry Andric 36080b57cec5SDimitry Andric #define _mm_i32scatter_epi32(addr, index, v1, scale) \ 36090b57cec5SDimitry Andric __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \ 36100b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 36110b57cec5SDimitry Andric (__v4si)(__m128i)(v1), (int)(scale)) 36120b57cec5SDimitry Andric 36130b57cec5SDimitry Andric #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 36140b57cec5SDimitry Andric __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \ 36150b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 36160b57cec5SDimitry Andric (__v4si)(__m128i)(v1), (int)(scale)) 36170b57cec5SDimitry Andric 36180b57cec5SDimitry Andric #define _mm256_i32scatter_ps(addr, index, v1, scale) \ 36190b57cec5SDimitry Andric __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \ 36200b57cec5SDimitry Andric (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 36210b57cec5SDimitry Andric (int)(scale)) 36220b57cec5SDimitry Andric 36230b57cec5SDimitry Andric #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 36240b57cec5SDimitry Andric __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \ 36250b57cec5SDimitry Andric (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 36260b57cec5SDimitry Andric (int)(scale)) 36270b57cec5SDimitry Andric 36280b57cec5SDimitry Andric #define _mm256_i32scatter_epi32(addr, index, v1, scale) \ 36290b57cec5SDimitry Andric __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \ 36300b57cec5SDimitry Andric (__v8si)(__m256i)(index), \ 36310b57cec5SDimitry Andric (__v8si)(__m256i)(v1), (int)(scale)) 36320b57cec5SDimitry Andric 36330b57cec5SDimitry Andric #define 
_mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 36340b57cec5SDimitry Andric __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \ 36350b57cec5SDimitry Andric (__v8si)(__m256i)(index), \ 36360b57cec5SDimitry Andric (__v8si)(__m256i)(v1), (int)(scale)) 36370b57cec5SDimitry Andric 36380b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 36390b57cec5SDimitry Andric _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { 36400b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 36410b57cec5SDimitry Andric (__v2df)_mm_sqrt_pd(__A), 36420b57cec5SDimitry Andric (__v2df)__W); 36430b57cec5SDimitry Andric } 36440b57cec5SDimitry Andric 36450b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 36460b57cec5SDimitry Andric _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { 36470b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 36480b57cec5SDimitry Andric (__v2df)_mm_sqrt_pd(__A), 36490b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 36500b57cec5SDimitry Andric } 36510b57cec5SDimitry Andric 36520b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 36530b57cec5SDimitry Andric _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { 36540b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 36550b57cec5SDimitry Andric (__v4df)_mm256_sqrt_pd(__A), 36560b57cec5SDimitry Andric (__v4df)__W); 36570b57cec5SDimitry Andric } 36580b57cec5SDimitry Andric 36590b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 36600b57cec5SDimitry Andric _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { 36610b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 36620b57cec5SDimitry Andric (__v4df)_mm256_sqrt_pd(__A), 36630b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 36640b57cec5SDimitry Andric } 36650b57cec5SDimitry Andric 36660b57cec5SDimitry Andric static __inline__ __m128 
__DEFAULT_FN_ATTRS128 36670b57cec5SDimitry Andric _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { 36680b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 36690b57cec5SDimitry Andric (__v4sf)_mm_sqrt_ps(__A), 36700b57cec5SDimitry Andric (__v4sf)__W); 36710b57cec5SDimitry Andric } 36720b57cec5SDimitry Andric 36730b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 36740b57cec5SDimitry Andric _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { 36750b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 36760b57cec5SDimitry Andric (__v4sf)_mm_sqrt_ps(__A), 36770b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 36780b57cec5SDimitry Andric } 36790b57cec5SDimitry Andric 36800b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 36810b57cec5SDimitry Andric _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { 36820b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 36830b57cec5SDimitry Andric (__v8sf)_mm256_sqrt_ps(__A), 36840b57cec5SDimitry Andric (__v8sf)__W); 36850b57cec5SDimitry Andric } 36860b57cec5SDimitry Andric 36870b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 36880b57cec5SDimitry Andric _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { 36890b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 36900b57cec5SDimitry Andric (__v8sf)_mm256_sqrt_ps(__A), 36910b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 36920b57cec5SDimitry Andric } 36930b57cec5SDimitry Andric 36940b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 36950b57cec5SDimitry Andric _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 36960b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 36970b57cec5SDimitry Andric (__v2df)_mm_sub_pd(__A, __B), 36980b57cec5SDimitry Andric (__v2df)__W); 36990b57cec5SDimitry Andric } 37000b57cec5SDimitry Andric 
37010b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 37020b57cec5SDimitry Andric _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { 37030b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 37040b57cec5SDimitry Andric (__v2df)_mm_sub_pd(__A, __B), 37050b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 37060b57cec5SDimitry Andric } 37070b57cec5SDimitry Andric 37080b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 37090b57cec5SDimitry Andric _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 37100b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 37110b57cec5SDimitry Andric (__v4df)_mm256_sub_pd(__A, __B), 37120b57cec5SDimitry Andric (__v4df)__W); 37130b57cec5SDimitry Andric } 37140b57cec5SDimitry Andric 37150b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 37160b57cec5SDimitry Andric _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { 37170b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 37180b57cec5SDimitry Andric (__v4df)_mm256_sub_pd(__A, __B), 37190b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 37200b57cec5SDimitry Andric } 37210b57cec5SDimitry Andric 37220b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 37230b57cec5SDimitry Andric _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 37240b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 37250b57cec5SDimitry Andric (__v4sf)_mm_sub_ps(__A, __B), 37260b57cec5SDimitry Andric (__v4sf)__W); 37270b57cec5SDimitry Andric } 37280b57cec5SDimitry Andric 37290b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 37300b57cec5SDimitry Andric _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { 37310b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 37320b57cec5SDimitry Andric (__v4sf)_mm_sub_ps(__A, __B), 
37330b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 37340b57cec5SDimitry Andric } 37350b57cec5SDimitry Andric 37360b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 37370b57cec5SDimitry Andric _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 37380b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 37390b57cec5SDimitry Andric (__v8sf)_mm256_sub_ps(__A, __B), 37400b57cec5SDimitry Andric (__v8sf)__W); 37410b57cec5SDimitry Andric } 37420b57cec5SDimitry Andric 37430b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 37440b57cec5SDimitry Andric _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { 37450b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 37460b57cec5SDimitry Andric (__v8sf)_mm256_sub_ps(__A, __B), 37470b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 37480b57cec5SDimitry Andric } 37490b57cec5SDimitry Andric 37500b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 37510b57cec5SDimitry Andric _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { 37520b57cec5SDimitry Andric return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, 37530b57cec5SDimitry Andric (__v4si)__B); 37540b57cec5SDimitry Andric } 37550b57cec5SDimitry Andric 37560b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 37570b57cec5SDimitry Andric _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, 37580b57cec5SDimitry Andric __m128i __B) { 37590b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 37600b57cec5SDimitry Andric (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 37610b57cec5SDimitry Andric (__v4si)__A); 37620b57cec5SDimitry Andric } 37630b57cec5SDimitry Andric 37640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 37650b57cec5SDimitry Andric _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, 37660b57cec5SDimitry Andric 
__m128i __B) { 37670b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 37680b57cec5SDimitry Andric (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 37690b57cec5SDimitry Andric (__v4si)__I); 37700b57cec5SDimitry Andric } 37710b57cec5SDimitry Andric 37720b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 37730b57cec5SDimitry Andric _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, 37740b57cec5SDimitry Andric __m128i __B) { 37750b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 37760b57cec5SDimitry Andric (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 37770b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 37780b57cec5SDimitry Andric } 37790b57cec5SDimitry Andric 37800b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 37810b57cec5SDimitry Andric _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { 37820b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, 37830b57cec5SDimitry Andric (__v8si) __B); 37840b57cec5SDimitry Andric } 37850b57cec5SDimitry Andric 37860b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 37870b57cec5SDimitry Andric _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, 37880b57cec5SDimitry Andric __m256i __B) { 37890b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 37900b57cec5SDimitry Andric (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 37910b57cec5SDimitry Andric (__v8si)__A); 37920b57cec5SDimitry Andric } 37930b57cec5SDimitry Andric 37940b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 37950b57cec5SDimitry Andric _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, 37960b57cec5SDimitry Andric __m256i __B) { 37970b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 37980b57cec5SDimitry Andric (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 37990b57cec5SDimitry Andric 
(__v8si)__I); 38000b57cec5SDimitry Andric } 38010b57cec5SDimitry Andric 38020b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 38030b57cec5SDimitry Andric _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, 38040b57cec5SDimitry Andric __m256i __B) { 38050b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 38060b57cec5SDimitry Andric (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 38070b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 38080b57cec5SDimitry Andric } 38090b57cec5SDimitry Andric 38100b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 38110b57cec5SDimitry Andric _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { 38120b57cec5SDimitry Andric return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, 38130b57cec5SDimitry Andric (__v2df)__B); 38140b57cec5SDimitry Andric } 38150b57cec5SDimitry Andric 38160b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 38170b57cec5SDimitry Andric _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { 38180b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128(__U, 38190b57cec5SDimitry Andric (__v2df)_mm_permutex2var_pd(__A, __I, __B), 38200b57cec5SDimitry Andric (__v2df)__A); 38210b57cec5SDimitry Andric } 38220b57cec5SDimitry Andric 38230b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 38240b57cec5SDimitry Andric _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { 38250b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128(__U, 38260b57cec5SDimitry Andric (__v2df)_mm_permutex2var_pd(__A, __I, __B), 38270b57cec5SDimitry Andric (__v2df)(__m128d)__I); 38280b57cec5SDimitry Andric } 38290b57cec5SDimitry Andric 38300b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 38310b57cec5SDimitry Andric _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { 
38320b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128(__U, 38330b57cec5SDimitry Andric (__v2df)_mm_permutex2var_pd(__A, __I, __B), 38340b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 38350b57cec5SDimitry Andric } 38360b57cec5SDimitry Andric 38370b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 38380b57cec5SDimitry Andric _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { 38390b57cec5SDimitry Andric return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, 38400b57cec5SDimitry Andric (__v4df)__B); 38410b57cec5SDimitry Andric } 38420b57cec5SDimitry Andric 38430b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 38440b57cec5SDimitry Andric _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, 38450b57cec5SDimitry Andric __m256d __B) { 38460b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256(__U, 38470b57cec5SDimitry Andric (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 38480b57cec5SDimitry Andric (__v4df)__A); 38490b57cec5SDimitry Andric } 38500b57cec5SDimitry Andric 38510b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 38520b57cec5SDimitry Andric _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, 38530b57cec5SDimitry Andric __m256d __B) { 38540b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256(__U, 38550b57cec5SDimitry Andric (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 38560b57cec5SDimitry Andric (__v4df)(__m256d)__I); 38570b57cec5SDimitry Andric } 38580b57cec5SDimitry Andric 38590b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 38600b57cec5SDimitry Andric _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, 38610b57cec5SDimitry Andric __m256d __B) { 38620b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256(__U, 38630b57cec5SDimitry Andric (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 38640b57cec5SDimitry Andric 
(__v4df)_mm256_setzero_pd()); 38650b57cec5SDimitry Andric } 38660b57cec5SDimitry Andric 38670b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 38680b57cec5SDimitry Andric _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { 38690b57cec5SDimitry Andric return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, 38700b57cec5SDimitry Andric (__v4sf)__B); 38710b57cec5SDimitry Andric } 38720b57cec5SDimitry Andric 38730b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 38740b57cec5SDimitry Andric _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { 38750b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128(__U, 38760b57cec5SDimitry Andric (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 38770b57cec5SDimitry Andric (__v4sf)__A); 38780b57cec5SDimitry Andric } 38790b57cec5SDimitry Andric 38800b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 38810b57cec5SDimitry Andric _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { 38820b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128(__U, 38830b57cec5SDimitry Andric (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 38840b57cec5SDimitry Andric (__v4sf)(__m128)__I); 38850b57cec5SDimitry Andric } 38860b57cec5SDimitry Andric 38870b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 38880b57cec5SDimitry Andric _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { 38890b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128(__U, 38900b57cec5SDimitry Andric (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 38910b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 38920b57cec5SDimitry Andric } 38930b57cec5SDimitry Andric 38940b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 38950b57cec5SDimitry Andric _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { 38960b57cec5SDimitry Andric return 
(__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, 38970b57cec5SDimitry Andric (__v8sf) __B); 38980b57cec5SDimitry Andric } 38990b57cec5SDimitry Andric 39000b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 39010b57cec5SDimitry Andric _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { 39020b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256(__U, 39030b57cec5SDimitry Andric (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 39040b57cec5SDimitry Andric (__v8sf)__A); 39050b57cec5SDimitry Andric } 39060b57cec5SDimitry Andric 39070b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 39080b57cec5SDimitry Andric _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, 39090b57cec5SDimitry Andric __m256 __B) { 39100b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256(__U, 39110b57cec5SDimitry Andric (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 39120b57cec5SDimitry Andric (__v8sf)(__m256)__I); 39130b57cec5SDimitry Andric } 39140b57cec5SDimitry Andric 39150b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 39160b57cec5SDimitry Andric _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, 39170b57cec5SDimitry Andric __m256 __B) { 39180b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256(__U, 39190b57cec5SDimitry Andric (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 39200b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 39210b57cec5SDimitry Andric } 39220b57cec5SDimitry Andric 39230b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 39240b57cec5SDimitry Andric _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { 39250b57cec5SDimitry Andric return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, 39260b57cec5SDimitry Andric (__v2di)__B); 39270b57cec5SDimitry Andric } 39280b57cec5SDimitry Andric 39290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 
39300b57cec5SDimitry Andric _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, 39310b57cec5SDimitry Andric __m128i __B) { 39320b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__U, 39330b57cec5SDimitry Andric (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 39340b57cec5SDimitry Andric (__v2di)__A); 39350b57cec5SDimitry Andric } 39360b57cec5SDimitry Andric 39370b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 39380b57cec5SDimitry Andric _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, 39390b57cec5SDimitry Andric __m128i __B) { 39400b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__U, 39410b57cec5SDimitry Andric (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 39420b57cec5SDimitry Andric (__v2di)__I); 39430b57cec5SDimitry Andric } 39440b57cec5SDimitry Andric 39450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 39460b57cec5SDimitry Andric _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, 39470b57cec5SDimitry Andric __m128i __B) { 39480b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__U, 39490b57cec5SDimitry Andric (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 39500b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 39510b57cec5SDimitry Andric } 39520b57cec5SDimitry Andric 39530b57cec5SDimitry Andric 39540b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 39550b57cec5SDimitry Andric _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { 39560b57cec5SDimitry Andric return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, 39570b57cec5SDimitry Andric (__v4di) __B); 39580b57cec5SDimitry Andric } 39590b57cec5SDimitry Andric 39600b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 39610b57cec5SDimitry Andric _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, 39620b57cec5SDimitry Andric __m256i __B) { 39630b57cec5SDimitry Andric return 
(__m256i)__builtin_ia32_selectq_256(__U, 39640b57cec5SDimitry Andric (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 39650b57cec5SDimitry Andric (__v4di)__A); 39660b57cec5SDimitry Andric } 39670b57cec5SDimitry Andric 39680b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 39690b57cec5SDimitry Andric _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, 39700b57cec5SDimitry Andric __m256i __B) { 39710b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__U, 39720b57cec5SDimitry Andric (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 39730b57cec5SDimitry Andric (__v4di)__I); 39740b57cec5SDimitry Andric } 39750b57cec5SDimitry Andric 39760b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 39770b57cec5SDimitry Andric _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, 39780b57cec5SDimitry Andric __m256i __B) { 39790b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__U, 39800b57cec5SDimitry Andric (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 39810b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 39820b57cec5SDimitry Andric } 39830b57cec5SDimitry Andric 39840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 39850b57cec5SDimitry Andric _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 39860b57cec5SDimitry Andric { 39870b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 39880b57cec5SDimitry Andric (__v4si)_mm_cvtepi8_epi32(__A), 39890b57cec5SDimitry Andric (__v4si)__W); 39900b57cec5SDimitry Andric } 39910b57cec5SDimitry Andric 39920b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 39930b57cec5SDimitry Andric _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) 39940b57cec5SDimitry Andric { 39950b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 39960b57cec5SDimitry Andric (__v4si)_mm_cvtepi8_epi32(__A), 39970b57cec5SDimitry Andric 
(__v4si)_mm_setzero_si128()); 39980b57cec5SDimitry Andric } 39990b57cec5SDimitry Andric 40000b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 40010b57cec5SDimitry Andric _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 40020b57cec5SDimitry Andric { 40030b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 40040b57cec5SDimitry Andric (__v8si)_mm256_cvtepi8_epi32(__A), 40050b57cec5SDimitry Andric (__v8si)__W); 40060b57cec5SDimitry Andric } 40070b57cec5SDimitry Andric 40080b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 40090b57cec5SDimitry Andric _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 40100b57cec5SDimitry Andric { 40110b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 40120b57cec5SDimitry Andric (__v8si)_mm256_cvtepi8_epi32(__A), 40130b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 40140b57cec5SDimitry Andric } 40150b57cec5SDimitry Andric 40160b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 40170b57cec5SDimitry Andric _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 40180b57cec5SDimitry Andric { 40190b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 40200b57cec5SDimitry Andric (__v2di)_mm_cvtepi8_epi64(__A), 40210b57cec5SDimitry Andric (__v2di)__W); 40220b57cec5SDimitry Andric } 40230b57cec5SDimitry Andric 40240b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 40250b57cec5SDimitry Andric _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 40260b57cec5SDimitry Andric { 40270b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 40280b57cec5SDimitry Andric (__v2di)_mm_cvtepi8_epi64(__A), 40290b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 40300b57cec5SDimitry Andric } 40310b57cec5SDimitry Andric 40320b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 40330b57cec5SDimitry Andric 
_mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 40340b57cec5SDimitry Andric { 40350b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 40360b57cec5SDimitry Andric (__v4di)_mm256_cvtepi8_epi64(__A), 40370b57cec5SDimitry Andric (__v4di)__W); 40380b57cec5SDimitry Andric } 40390b57cec5SDimitry Andric 40400b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 40410b57cec5SDimitry Andric _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 40420b57cec5SDimitry Andric { 40430b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 40440b57cec5SDimitry Andric (__v4di)_mm256_cvtepi8_epi64(__A), 40450b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 40460b57cec5SDimitry Andric } 40470b57cec5SDimitry Andric 40480b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 40490b57cec5SDimitry Andric _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 40500b57cec5SDimitry Andric { 40510b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 40520b57cec5SDimitry Andric (__v2di)_mm_cvtepi32_epi64(__X), 40530b57cec5SDimitry Andric (__v2di)__W); 40540b57cec5SDimitry Andric } 40550b57cec5SDimitry Andric 40560b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 40570b57cec5SDimitry Andric _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 40580b57cec5SDimitry Andric { 40590b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 40600b57cec5SDimitry Andric (__v2di)_mm_cvtepi32_epi64(__X), 40610b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 40620b57cec5SDimitry Andric } 40630b57cec5SDimitry Andric 40640b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 40650b57cec5SDimitry Andric _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 40660b57cec5SDimitry Andric { 40670b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 
40680b57cec5SDimitry Andric (__v4di)_mm256_cvtepi32_epi64(__X), 40690b57cec5SDimitry Andric (__v4di)__W); 40700b57cec5SDimitry Andric } 40710b57cec5SDimitry Andric 40720b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 40730b57cec5SDimitry Andric _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 40740b57cec5SDimitry Andric { 40750b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 40760b57cec5SDimitry Andric (__v4di)_mm256_cvtepi32_epi64(__X), 40770b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 40780b57cec5SDimitry Andric } 40790b57cec5SDimitry Andric 40800b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 40810b57cec5SDimitry Andric _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 40820b57cec5SDimitry Andric { 40830b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 40840b57cec5SDimitry Andric (__v4si)_mm_cvtepi16_epi32(__A), 40850b57cec5SDimitry Andric (__v4si)__W); 40860b57cec5SDimitry Andric } 40870b57cec5SDimitry Andric 40880b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 40890b57cec5SDimitry Andric _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) 40900b57cec5SDimitry Andric { 40910b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 40920b57cec5SDimitry Andric (__v4si)_mm_cvtepi16_epi32(__A), 40930b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 40940b57cec5SDimitry Andric } 40950b57cec5SDimitry Andric 40960b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 40970b57cec5SDimitry Andric _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 40980b57cec5SDimitry Andric { 40990b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 41000b57cec5SDimitry Andric (__v8si)_mm256_cvtepi16_epi32(__A), 41010b57cec5SDimitry Andric (__v8si)__W); 41020b57cec5SDimitry Andric } 41030b57cec5SDimitry Andric 41040b57cec5SDimitry Andric 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 41050b57cec5SDimitry Andric _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 41060b57cec5SDimitry Andric { 41070b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 41080b57cec5SDimitry Andric (__v8si)_mm256_cvtepi16_epi32(__A), 41090b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 41100b57cec5SDimitry Andric } 41110b57cec5SDimitry Andric 41120b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 41130b57cec5SDimitry Andric _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 41140b57cec5SDimitry Andric { 41150b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 41160b57cec5SDimitry Andric (__v2di)_mm_cvtepi16_epi64(__A), 41170b57cec5SDimitry Andric (__v2di)__W); 41180b57cec5SDimitry Andric } 41190b57cec5SDimitry Andric 41200b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 41210b57cec5SDimitry Andric _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 41220b57cec5SDimitry Andric { 41230b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 41240b57cec5SDimitry Andric (__v2di)_mm_cvtepi16_epi64(__A), 41250b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 41260b57cec5SDimitry Andric } 41270b57cec5SDimitry Andric 41280b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 41290b57cec5SDimitry Andric _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 41300b57cec5SDimitry Andric { 41310b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 41320b57cec5SDimitry Andric (__v4di)_mm256_cvtepi16_epi64(__A), 41330b57cec5SDimitry Andric (__v4di)__W); 41340b57cec5SDimitry Andric } 41350b57cec5SDimitry Andric 41360b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 41370b57cec5SDimitry Andric _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 41380b57cec5SDimitry Andric { 41390b57cec5SDimitry Andric return 
(__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 41400b57cec5SDimitry Andric (__v4di)_mm256_cvtepi16_epi64(__A), 41410b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 41420b57cec5SDimitry Andric } 41430b57cec5SDimitry Andric 41440b57cec5SDimitry Andric 41450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 41460b57cec5SDimitry Andric _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 41470b57cec5SDimitry Andric { 41480b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 41490b57cec5SDimitry Andric (__v4si)_mm_cvtepu8_epi32(__A), 41500b57cec5SDimitry Andric (__v4si)__W); 41510b57cec5SDimitry Andric } 41520b57cec5SDimitry Andric 41530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 41540b57cec5SDimitry Andric _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 41550b57cec5SDimitry Andric { 41560b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 41570b57cec5SDimitry Andric (__v4si)_mm_cvtepu8_epi32(__A), 41580b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 41590b57cec5SDimitry Andric } 41600b57cec5SDimitry Andric 41610b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 41620b57cec5SDimitry Andric _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) 41630b57cec5SDimitry Andric { 41640b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 41650b57cec5SDimitry Andric (__v8si)_mm256_cvtepu8_epi32(__A), 41660b57cec5SDimitry Andric (__v8si)__W); 41670b57cec5SDimitry Andric } 41680b57cec5SDimitry Andric 41690b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 41700b57cec5SDimitry Andric _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 41710b57cec5SDimitry Andric { 41720b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 41730b57cec5SDimitry Andric (__v8si)_mm256_cvtepu8_epi32(__A), 41740b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 
41750b57cec5SDimitry Andric } 41760b57cec5SDimitry Andric 41770b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 41780b57cec5SDimitry Andric _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 41790b57cec5SDimitry Andric { 41800b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 41810b57cec5SDimitry Andric (__v2di)_mm_cvtepu8_epi64(__A), 41820b57cec5SDimitry Andric (__v2di)__W); 41830b57cec5SDimitry Andric } 41840b57cec5SDimitry Andric 41850b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 41860b57cec5SDimitry Andric _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 41870b57cec5SDimitry Andric { 41880b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 41890b57cec5SDimitry Andric (__v2di)_mm_cvtepu8_epi64(__A), 41900b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 41910b57cec5SDimitry Andric } 41920b57cec5SDimitry Andric 41930b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 41940b57cec5SDimitry Andric _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 41950b57cec5SDimitry Andric { 41960b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 41970b57cec5SDimitry Andric (__v4di)_mm256_cvtepu8_epi64(__A), 41980b57cec5SDimitry Andric (__v4di)__W); 41990b57cec5SDimitry Andric } 42000b57cec5SDimitry Andric 42010b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 42020b57cec5SDimitry Andric _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 42030b57cec5SDimitry Andric { 42040b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 42050b57cec5SDimitry Andric (__v4di)_mm256_cvtepu8_epi64(__A), 42060b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 42070b57cec5SDimitry Andric } 42080b57cec5SDimitry Andric 42090b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 42100b57cec5SDimitry Andric _mm_mask_cvtepu32_epi64(__m128i __W, 
__mmask8 __U, __m128i __X) 42110b57cec5SDimitry Andric { 42120b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 42130b57cec5SDimitry Andric (__v2di)_mm_cvtepu32_epi64(__X), 42140b57cec5SDimitry Andric (__v2di)__W); 42150b57cec5SDimitry Andric } 42160b57cec5SDimitry Andric 42170b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 42180b57cec5SDimitry Andric _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 42190b57cec5SDimitry Andric { 42200b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 42210b57cec5SDimitry Andric (__v2di)_mm_cvtepu32_epi64(__X), 42220b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 42230b57cec5SDimitry Andric } 42240b57cec5SDimitry Andric 42250b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 42260b57cec5SDimitry Andric _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 42270b57cec5SDimitry Andric { 42280b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 42290b57cec5SDimitry Andric (__v4di)_mm256_cvtepu32_epi64(__X), 42300b57cec5SDimitry Andric (__v4di)__W); 42310b57cec5SDimitry Andric } 42320b57cec5SDimitry Andric 42330b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 42340b57cec5SDimitry Andric _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 42350b57cec5SDimitry Andric { 42360b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 42370b57cec5SDimitry Andric (__v4di)_mm256_cvtepu32_epi64(__X), 42380b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 42390b57cec5SDimitry Andric } 42400b57cec5SDimitry Andric 42410b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 42420b57cec5SDimitry Andric _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 42430b57cec5SDimitry Andric { 42440b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 42450b57cec5SDimitry Andric 
(__v4si)_mm_cvtepu16_epi32(__A), 42460b57cec5SDimitry Andric (__v4si)__W); 42470b57cec5SDimitry Andric } 42480b57cec5SDimitry Andric 42490b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 42500b57cec5SDimitry Andric _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 42510b57cec5SDimitry Andric { 42520b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 42530b57cec5SDimitry Andric (__v4si)_mm_cvtepu16_epi32(__A), 42540b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 42550b57cec5SDimitry Andric } 42560b57cec5SDimitry Andric 42570b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 42580b57cec5SDimitry Andric _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 42590b57cec5SDimitry Andric { 42600b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 42610b57cec5SDimitry Andric (__v8si)_mm256_cvtepu16_epi32(__A), 42620b57cec5SDimitry Andric (__v8si)__W); 42630b57cec5SDimitry Andric } 42640b57cec5SDimitry Andric 42650b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 42660b57cec5SDimitry Andric _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 42670b57cec5SDimitry Andric { 42680b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 42690b57cec5SDimitry Andric (__v8si)_mm256_cvtepu16_epi32(__A), 42700b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 42710b57cec5SDimitry Andric } 42720b57cec5SDimitry Andric 42730b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 42740b57cec5SDimitry Andric _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 42750b57cec5SDimitry Andric { 42760b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 42770b57cec5SDimitry Andric (__v2di)_mm_cvtepu16_epi64(__A), 42780b57cec5SDimitry Andric (__v2di)__W); 42790b57cec5SDimitry Andric } 42800b57cec5SDimitry Andric 42810b57cec5SDimitry Andric static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 42820b57cec5SDimitry Andric _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 42830b57cec5SDimitry Andric { 42840b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 42850b57cec5SDimitry Andric (__v2di)_mm_cvtepu16_epi64(__A), 42860b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 42870b57cec5SDimitry Andric } 42880b57cec5SDimitry Andric 42890b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 42900b57cec5SDimitry Andric _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 42910b57cec5SDimitry Andric { 42920b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 42930b57cec5SDimitry Andric (__v4di)_mm256_cvtepu16_epi64(__A), 42940b57cec5SDimitry Andric (__v4di)__W); 42950b57cec5SDimitry Andric } 42960b57cec5SDimitry Andric 42970b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 42980b57cec5SDimitry Andric _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 42990b57cec5SDimitry Andric { 43000b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 43010b57cec5SDimitry Andric (__v4di)_mm256_cvtepu16_epi64(__A), 43020b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 43030b57cec5SDimitry Andric } 43040b57cec5SDimitry Andric 43050b57cec5SDimitry Andric 43060b57cec5SDimitry Andric #define _mm_rol_epi32(a, b) \ 4307349cc55cSDimitry Andric ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b))) 43080b57cec5SDimitry Andric 43090b57cec5SDimitry Andric #define _mm_mask_rol_epi32(w, u, a, b) \ 4310349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 43110b57cec5SDimitry Andric (__v4si)_mm_rol_epi32((a), (b)), \ 4312349cc55cSDimitry Andric (__v4si)(__m128i)(w))) 43130b57cec5SDimitry Andric 43140b57cec5SDimitry Andric #define _mm_maskz_rol_epi32(u, a, b) \ 4315349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 43160b57cec5SDimitry Andric 
(__v4si)_mm_rol_epi32((a), (b)), \ 4317349cc55cSDimitry Andric (__v4si)_mm_setzero_si128())) 43180b57cec5SDimitry Andric 43190b57cec5SDimitry Andric #define _mm256_rol_epi32(a, b) \ 4320349cc55cSDimitry Andric ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b))) 43210b57cec5SDimitry Andric 43220b57cec5SDimitry Andric #define _mm256_mask_rol_epi32(w, u, a, b) \ 4323349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 43240b57cec5SDimitry Andric (__v8si)_mm256_rol_epi32((a), (b)), \ 4325349cc55cSDimitry Andric (__v8si)(__m256i)(w))) 43260b57cec5SDimitry Andric 43270b57cec5SDimitry Andric #define _mm256_maskz_rol_epi32(u, a, b) \ 4328349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 43290b57cec5SDimitry Andric (__v8si)_mm256_rol_epi32((a), (b)), \ 4330349cc55cSDimitry Andric (__v8si)_mm256_setzero_si256())) 43310b57cec5SDimitry Andric 43320b57cec5SDimitry Andric #define _mm_rol_epi64(a, b) \ 4333349cc55cSDimitry Andric ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b))) 43340b57cec5SDimitry Andric 43350b57cec5SDimitry Andric #define _mm_mask_rol_epi64(w, u, a, b) \ 4336349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 43370b57cec5SDimitry Andric (__v2di)_mm_rol_epi64((a), (b)), \ 4338349cc55cSDimitry Andric (__v2di)(__m128i)(w))) 43390b57cec5SDimitry Andric 43400b57cec5SDimitry Andric #define _mm_maskz_rol_epi64(u, a, b) \ 4341349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 43420b57cec5SDimitry Andric (__v2di)_mm_rol_epi64((a), (b)), \ 4343349cc55cSDimitry Andric (__v2di)_mm_setzero_si128())) 43440b57cec5SDimitry Andric 43450b57cec5SDimitry Andric #define _mm256_rol_epi64(a, b) \ 4346349cc55cSDimitry Andric ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b))) 43470b57cec5SDimitry Andric 43480b57cec5SDimitry Andric #define _mm256_mask_rol_epi64(w, u, a, b) \ 4349349cc55cSDimitry Andric 
((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 43500b57cec5SDimitry Andric (__v4di)_mm256_rol_epi64((a), (b)), \ 4351349cc55cSDimitry Andric (__v4di)(__m256i)(w))) 43520b57cec5SDimitry Andric 43530b57cec5SDimitry Andric #define _mm256_maskz_rol_epi64(u, a, b) \ 4354349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 43550b57cec5SDimitry Andric (__v4di)_mm256_rol_epi64((a), (b)), \ 4356349cc55cSDimitry Andric (__v4di)_mm256_setzero_si256())) 43570b57cec5SDimitry Andric 43580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 43590b57cec5SDimitry Andric _mm_rolv_epi32 (__m128i __A, __m128i __B) 43600b57cec5SDimitry Andric { 43610b57cec5SDimitry Andric return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B); 43620b57cec5SDimitry Andric } 43630b57cec5SDimitry Andric 43640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 43650b57cec5SDimitry Andric _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 43660b57cec5SDimitry Andric { 43670b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 43680b57cec5SDimitry Andric (__v4si)_mm_rolv_epi32(__A, __B), 43690b57cec5SDimitry Andric (__v4si)__W); 43700b57cec5SDimitry Andric } 43710b57cec5SDimitry Andric 43720b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 43730b57cec5SDimitry Andric _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 43740b57cec5SDimitry Andric { 43750b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 43760b57cec5SDimitry Andric (__v4si)_mm_rolv_epi32(__A, __B), 43770b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 43780b57cec5SDimitry Andric } 43790b57cec5SDimitry Andric 43800b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 43810b57cec5SDimitry Andric _mm256_rolv_epi32 (__m256i __A, __m256i __B) 43820b57cec5SDimitry Andric { 43830b57cec5SDimitry Andric return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, 
(__v8si)__B); 43840b57cec5SDimitry Andric } 43850b57cec5SDimitry Andric 43860b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 43870b57cec5SDimitry Andric _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 43880b57cec5SDimitry Andric { 43890b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 43900b57cec5SDimitry Andric (__v8si)_mm256_rolv_epi32(__A, __B), 43910b57cec5SDimitry Andric (__v8si)__W); 43920b57cec5SDimitry Andric } 43930b57cec5SDimitry Andric 43940b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 43950b57cec5SDimitry Andric _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 43960b57cec5SDimitry Andric { 43970b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 43980b57cec5SDimitry Andric (__v8si)_mm256_rolv_epi32(__A, __B), 43990b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 44000b57cec5SDimitry Andric } 44010b57cec5SDimitry Andric 44020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 44030b57cec5SDimitry Andric _mm_rolv_epi64 (__m128i __A, __m128i __B) 44040b57cec5SDimitry Andric { 44050b57cec5SDimitry Andric return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B); 44060b57cec5SDimitry Andric } 44070b57cec5SDimitry Andric 44080b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 44090b57cec5SDimitry Andric _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 44100b57cec5SDimitry Andric { 44110b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__U, 44120b57cec5SDimitry Andric (__v2di)_mm_rolv_epi64(__A, __B), 44130b57cec5SDimitry Andric (__v2di)__W); 44140b57cec5SDimitry Andric } 44150b57cec5SDimitry Andric 44160b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 44170b57cec5SDimitry Andric _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 44180b57cec5SDimitry Andric { 44190b57cec5SDimitry Andric return 
(__m128i)__builtin_ia32_selectq_128(__U, 44200b57cec5SDimitry Andric (__v2di)_mm_rolv_epi64(__A, __B), 44210b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 44220b57cec5SDimitry Andric } 44230b57cec5SDimitry Andric 44240b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 44250b57cec5SDimitry Andric _mm256_rolv_epi64 (__m256i __A, __m256i __B) 44260b57cec5SDimitry Andric { 44270b57cec5SDimitry Andric return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B); 44280b57cec5SDimitry Andric } 44290b57cec5SDimitry Andric 44300b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 44310b57cec5SDimitry Andric _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 44320b57cec5SDimitry Andric { 44330b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__U, 44340b57cec5SDimitry Andric (__v4di)_mm256_rolv_epi64(__A, __B), 44350b57cec5SDimitry Andric (__v4di)__W); 44360b57cec5SDimitry Andric } 44370b57cec5SDimitry Andric 44380b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 44390b57cec5SDimitry Andric _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 44400b57cec5SDimitry Andric { 44410b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__U, 44420b57cec5SDimitry Andric (__v4di)_mm256_rolv_epi64(__A, __B), 44430b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 44440b57cec5SDimitry Andric } 44450b57cec5SDimitry Andric 44460b57cec5SDimitry Andric #define _mm_ror_epi32(a, b) \ 4447349cc55cSDimitry Andric ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b))) 44480b57cec5SDimitry Andric 44490b57cec5SDimitry Andric #define _mm_mask_ror_epi32(w, u, a, b) \ 4450349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 44510b57cec5SDimitry Andric (__v4si)_mm_ror_epi32((a), (b)), \ 4452349cc55cSDimitry Andric (__v4si)(__m128i)(w))) 44530b57cec5SDimitry Andric 44540b57cec5SDimitry Andric #define _mm_maskz_ror_epi32(u, a, 
b) \ 4455349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 44560b57cec5SDimitry Andric (__v4si)_mm_ror_epi32((a), (b)), \ 4457349cc55cSDimitry Andric (__v4si)_mm_setzero_si128())) 44580b57cec5SDimitry Andric 44590b57cec5SDimitry Andric #define _mm256_ror_epi32(a, b) \ 4460349cc55cSDimitry Andric ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b))) 44610b57cec5SDimitry Andric 44620b57cec5SDimitry Andric #define _mm256_mask_ror_epi32(w, u, a, b) \ 4463349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 44640b57cec5SDimitry Andric (__v8si)_mm256_ror_epi32((a), (b)), \ 4465349cc55cSDimitry Andric (__v8si)(__m256i)(w))) 44660b57cec5SDimitry Andric 44670b57cec5SDimitry Andric #define _mm256_maskz_ror_epi32(u, a, b) \ 4468349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 44690b57cec5SDimitry Andric (__v8si)_mm256_ror_epi32((a), (b)), \ 4470349cc55cSDimitry Andric (__v8si)_mm256_setzero_si256())) 44710b57cec5SDimitry Andric 44720b57cec5SDimitry Andric #define _mm_ror_epi64(a, b) \ 4473349cc55cSDimitry Andric ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b))) 44740b57cec5SDimitry Andric 44750b57cec5SDimitry Andric #define _mm_mask_ror_epi64(w, u, a, b) \ 4476349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 44770b57cec5SDimitry Andric (__v2di)_mm_ror_epi64((a), (b)), \ 4478349cc55cSDimitry Andric (__v2di)(__m128i)(w))) 44790b57cec5SDimitry Andric 44800b57cec5SDimitry Andric #define _mm_maskz_ror_epi64(u, a, b) \ 4481349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 44820b57cec5SDimitry Andric (__v2di)_mm_ror_epi64((a), (b)), \ 4483349cc55cSDimitry Andric (__v2di)_mm_setzero_si128())) 44840b57cec5SDimitry Andric 44850b57cec5SDimitry Andric #define _mm256_ror_epi64(a, b) \ 4486349cc55cSDimitry Andric ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b))) 44870b57cec5SDimitry Andric 
44880b57cec5SDimitry Andric #define _mm256_mask_ror_epi64(w, u, a, b) \ 4489349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 44900b57cec5SDimitry Andric (__v4di)_mm256_ror_epi64((a), (b)), \ 4491349cc55cSDimitry Andric (__v4di)(__m256i)(w))) 44920b57cec5SDimitry Andric 44930b57cec5SDimitry Andric #define _mm256_maskz_ror_epi64(u, a, b) \ 4494349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 44950b57cec5SDimitry Andric (__v4di)_mm256_ror_epi64((a), (b)), \ 4496349cc55cSDimitry Andric (__v4di)_mm256_setzero_si256())) 44970b57cec5SDimitry Andric 44980b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 44990b57cec5SDimitry Andric _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 45000b57cec5SDimitry Andric { 45010b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 45020b57cec5SDimitry Andric (__v4si)_mm_sll_epi32(__A, __B), 45030b57cec5SDimitry Andric (__v4si)__W); 45040b57cec5SDimitry Andric } 45050b57cec5SDimitry Andric 45060b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 45070b57cec5SDimitry Andric _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) 45080b57cec5SDimitry Andric { 45090b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 45100b57cec5SDimitry Andric (__v4si)_mm_sll_epi32(__A, __B), 45110b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 45120b57cec5SDimitry Andric } 45130b57cec5SDimitry Andric 45140b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 45150b57cec5SDimitry Andric _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 45160b57cec5SDimitry Andric { 45170b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 45180b57cec5SDimitry Andric (__v8si)_mm256_sll_epi32(__A, __B), 45190b57cec5SDimitry Andric (__v8si)__W); 45200b57cec5SDimitry Andric } 45210b57cec5SDimitry Andric 45220b57cec5SDimitry 
Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 45230b57cec5SDimitry Andric _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) 45240b57cec5SDimitry Andric { 45250b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 45260b57cec5SDimitry Andric (__v8si)_mm256_sll_epi32(__A, __B), 45270b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 45280b57cec5SDimitry Andric } 45290b57cec5SDimitry Andric 45300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 45315ffd83dbSDimitry Andric _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 45320b57cec5SDimitry Andric { 45330b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 453481ad6265SDimitry Andric (__v4si)_mm_slli_epi32(__A, (int)__B), 45350b57cec5SDimitry Andric (__v4si)__W); 45360b57cec5SDimitry Andric } 45370b57cec5SDimitry Andric 45380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 45395ffd83dbSDimitry Andric _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 45400b57cec5SDimitry Andric { 45410b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 454281ad6265SDimitry Andric (__v4si)_mm_slli_epi32(__A, (int)__B), 45430b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 45440b57cec5SDimitry Andric } 45450b57cec5SDimitry Andric 45460b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 45475ffd83dbSDimitry Andric _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 45480b57cec5SDimitry Andric { 45490b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 455081ad6265SDimitry Andric (__v8si)_mm256_slli_epi32(__A, (int)__B), 45510b57cec5SDimitry Andric (__v8si)__W); 45520b57cec5SDimitry Andric } 45530b57cec5SDimitry Andric 45540b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 45555ffd83dbSDimitry Andric _mm256_maskz_slli_epi32(__mmask8 __U, __m256i 
__A, unsigned int __B) 45560b57cec5SDimitry Andric { 45570b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 455881ad6265SDimitry Andric (__v8si)_mm256_slli_epi32(__A, (int)__B), 45590b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 45600b57cec5SDimitry Andric } 45610b57cec5SDimitry Andric 45620b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 45630b57cec5SDimitry Andric _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 45640b57cec5SDimitry Andric { 45650b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 45660b57cec5SDimitry Andric (__v2di)_mm_sll_epi64(__A, __B), 45670b57cec5SDimitry Andric (__v2di)__W); 45680b57cec5SDimitry Andric } 45690b57cec5SDimitry Andric 45700b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 45710b57cec5SDimitry Andric _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) 45720b57cec5SDimitry Andric { 45730b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 45740b57cec5SDimitry Andric (__v2di)_mm_sll_epi64(__A, __B), 45750b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 45760b57cec5SDimitry Andric } 45770b57cec5SDimitry Andric 45780b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 45790b57cec5SDimitry Andric _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 45800b57cec5SDimitry Andric { 45810b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 45820b57cec5SDimitry Andric (__v4di)_mm256_sll_epi64(__A, __B), 45830b57cec5SDimitry Andric (__v4di)__W); 45840b57cec5SDimitry Andric } 45850b57cec5SDimitry Andric 45860b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 45870b57cec5SDimitry Andric _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) 45880b57cec5SDimitry Andric { 45890b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 
45900b57cec5SDimitry Andric (__v4di)_mm256_sll_epi64(__A, __B), 45910b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 45920b57cec5SDimitry Andric } 45930b57cec5SDimitry Andric 45940b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 45955ffd83dbSDimitry Andric _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 45960b57cec5SDimitry Andric { 45970b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 459881ad6265SDimitry Andric (__v2di)_mm_slli_epi64(__A, (int)__B), 45990b57cec5SDimitry Andric (__v2di)__W); 46000b57cec5SDimitry Andric } 46010b57cec5SDimitry Andric 46020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 46035ffd83dbSDimitry Andric _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) 46040b57cec5SDimitry Andric { 46050b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 460681ad6265SDimitry Andric (__v2di)_mm_slli_epi64(__A, (int)__B), 46070b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 46080b57cec5SDimitry Andric } 46090b57cec5SDimitry Andric 46100b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 46115ffd83dbSDimitry Andric _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 46120b57cec5SDimitry Andric { 46130b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 461481ad6265SDimitry Andric (__v4di)_mm256_slli_epi64(__A, (int)__B), 46150b57cec5SDimitry Andric (__v4di)__W); 46160b57cec5SDimitry Andric } 46170b57cec5SDimitry Andric 46180b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 46195ffd83dbSDimitry Andric _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) 46200b57cec5SDimitry Andric { 46210b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 462281ad6265SDimitry Andric (__v4di)_mm256_slli_epi64(__A, (int)__B), 46230b57cec5SDimitry Andric 
(__v4di)_mm256_setzero_si256()); 46240b57cec5SDimitry Andric } 46250b57cec5SDimitry Andric 46260b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 46270b57cec5SDimitry Andric _mm_rorv_epi32 (__m128i __A, __m128i __B) 46280b57cec5SDimitry Andric { 46290b57cec5SDimitry Andric return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B); 46300b57cec5SDimitry Andric } 46310b57cec5SDimitry Andric 46320b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 46330b57cec5SDimitry Andric _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 46340b57cec5SDimitry Andric { 46350b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 46360b57cec5SDimitry Andric (__v4si)_mm_rorv_epi32(__A, __B), 46370b57cec5SDimitry Andric (__v4si)__W); 46380b57cec5SDimitry Andric } 46390b57cec5SDimitry Andric 46400b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 46410b57cec5SDimitry Andric _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 46420b57cec5SDimitry Andric { 46430b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__U, 46440b57cec5SDimitry Andric (__v4si)_mm_rorv_epi32(__A, __B), 46450b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 46460b57cec5SDimitry Andric } 46470b57cec5SDimitry Andric 46480b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 46490b57cec5SDimitry Andric _mm256_rorv_epi32 (__m256i __A, __m256i __B) 46500b57cec5SDimitry Andric { 46510b57cec5SDimitry Andric return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B); 46520b57cec5SDimitry Andric } 46530b57cec5SDimitry Andric 46540b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 46550b57cec5SDimitry Andric _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 46560b57cec5SDimitry Andric { 46570b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 46580b57cec5SDimitry Andric (__v8si)_mm256_rorv_epi32(__A, 
__B), 46590b57cec5SDimitry Andric (__v8si)__W); 46600b57cec5SDimitry Andric } 46610b57cec5SDimitry Andric 46620b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 46630b57cec5SDimitry Andric _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 46640b57cec5SDimitry Andric { 46650b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__U, 46660b57cec5SDimitry Andric (__v8si)_mm256_rorv_epi32(__A, __B), 46670b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 46680b57cec5SDimitry Andric } 46690b57cec5SDimitry Andric 46700b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 46710b57cec5SDimitry Andric _mm_rorv_epi64 (__m128i __A, __m128i __B) 46720b57cec5SDimitry Andric { 46730b57cec5SDimitry Andric return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B); 46740b57cec5SDimitry Andric } 46750b57cec5SDimitry Andric 46760b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 46770b57cec5SDimitry Andric _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 46780b57cec5SDimitry Andric { 46790b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__U, 46800b57cec5SDimitry Andric (__v2di)_mm_rorv_epi64(__A, __B), 46810b57cec5SDimitry Andric (__v2di)__W); 46820b57cec5SDimitry Andric } 46830b57cec5SDimitry Andric 46840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 46850b57cec5SDimitry Andric _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 46860b57cec5SDimitry Andric { 46870b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__U, 46880b57cec5SDimitry Andric (__v2di)_mm_rorv_epi64(__A, __B), 46890b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 46900b57cec5SDimitry Andric } 46910b57cec5SDimitry Andric 46920b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 46930b57cec5SDimitry Andric _mm256_rorv_epi64 (__m256i __A, __m256i __B) 46940b57cec5SDimitry Andric { 46950b57cec5SDimitry Andric 
return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B); 46960b57cec5SDimitry Andric } 46970b57cec5SDimitry Andric 46980b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 46990b57cec5SDimitry Andric _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 47000b57cec5SDimitry Andric { 47010b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__U, 47020b57cec5SDimitry Andric (__v4di)_mm256_rorv_epi64(__A, __B), 47030b57cec5SDimitry Andric (__v4di)__W); 47040b57cec5SDimitry Andric } 47050b57cec5SDimitry Andric 47060b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 47070b57cec5SDimitry Andric _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 47080b57cec5SDimitry Andric { 47090b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__U, 47100b57cec5SDimitry Andric (__v4di)_mm256_rorv_epi64(__A, __B), 47110b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 47120b57cec5SDimitry Andric } 47130b57cec5SDimitry Andric 47140b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 47150b57cec5SDimitry Andric _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 47160b57cec5SDimitry Andric { 47170b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 47180b57cec5SDimitry Andric (__v2di)_mm_sllv_epi64(__X, __Y), 47190b57cec5SDimitry Andric (__v2di)__W); 47200b57cec5SDimitry Andric } 47210b57cec5SDimitry Andric 47220b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 47230b57cec5SDimitry Andric _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 47240b57cec5SDimitry Andric { 47250b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 47260b57cec5SDimitry Andric (__v2di)_mm_sllv_epi64(__X, __Y), 47270b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 47280b57cec5SDimitry Andric } 47290b57cec5SDimitry Andric 47300b57cec5SDimitry Andric static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 47310b57cec5SDimitry Andric _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 47320b57cec5SDimitry Andric { 47330b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 47340b57cec5SDimitry Andric (__v4di)_mm256_sllv_epi64(__X, __Y), 47350b57cec5SDimitry Andric (__v4di)__W); 47360b57cec5SDimitry Andric } 47370b57cec5SDimitry Andric 47380b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 47390b57cec5SDimitry Andric _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 47400b57cec5SDimitry Andric { 47410b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 47420b57cec5SDimitry Andric (__v4di)_mm256_sllv_epi64(__X, __Y), 47430b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 47440b57cec5SDimitry Andric } 47450b57cec5SDimitry Andric 47460b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 47470b57cec5SDimitry Andric _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 47480b57cec5SDimitry Andric { 47490b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 47500b57cec5SDimitry Andric (__v4si)_mm_sllv_epi32(__X, __Y), 47510b57cec5SDimitry Andric (__v4si)__W); 47520b57cec5SDimitry Andric } 47530b57cec5SDimitry Andric 47540b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 47550b57cec5SDimitry Andric _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 47560b57cec5SDimitry Andric { 47570b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 47580b57cec5SDimitry Andric (__v4si)_mm_sllv_epi32(__X, __Y), 47590b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 47600b57cec5SDimitry Andric } 47610b57cec5SDimitry Andric 47620b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 47630b57cec5SDimitry Andric _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 47640b57cec5SDimitry 
Andric { 47650b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 47660b57cec5SDimitry Andric (__v8si)_mm256_sllv_epi32(__X, __Y), 47670b57cec5SDimitry Andric (__v8si)__W); 47680b57cec5SDimitry Andric } 47690b57cec5SDimitry Andric 47700b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 47710b57cec5SDimitry Andric _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 47720b57cec5SDimitry Andric { 47730b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 47740b57cec5SDimitry Andric (__v8si)_mm256_sllv_epi32(__X, __Y), 47750b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 47760b57cec5SDimitry Andric } 47770b57cec5SDimitry Andric 47780b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 47790b57cec5SDimitry Andric _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 47800b57cec5SDimitry Andric { 47810b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 47820b57cec5SDimitry Andric (__v2di)_mm_srlv_epi64(__X, __Y), 47830b57cec5SDimitry Andric (__v2di)__W); 47840b57cec5SDimitry Andric } 47850b57cec5SDimitry Andric 47860b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 47870b57cec5SDimitry Andric _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 47880b57cec5SDimitry Andric { 47890b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 47900b57cec5SDimitry Andric (__v2di)_mm_srlv_epi64(__X, __Y), 47910b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 47920b57cec5SDimitry Andric } 47930b57cec5SDimitry Andric 47940b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 47950b57cec5SDimitry Andric _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 47960b57cec5SDimitry Andric { 47970b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 47980b57cec5SDimitry Andric (__v4di)_mm256_srlv_epi64(__X, 
__Y), 47990b57cec5SDimitry Andric (__v4di)__W); 48000b57cec5SDimitry Andric } 48010b57cec5SDimitry Andric 48020b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 48030b57cec5SDimitry Andric _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 48040b57cec5SDimitry Andric { 48050b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 48060b57cec5SDimitry Andric (__v4di)_mm256_srlv_epi64(__X, __Y), 48070b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 48080b57cec5SDimitry Andric } 48090b57cec5SDimitry Andric 48100b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 48110b57cec5SDimitry Andric _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 48120b57cec5SDimitry Andric { 48130b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 48140b57cec5SDimitry Andric (__v4si)_mm_srlv_epi32(__X, __Y), 48150b57cec5SDimitry Andric (__v4si)__W); 48160b57cec5SDimitry Andric } 48170b57cec5SDimitry Andric 48180b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 48190b57cec5SDimitry Andric _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 48200b57cec5SDimitry Andric { 48210b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 48220b57cec5SDimitry Andric (__v4si)_mm_srlv_epi32(__X, __Y), 48230b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 48240b57cec5SDimitry Andric } 48250b57cec5SDimitry Andric 48260b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 48270b57cec5SDimitry Andric _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 48280b57cec5SDimitry Andric { 48290b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 48300b57cec5SDimitry Andric (__v8si)_mm256_srlv_epi32(__X, __Y), 48310b57cec5SDimitry Andric (__v8si)__W); 48320b57cec5SDimitry Andric } 48330b57cec5SDimitry Andric 48340b57cec5SDimitry Andric static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 48350b57cec5SDimitry Andric _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 48360b57cec5SDimitry Andric { 48370b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 48380b57cec5SDimitry Andric (__v8si)_mm256_srlv_epi32(__X, __Y), 48390b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 48400b57cec5SDimitry Andric } 48410b57cec5SDimitry Andric 48420b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 48430b57cec5SDimitry Andric _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 48440b57cec5SDimitry Andric { 48450b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 48460b57cec5SDimitry Andric (__v4si)_mm_srl_epi32(__A, __B), 48470b57cec5SDimitry Andric (__v4si)__W); 48480b57cec5SDimitry Andric } 48490b57cec5SDimitry Andric 48500b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 48510b57cec5SDimitry Andric _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) 48520b57cec5SDimitry Andric { 48530b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 48540b57cec5SDimitry Andric (__v4si)_mm_srl_epi32(__A, __B), 48550b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 48560b57cec5SDimitry Andric } 48570b57cec5SDimitry Andric 48580b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 48590b57cec5SDimitry Andric _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 48600b57cec5SDimitry Andric { 48610b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 48620b57cec5SDimitry Andric (__v8si)_mm256_srl_epi32(__A, __B), 48630b57cec5SDimitry Andric (__v8si)__W); 48640b57cec5SDimitry Andric } 48650b57cec5SDimitry Andric 48660b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 48670b57cec5SDimitry Andric _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) 48680b57cec5SDimitry Andric { 
48690b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 48700b57cec5SDimitry Andric (__v8si)_mm256_srl_epi32(__A, __B), 48710b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 48720b57cec5SDimitry Andric } 48730b57cec5SDimitry Andric 48740b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 48755ffd83dbSDimitry Andric _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 48760b57cec5SDimitry Andric { 48770b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 487881ad6265SDimitry Andric (__v4si)_mm_srli_epi32(__A, (int)__B), 48790b57cec5SDimitry Andric (__v4si)__W); 48800b57cec5SDimitry Andric } 48810b57cec5SDimitry Andric 48820b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 48835ffd83dbSDimitry Andric _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 48840b57cec5SDimitry Andric { 48850b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 488681ad6265SDimitry Andric (__v4si)_mm_srli_epi32(__A, (int)__B), 48870b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 48880b57cec5SDimitry Andric } 48890b57cec5SDimitry Andric 48900b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 48915ffd83dbSDimitry Andric _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 48920b57cec5SDimitry Andric { 48930b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 489481ad6265SDimitry Andric (__v8si)_mm256_srli_epi32(__A, (int)__B), 48950b57cec5SDimitry Andric (__v8si)__W); 48960b57cec5SDimitry Andric } 48970b57cec5SDimitry Andric 48980b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 48995ffd83dbSDimitry Andric _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 49000b57cec5SDimitry Andric { 49010b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 490281ad6265SDimitry Andric 
(__v8si)_mm256_srli_epi32(__A, (int)__B), 49030b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 49040b57cec5SDimitry Andric } 49050b57cec5SDimitry Andric 49060b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 49070b57cec5SDimitry Andric _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 49080b57cec5SDimitry Andric { 49090b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 49100b57cec5SDimitry Andric (__v2di)_mm_srl_epi64(__A, __B), 49110b57cec5SDimitry Andric (__v2di)__W); 49120b57cec5SDimitry Andric } 49130b57cec5SDimitry Andric 49140b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 49150b57cec5SDimitry Andric _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) 49160b57cec5SDimitry Andric { 49170b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 49180b57cec5SDimitry Andric (__v2di)_mm_srl_epi64(__A, __B), 49190b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 49200b57cec5SDimitry Andric } 49210b57cec5SDimitry Andric 49220b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 49230b57cec5SDimitry Andric _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 49240b57cec5SDimitry Andric { 49250b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 49260b57cec5SDimitry Andric (__v4di)_mm256_srl_epi64(__A, __B), 49270b57cec5SDimitry Andric (__v4di)__W); 49280b57cec5SDimitry Andric } 49290b57cec5SDimitry Andric 49300b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 49310b57cec5SDimitry Andric _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) 49320b57cec5SDimitry Andric { 49330b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 49340b57cec5SDimitry Andric (__v4di)_mm256_srl_epi64(__A, __B), 49350b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 49360b57cec5SDimitry Andric } 49370b57cec5SDimitry Andric 
49380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 49395ffd83dbSDimitry Andric _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 49400b57cec5SDimitry Andric { 49410b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 494281ad6265SDimitry Andric (__v2di)_mm_srli_epi64(__A, (int)__B), 49430b57cec5SDimitry Andric (__v2di)__W); 49440b57cec5SDimitry Andric } 49450b57cec5SDimitry Andric 49460b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 49475ffd83dbSDimitry Andric _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) 49480b57cec5SDimitry Andric { 49490b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 495081ad6265SDimitry Andric (__v2di)_mm_srli_epi64(__A, (int)__B), 49510b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 49520b57cec5SDimitry Andric } 49530b57cec5SDimitry Andric 49540b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 49555ffd83dbSDimitry Andric _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 49560b57cec5SDimitry Andric { 49570b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 495881ad6265SDimitry Andric (__v4di)_mm256_srli_epi64(__A, (int)__B), 49590b57cec5SDimitry Andric (__v4di)__W); 49600b57cec5SDimitry Andric } 49610b57cec5SDimitry Andric 49620b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 49635ffd83dbSDimitry Andric _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) 49640b57cec5SDimitry Andric { 49650b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 496681ad6265SDimitry Andric (__v4di)_mm256_srli_epi64(__A, (int)__B), 49670b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 49680b57cec5SDimitry Andric } 49690b57cec5SDimitry Andric 49700b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 49710b57cec5SDimitry Andric 
_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 49720b57cec5SDimitry Andric { 49730b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 49740b57cec5SDimitry Andric (__v4si)_mm_srav_epi32(__X, __Y), 49750b57cec5SDimitry Andric (__v4si)__W); 49760b57cec5SDimitry Andric } 49770b57cec5SDimitry Andric 49780b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 49790b57cec5SDimitry Andric _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 49800b57cec5SDimitry Andric { 49810b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 49820b57cec5SDimitry Andric (__v4si)_mm_srav_epi32(__X, __Y), 49830b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 49840b57cec5SDimitry Andric } 49850b57cec5SDimitry Andric 49860b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 49870b57cec5SDimitry Andric _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 49880b57cec5SDimitry Andric { 49890b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 49900b57cec5SDimitry Andric (__v8si)_mm256_srav_epi32(__X, __Y), 49910b57cec5SDimitry Andric (__v8si)__W); 49920b57cec5SDimitry Andric } 49930b57cec5SDimitry Andric 49940b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 49950b57cec5SDimitry Andric _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 49960b57cec5SDimitry Andric { 49970b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 49980b57cec5SDimitry Andric (__v8si)_mm256_srav_epi32(__X, __Y), 49990b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 50000b57cec5SDimitry Andric } 50010b57cec5SDimitry Andric 50020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 50030b57cec5SDimitry Andric _mm_srav_epi64(__m128i __X, __m128i __Y) 50040b57cec5SDimitry Andric { 50050b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psravq128((__v2di)__X, 
(__v2di)__Y); 50060b57cec5SDimitry Andric } 50070b57cec5SDimitry Andric 50080b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 50090b57cec5SDimitry Andric _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 50100b57cec5SDimitry Andric { 50110b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 50120b57cec5SDimitry Andric (__v2di)_mm_srav_epi64(__X, __Y), 50130b57cec5SDimitry Andric (__v2di)__W); 50140b57cec5SDimitry Andric } 50150b57cec5SDimitry Andric 50160b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 50170b57cec5SDimitry Andric _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 50180b57cec5SDimitry Andric { 50190b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 50200b57cec5SDimitry Andric (__v2di)_mm_srav_epi64(__X, __Y), 50210b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 50220b57cec5SDimitry Andric } 50230b57cec5SDimitry Andric 50240b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 50250b57cec5SDimitry Andric _mm256_srav_epi64(__m256i __X, __m256i __Y) 50260b57cec5SDimitry Andric { 50270b57cec5SDimitry Andric return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); 50280b57cec5SDimitry Andric } 50290b57cec5SDimitry Andric 50300b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 50310b57cec5SDimitry Andric _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 50320b57cec5SDimitry Andric { 50330b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 50340b57cec5SDimitry Andric (__v4di)_mm256_srav_epi64(__X, __Y), 50350b57cec5SDimitry Andric (__v4di)__W); 50360b57cec5SDimitry Andric } 50370b57cec5SDimitry Andric 50380b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 50390b57cec5SDimitry Andric _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 50400b57cec5SDimitry Andric { 50410b57cec5SDimitry 
Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 50420b57cec5SDimitry Andric (__v4di)_mm256_srav_epi64(__X, __Y), 50430b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 50440b57cec5SDimitry Andric } 50450b57cec5SDimitry Andric 50460b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 50470b57cec5SDimitry Andric _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 50480b57cec5SDimitry Andric { 50490b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 50500b57cec5SDimitry Andric (__v4si) __A, 50510b57cec5SDimitry Andric (__v4si) __W); 50520b57cec5SDimitry Andric } 50530b57cec5SDimitry Andric 50540b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 50550b57cec5SDimitry Andric _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 50560b57cec5SDimitry Andric { 50570b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 50580b57cec5SDimitry Andric (__v4si) __A, 50590b57cec5SDimitry Andric (__v4si) _mm_setzero_si128 ()); 50600b57cec5SDimitry Andric } 50610b57cec5SDimitry Andric 50620b57cec5SDimitry Andric 50630b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 50640b57cec5SDimitry Andric _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 50650b57cec5SDimitry Andric { 50660b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 50670b57cec5SDimitry Andric (__v8si) __A, 50680b57cec5SDimitry Andric (__v8si) __W); 50690b57cec5SDimitry Andric } 50700b57cec5SDimitry Andric 50710b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 50720b57cec5SDimitry Andric _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 50730b57cec5SDimitry Andric { 50740b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 50750b57cec5SDimitry Andric (__v8si) __A, 50760b57cec5SDimitry Andric (__v8si) _mm256_setzero_si256 ()); 50770b57cec5SDimitry Andric } 50780b57cec5SDimitry Andric 
50790b57cec5SDimitry Andric static __inline __m128i __DEFAULT_FN_ATTRS128 50800b57cec5SDimitry Andric _mm_load_epi32 (void const *__P) 50810b57cec5SDimitry Andric { 5082480093f4SDimitry Andric return *(const __m128i *) __P; 50830b57cec5SDimitry Andric } 50840b57cec5SDimitry Andric 50850b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 50860b57cec5SDimitry Andric _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 50870b57cec5SDimitry Andric { 5088480093f4SDimitry Andric return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, 50890b57cec5SDimitry Andric (__v4si) __W, 50900b57cec5SDimitry Andric (__mmask8) 50910b57cec5SDimitry Andric __U); 50920b57cec5SDimitry Andric } 50930b57cec5SDimitry Andric 50940b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 50950b57cec5SDimitry Andric _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 50960b57cec5SDimitry Andric { 5097480093f4SDimitry Andric return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, 50980b57cec5SDimitry Andric (__v4si) 50990b57cec5SDimitry Andric _mm_setzero_si128 (), 51000b57cec5SDimitry Andric (__mmask8) 51010b57cec5SDimitry Andric __U); 51020b57cec5SDimitry Andric } 51030b57cec5SDimitry Andric 51040b57cec5SDimitry Andric static __inline __m256i __DEFAULT_FN_ATTRS256 51050b57cec5SDimitry Andric _mm256_load_epi32 (void const *__P) 51060b57cec5SDimitry Andric { 5107480093f4SDimitry Andric return *(const __m256i *) __P; 51080b57cec5SDimitry Andric } 51090b57cec5SDimitry Andric 51100b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 51110b57cec5SDimitry Andric _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) 51120b57cec5SDimitry Andric { 5113480093f4SDimitry Andric return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, 51140b57cec5SDimitry Andric (__v8si) __W, 51150b57cec5SDimitry Andric (__mmask8) 51160b57cec5SDimitry Andric __U); 51170b57cec5SDimitry 
Andric } 51180b57cec5SDimitry Andric 51190b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 51200b57cec5SDimitry Andric _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 51210b57cec5SDimitry Andric { 5122480093f4SDimitry Andric return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, 51230b57cec5SDimitry Andric (__v8si) 51240b57cec5SDimitry Andric _mm256_setzero_si256 (), 51250b57cec5SDimitry Andric (__mmask8) 51260b57cec5SDimitry Andric __U); 51270b57cec5SDimitry Andric } 51280b57cec5SDimitry Andric 51290b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS128 51300b57cec5SDimitry Andric _mm_store_epi32 (void *__P, __m128i __A) 51310b57cec5SDimitry Andric { 51320b57cec5SDimitry Andric *(__m128i *) __P = __A; 51330b57cec5SDimitry Andric } 51340b57cec5SDimitry Andric 51350b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 51360b57cec5SDimitry Andric _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 51370b57cec5SDimitry Andric { 51380b57cec5SDimitry Andric __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 51390b57cec5SDimitry Andric (__v4si) __A, 51400b57cec5SDimitry Andric (__mmask8) __U); 51410b57cec5SDimitry Andric } 51420b57cec5SDimitry Andric 51430b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS256 51440b57cec5SDimitry Andric _mm256_store_epi32 (void *__P, __m256i __A) 51450b57cec5SDimitry Andric { 51460b57cec5SDimitry Andric *(__m256i *) __P = __A; 51470b57cec5SDimitry Andric } 51480b57cec5SDimitry Andric 51490b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 51500b57cec5SDimitry Andric _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 51510b57cec5SDimitry Andric { 51520b57cec5SDimitry Andric __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 51530b57cec5SDimitry Andric (__v8si) __A, 51540b57cec5SDimitry Andric (__mmask8) __U); 51550b57cec5SDimitry Andric } 51560b57cec5SDimitry Andric 51570b57cec5SDimitry Andric static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 51580b57cec5SDimitry Andric _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 51590b57cec5SDimitry Andric { 51600b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 51610b57cec5SDimitry Andric (__v2di) __A, 51620b57cec5SDimitry Andric (__v2di) __W); 51630b57cec5SDimitry Andric } 51640b57cec5SDimitry Andric 51650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 51660b57cec5SDimitry Andric _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 51670b57cec5SDimitry Andric { 51680b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 51690b57cec5SDimitry Andric (__v2di) __A, 51700b57cec5SDimitry Andric (__v2di) _mm_setzero_si128 ()); 51710b57cec5SDimitry Andric } 51720b57cec5SDimitry Andric 51730b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 51740b57cec5SDimitry Andric _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 51750b57cec5SDimitry Andric { 51760b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 51770b57cec5SDimitry Andric (__v4di) __A, 51780b57cec5SDimitry Andric (__v4di) __W); 51790b57cec5SDimitry Andric } 51800b57cec5SDimitry Andric 51810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 51820b57cec5SDimitry Andric _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 51830b57cec5SDimitry Andric { 51840b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 51850b57cec5SDimitry Andric (__v4di) __A, 51860b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256 ()); 51870b57cec5SDimitry Andric } 51880b57cec5SDimitry Andric 51890b57cec5SDimitry Andric static __inline __m128i __DEFAULT_FN_ATTRS128 51900b57cec5SDimitry Andric _mm_load_epi64 (void const *__P) 51910b57cec5SDimitry Andric { 5192480093f4SDimitry Andric return *(const __m128i *) __P; 51930b57cec5SDimitry Andric } 51940b57cec5SDimitry Andric 51950b57cec5SDimitry Andric static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 51960b57cec5SDimitry Andric _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 51970b57cec5SDimitry Andric { 5198480093f4SDimitry Andric return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, 51990b57cec5SDimitry Andric (__v2di) __W, 52000b57cec5SDimitry Andric (__mmask8) 52010b57cec5SDimitry Andric __U); 52020b57cec5SDimitry Andric } 52030b57cec5SDimitry Andric 52040b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 52050b57cec5SDimitry Andric _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 52060b57cec5SDimitry Andric { 5207480093f4SDimitry Andric return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, 52080b57cec5SDimitry Andric (__v2di) 52090b57cec5SDimitry Andric _mm_setzero_si128 (), 52100b57cec5SDimitry Andric (__mmask8) 52110b57cec5SDimitry Andric __U); 52120b57cec5SDimitry Andric } 52130b57cec5SDimitry Andric 52140b57cec5SDimitry Andric static __inline __m256i __DEFAULT_FN_ATTRS256 52150b57cec5SDimitry Andric _mm256_load_epi64 (void const *__P) 52160b57cec5SDimitry Andric { 5217480093f4SDimitry Andric return *(const __m256i *) __P; 52180b57cec5SDimitry Andric } 52190b57cec5SDimitry Andric 52200b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 52210b57cec5SDimitry Andric _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 52220b57cec5SDimitry Andric { 5223480093f4SDimitry Andric return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, 52240b57cec5SDimitry Andric (__v4di) __W, 52250b57cec5SDimitry Andric (__mmask8) 52260b57cec5SDimitry Andric __U); 52270b57cec5SDimitry Andric } 52280b57cec5SDimitry Andric 52290b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 52300b57cec5SDimitry Andric _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 52310b57cec5SDimitry Andric { 5232480093f4SDimitry Andric return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di 
*) __P, 52330b57cec5SDimitry Andric (__v4di) 52340b57cec5SDimitry Andric _mm256_setzero_si256 (), 52350b57cec5SDimitry Andric (__mmask8) 52360b57cec5SDimitry Andric __U); 52370b57cec5SDimitry Andric } 52380b57cec5SDimitry Andric 52390b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS128 52400b57cec5SDimitry Andric _mm_store_epi64 (void *__P, __m128i __A) 52410b57cec5SDimitry Andric { 52420b57cec5SDimitry Andric *(__m128i *) __P = __A; 52430b57cec5SDimitry Andric } 52440b57cec5SDimitry Andric 52450b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 52460b57cec5SDimitry Andric _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) 52470b57cec5SDimitry Andric { 52480b57cec5SDimitry Andric __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 52490b57cec5SDimitry Andric (__v2di) __A, 52500b57cec5SDimitry Andric (__mmask8) __U); 52510b57cec5SDimitry Andric } 52520b57cec5SDimitry Andric 52530b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS256 52540b57cec5SDimitry Andric _mm256_store_epi64 (void *__P, __m256i __A) 52550b57cec5SDimitry Andric { 52560b57cec5SDimitry Andric *(__m256i *) __P = __A; 52570b57cec5SDimitry Andric } 52580b57cec5SDimitry Andric 52590b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 52600b57cec5SDimitry Andric _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 52610b57cec5SDimitry Andric { 52620b57cec5SDimitry Andric __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, 52630b57cec5SDimitry Andric (__v4di) __A, 52640b57cec5SDimitry Andric (__mmask8) __U); 52650b57cec5SDimitry Andric } 52660b57cec5SDimitry Andric 52670b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 52680b57cec5SDimitry Andric _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 52690b57cec5SDimitry Andric { 52700b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 52710b57cec5SDimitry Andric (__v2df)_mm_movedup_pd(__A), 52720b57cec5SDimitry Andric 
(__v2df)__W); 52730b57cec5SDimitry Andric } 52740b57cec5SDimitry Andric 52750b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 52760b57cec5SDimitry Andric _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 52770b57cec5SDimitry Andric { 52780b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 52790b57cec5SDimitry Andric (__v2df)_mm_movedup_pd(__A), 52800b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 52810b57cec5SDimitry Andric } 52820b57cec5SDimitry Andric 52830b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 52840b57cec5SDimitry Andric _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 52850b57cec5SDimitry Andric { 52860b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 52870b57cec5SDimitry Andric (__v4df)_mm256_movedup_pd(__A), 52880b57cec5SDimitry Andric (__v4df)__W); 52890b57cec5SDimitry Andric } 52900b57cec5SDimitry Andric 52910b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 52920b57cec5SDimitry Andric _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 52930b57cec5SDimitry Andric { 52940b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 52950b57cec5SDimitry Andric (__v4df)_mm256_movedup_pd(__A), 52960b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 52970b57cec5SDimitry Andric } 52980b57cec5SDimitry Andric 52990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 53000b57cec5SDimitry Andric _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) 53010b57cec5SDimitry Andric { 53020b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__M, 53030b57cec5SDimitry Andric (__v4si) _mm_set1_epi32(__A), 53040b57cec5SDimitry Andric (__v4si)__O); 53050b57cec5SDimitry Andric } 53060b57cec5SDimitry Andric 53070b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 53080b57cec5SDimitry Andric _mm_maskz_set1_epi32( __mmask8 __M, int __A) 53090b57cec5SDimitry 
Andric { 53100b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__M, 53110b57cec5SDimitry Andric (__v4si) _mm_set1_epi32(__A), 53120b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 53130b57cec5SDimitry Andric } 53140b57cec5SDimitry Andric 53150b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 53160b57cec5SDimitry Andric _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) 53170b57cec5SDimitry Andric { 53180b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__M, 53190b57cec5SDimitry Andric (__v8si) _mm256_set1_epi32(__A), 53200b57cec5SDimitry Andric (__v8si)__O); 53210b57cec5SDimitry Andric } 53220b57cec5SDimitry Andric 53230b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 53240b57cec5SDimitry Andric _mm256_maskz_set1_epi32( __mmask8 __M, int __A) 53250b57cec5SDimitry Andric { 53260b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__M, 53270b57cec5SDimitry Andric (__v8si) _mm256_set1_epi32(__A), 53280b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 53290b57cec5SDimitry Andric } 53300b57cec5SDimitry Andric 53310b57cec5SDimitry Andric 53320b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 53330b57cec5SDimitry Andric _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 53340b57cec5SDimitry Andric { 53350b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectq_128(__M, 53360b57cec5SDimitry Andric (__v2di) _mm_set1_epi64x(__A), 53370b57cec5SDimitry Andric (__v2di) __O); 53380b57cec5SDimitry Andric } 53390b57cec5SDimitry Andric 53400b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 53410b57cec5SDimitry Andric _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 53420b57cec5SDimitry Andric { 53430b57cec5SDimitry Andric return (__m128i) __builtin_ia32_selectq_128(__M, 53440b57cec5SDimitry Andric (__v2di) _mm_set1_epi64x(__A), 53450b57cec5SDimitry Andric (__v2di) _mm_setzero_si128()); 53460b57cec5SDimitry Andric } 
53470b57cec5SDimitry Andric 53480b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 53490b57cec5SDimitry Andric _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 53500b57cec5SDimitry Andric { 53510b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectq_256(__M, 53520b57cec5SDimitry Andric (__v4di) _mm256_set1_epi64x(__A), 53530b57cec5SDimitry Andric (__v4di) __O) ; 53540b57cec5SDimitry Andric } 53550b57cec5SDimitry Andric 53560b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 53570b57cec5SDimitry Andric _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 53580b57cec5SDimitry Andric { 53590b57cec5SDimitry Andric return (__m256i) __builtin_ia32_selectq_256(__M, 53600b57cec5SDimitry Andric (__v4di) _mm256_set1_epi64x(__A), 53610b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256()); 53620b57cec5SDimitry Andric } 53630b57cec5SDimitry Andric 53640b57cec5SDimitry Andric #define _mm_fixupimm_pd(A, B, C, imm) \ 5365349cc55cSDimitry Andric ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 53660b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 53670b57cec5SDimitry Andric (__v2di)(__m128i)(C), (int)(imm), \ 5368349cc55cSDimitry Andric (__mmask8)-1)) 53690b57cec5SDimitry Andric 53700b57cec5SDimitry Andric #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \ 5371349cc55cSDimitry Andric ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 53720b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 53730b57cec5SDimitry Andric (__v2di)(__m128i)(C), (int)(imm), \ 5374349cc55cSDimitry Andric (__mmask8)(U))) 53750b57cec5SDimitry Andric 53760b57cec5SDimitry Andric #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \ 5377349cc55cSDimitry Andric ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ 53780b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 53790b57cec5SDimitry Andric (__v2di)(__m128i)(C), \ 5380349cc55cSDimitry Andric (int)(imm), (__mmask8)(U))) 53810b57cec5SDimitry Andric 
53820b57cec5SDimitry Andric #define _mm256_fixupimm_pd(A, B, C, imm) \ 5383349cc55cSDimitry Andric ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 53840b57cec5SDimitry Andric (__v4df)(__m256d)(B), \ 53850b57cec5SDimitry Andric (__v4di)(__m256i)(C), (int)(imm), \ 5386349cc55cSDimitry Andric (__mmask8)-1)) 53870b57cec5SDimitry Andric 53880b57cec5SDimitry Andric #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \ 5389349cc55cSDimitry Andric ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 53900b57cec5SDimitry Andric (__v4df)(__m256d)(B), \ 53910b57cec5SDimitry Andric (__v4di)(__m256i)(C), (int)(imm), \ 5392349cc55cSDimitry Andric (__mmask8)(U))) 53930b57cec5SDimitry Andric 53940b57cec5SDimitry Andric #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \ 5395349cc55cSDimitry Andric ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ 53960b57cec5SDimitry Andric (__v4df)(__m256d)(B), \ 53970b57cec5SDimitry Andric (__v4di)(__m256i)(C), \ 5398349cc55cSDimitry Andric (int)(imm), (__mmask8)(U))) 53990b57cec5SDimitry Andric 54000b57cec5SDimitry Andric #define _mm_fixupimm_ps(A, B, C, imm) \ 5401349cc55cSDimitry Andric ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 54020b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 54030b57cec5SDimitry Andric (__v4si)(__m128i)(C), (int)(imm), \ 5404349cc55cSDimitry Andric (__mmask8)-1)) 54050b57cec5SDimitry Andric 54060b57cec5SDimitry Andric #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \ 5407349cc55cSDimitry Andric ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 54080b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 54090b57cec5SDimitry Andric (__v4si)(__m128i)(C), (int)(imm), \ 5410349cc55cSDimitry Andric (__mmask8)(U))) 54110b57cec5SDimitry Andric 54120b57cec5SDimitry Andric #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ 5413349cc55cSDimitry Andric ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ 54140b57cec5SDimitry Andric 
(__v4sf)(__m128)(B), \ 54150b57cec5SDimitry Andric (__v4si)(__m128i)(C), (int)(imm), \ 5416349cc55cSDimitry Andric (__mmask8)(U))) 54170b57cec5SDimitry Andric 54180b57cec5SDimitry Andric #define _mm256_fixupimm_ps(A, B, C, imm) \ 5419349cc55cSDimitry Andric ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 54200b57cec5SDimitry Andric (__v8sf)(__m256)(B), \ 54210b57cec5SDimitry Andric (__v8si)(__m256i)(C), (int)(imm), \ 5422349cc55cSDimitry Andric (__mmask8)-1)) 54230b57cec5SDimitry Andric 54240b57cec5SDimitry Andric #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \ 5425349cc55cSDimitry Andric ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 54260b57cec5SDimitry Andric (__v8sf)(__m256)(B), \ 54270b57cec5SDimitry Andric (__v8si)(__m256i)(C), (int)(imm), \ 5428349cc55cSDimitry Andric (__mmask8)(U))) 54290b57cec5SDimitry Andric 54300b57cec5SDimitry Andric #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ 5431349cc55cSDimitry Andric ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ 54320b57cec5SDimitry Andric (__v8sf)(__m256)(B), \ 54330b57cec5SDimitry Andric (__v8si)(__m256i)(C), (int)(imm), \ 5434349cc55cSDimitry Andric (__mmask8)(U))) 54350b57cec5SDimitry Andric 54360b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 54370b57cec5SDimitry Andric _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 54380b57cec5SDimitry Andric { 5439480093f4SDimitry Andric return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, 54400b57cec5SDimitry Andric (__v2df) __W, 54410b57cec5SDimitry Andric (__mmask8) __U); 54420b57cec5SDimitry Andric } 54430b57cec5SDimitry Andric 54440b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 54450b57cec5SDimitry Andric _mm_maskz_load_pd (__mmask8 __U, void const *__P) 54460b57cec5SDimitry Andric { 5447480093f4SDimitry Andric return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, 54480b57cec5SDimitry Andric (__v2df) 
54490b57cec5SDimitry Andric _mm_setzero_pd (), 54500b57cec5SDimitry Andric (__mmask8) __U); 54510b57cec5SDimitry Andric } 54520b57cec5SDimitry Andric 54530b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 54540b57cec5SDimitry Andric _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 54550b57cec5SDimitry Andric { 5456480093f4SDimitry Andric return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, 54570b57cec5SDimitry Andric (__v4df) __W, 54580b57cec5SDimitry Andric (__mmask8) __U); 54590b57cec5SDimitry Andric } 54600b57cec5SDimitry Andric 54610b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 54620b57cec5SDimitry Andric _mm256_maskz_load_pd (__mmask8 __U, void const *__P) 54630b57cec5SDimitry Andric { 5464480093f4SDimitry Andric return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, 54650b57cec5SDimitry Andric (__v4df) 54660b57cec5SDimitry Andric _mm256_setzero_pd (), 54670b57cec5SDimitry Andric (__mmask8) __U); 54680b57cec5SDimitry Andric } 54690b57cec5SDimitry Andric 54700b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 54710b57cec5SDimitry Andric _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 54720b57cec5SDimitry Andric { 5473480093f4SDimitry Andric return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, 54740b57cec5SDimitry Andric (__v4sf) __W, 54750b57cec5SDimitry Andric (__mmask8) __U); 54760b57cec5SDimitry Andric } 54770b57cec5SDimitry Andric 54780b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 54790b57cec5SDimitry Andric _mm_maskz_load_ps (__mmask8 __U, void const *__P) 54800b57cec5SDimitry Andric { 5481480093f4SDimitry Andric return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, 54820b57cec5SDimitry Andric (__v4sf) 54830b57cec5SDimitry Andric _mm_setzero_ps (), 54840b57cec5SDimitry Andric (__mmask8) __U); 54850b57cec5SDimitry Andric } 54860b57cec5SDimitry Andric 54870b57cec5SDimitry 
Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 54880b57cec5SDimitry Andric _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 54890b57cec5SDimitry Andric { 5490480093f4SDimitry Andric return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, 54910b57cec5SDimitry Andric (__v8sf) __W, 54920b57cec5SDimitry Andric (__mmask8) __U); 54930b57cec5SDimitry Andric } 54940b57cec5SDimitry Andric 54950b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 54960b57cec5SDimitry Andric _mm256_maskz_load_ps (__mmask8 __U, void const *__P) 54970b57cec5SDimitry Andric { 5498480093f4SDimitry Andric return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, 54990b57cec5SDimitry Andric (__v8sf) 55000b57cec5SDimitry Andric _mm256_setzero_ps (), 55010b57cec5SDimitry Andric (__mmask8) __U); 55020b57cec5SDimitry Andric } 55030b57cec5SDimitry Andric 55040b57cec5SDimitry Andric static __inline __m128i __DEFAULT_FN_ATTRS128 55050b57cec5SDimitry Andric _mm_loadu_epi64 (void const *__P) 55060b57cec5SDimitry Andric { 55070b57cec5SDimitry Andric struct __loadu_epi64 { 55080b57cec5SDimitry Andric __m128i_u __v; 55090b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 5510480093f4SDimitry Andric return ((const struct __loadu_epi64*)__P)->__v; 55110b57cec5SDimitry Andric } 55120b57cec5SDimitry Andric 55130b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 55140b57cec5SDimitry Andric _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 55150b57cec5SDimitry Andric { 5516480093f4SDimitry Andric return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, 55170b57cec5SDimitry Andric (__v2di) __W, 55180b57cec5SDimitry Andric (__mmask8) __U); 55190b57cec5SDimitry Andric } 55200b57cec5SDimitry Andric 55210b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 55220b57cec5SDimitry Andric _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 55230b57cec5SDimitry Andric { 
5524480093f4SDimitry Andric return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, 55250b57cec5SDimitry Andric (__v2di) 55260b57cec5SDimitry Andric _mm_setzero_si128 (), 55270b57cec5SDimitry Andric (__mmask8) __U); 55280b57cec5SDimitry Andric } 55290b57cec5SDimitry Andric 55300b57cec5SDimitry Andric static __inline __m256i __DEFAULT_FN_ATTRS256 55310b57cec5SDimitry Andric _mm256_loadu_epi64 (void const *__P) 55320b57cec5SDimitry Andric { 55330b57cec5SDimitry Andric struct __loadu_epi64 { 55340b57cec5SDimitry Andric __m256i_u __v; 55350b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 5536480093f4SDimitry Andric return ((const struct __loadu_epi64*)__P)->__v; 55370b57cec5SDimitry Andric } 55380b57cec5SDimitry Andric 55390b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 55400b57cec5SDimitry Andric _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 55410b57cec5SDimitry Andric { 5542480093f4SDimitry Andric return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, 55430b57cec5SDimitry Andric (__v4di) __W, 55440b57cec5SDimitry Andric (__mmask8) __U); 55450b57cec5SDimitry Andric } 55460b57cec5SDimitry Andric 55470b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 55480b57cec5SDimitry Andric _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 55490b57cec5SDimitry Andric { 5550480093f4SDimitry Andric return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, 55510b57cec5SDimitry Andric (__v4di) 55520b57cec5SDimitry Andric _mm256_setzero_si256 (), 55530b57cec5SDimitry Andric (__mmask8) __U); 55540b57cec5SDimitry Andric } 55550b57cec5SDimitry Andric 55560b57cec5SDimitry Andric static __inline __m128i __DEFAULT_FN_ATTRS128 55570b57cec5SDimitry Andric _mm_loadu_epi32 (void const *__P) 55580b57cec5SDimitry Andric { 55590b57cec5SDimitry Andric struct __loadu_epi32 { 55600b57cec5SDimitry Andric __m128i_u __v; 55610b57cec5SDimitry Andric } 
__attribute__((__packed__, __may_alias__)); 5562480093f4SDimitry Andric return ((const struct __loadu_epi32*)__P)->__v; 55630b57cec5SDimitry Andric } 55640b57cec5SDimitry Andric 55650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 55660b57cec5SDimitry Andric _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 55670b57cec5SDimitry Andric { 5568480093f4SDimitry Andric return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, 55690b57cec5SDimitry Andric (__v4si) __W, 55700b57cec5SDimitry Andric (__mmask8) __U); 55710b57cec5SDimitry Andric } 55720b57cec5SDimitry Andric 55730b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 55740b57cec5SDimitry Andric _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 55750b57cec5SDimitry Andric { 5576480093f4SDimitry Andric return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, 55770b57cec5SDimitry Andric (__v4si) 55780b57cec5SDimitry Andric _mm_setzero_si128 (), 55790b57cec5SDimitry Andric (__mmask8) __U); 55800b57cec5SDimitry Andric } 55810b57cec5SDimitry Andric 55820b57cec5SDimitry Andric static __inline __m256i __DEFAULT_FN_ATTRS256 55830b57cec5SDimitry Andric _mm256_loadu_epi32 (void const *__P) 55840b57cec5SDimitry Andric { 55850b57cec5SDimitry Andric struct __loadu_epi32 { 55860b57cec5SDimitry Andric __m256i_u __v; 55870b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 5588480093f4SDimitry Andric return ((const struct __loadu_epi32*)__P)->__v; 55890b57cec5SDimitry Andric } 55900b57cec5SDimitry Andric 55910b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 55920b57cec5SDimitry Andric _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) 55930b57cec5SDimitry Andric { 5594480093f4SDimitry Andric return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, 55950b57cec5SDimitry Andric (__v8si) __W, 55960b57cec5SDimitry Andric (__mmask8) __U); 55970b57cec5SDimitry Andric } 
55980b57cec5SDimitry Andric 55990b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 56000b57cec5SDimitry Andric _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 56010b57cec5SDimitry Andric { 5602480093f4SDimitry Andric return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, 56030b57cec5SDimitry Andric (__v8si) 56040b57cec5SDimitry Andric _mm256_setzero_si256 (), 56050b57cec5SDimitry Andric (__mmask8) __U); 56060b57cec5SDimitry Andric } 56070b57cec5SDimitry Andric 56080b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 56090b57cec5SDimitry Andric _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 56100b57cec5SDimitry Andric { 5611480093f4SDimitry Andric return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, 56120b57cec5SDimitry Andric (__v2df) __W, 56130b57cec5SDimitry Andric (__mmask8) __U); 56140b57cec5SDimitry Andric } 56150b57cec5SDimitry Andric 56160b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 56170b57cec5SDimitry Andric _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 56180b57cec5SDimitry Andric { 5619480093f4SDimitry Andric return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, 56200b57cec5SDimitry Andric (__v2df) 56210b57cec5SDimitry Andric _mm_setzero_pd (), 56220b57cec5SDimitry Andric (__mmask8) __U); 56230b57cec5SDimitry Andric } 56240b57cec5SDimitry Andric 56250b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 56260b57cec5SDimitry Andric _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 56270b57cec5SDimitry Andric { 5628480093f4SDimitry Andric return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, 56290b57cec5SDimitry Andric (__v4df) __W, 56300b57cec5SDimitry Andric (__mmask8) __U); 56310b57cec5SDimitry Andric } 56320b57cec5SDimitry Andric 56330b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 56340b57cec5SDimitry Andric _mm256_maskz_loadu_pd 
(__mmask8 __U, void const *__P) 56350b57cec5SDimitry Andric { 5636480093f4SDimitry Andric return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, 56370b57cec5SDimitry Andric (__v4df) 56380b57cec5SDimitry Andric _mm256_setzero_pd (), 56390b57cec5SDimitry Andric (__mmask8) __U); 56400b57cec5SDimitry Andric } 56410b57cec5SDimitry Andric 56420b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 56430b57cec5SDimitry Andric _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 56440b57cec5SDimitry Andric { 5645480093f4SDimitry Andric return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, 56460b57cec5SDimitry Andric (__v4sf) __W, 56470b57cec5SDimitry Andric (__mmask8) __U); 56480b57cec5SDimitry Andric } 56490b57cec5SDimitry Andric 56500b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 56510b57cec5SDimitry Andric _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 56520b57cec5SDimitry Andric { 5653480093f4SDimitry Andric return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, 56540b57cec5SDimitry Andric (__v4sf) 56550b57cec5SDimitry Andric _mm_setzero_ps (), 56560b57cec5SDimitry Andric (__mmask8) __U); 56570b57cec5SDimitry Andric } 56580b57cec5SDimitry Andric 56590b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 56600b57cec5SDimitry Andric _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 56610b57cec5SDimitry Andric { 5662480093f4SDimitry Andric return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, 56630b57cec5SDimitry Andric (__v8sf) __W, 56640b57cec5SDimitry Andric (__mmask8) __U); 56650b57cec5SDimitry Andric } 56660b57cec5SDimitry Andric 56670b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 56680b57cec5SDimitry Andric _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 56690b57cec5SDimitry Andric { 5670480093f4SDimitry Andric return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, 
56710b57cec5SDimitry Andric (__v8sf) 56720b57cec5SDimitry Andric _mm256_setzero_ps (), 56730b57cec5SDimitry Andric (__mmask8) __U); 56740b57cec5SDimitry Andric } 56750b57cec5SDimitry Andric 56760b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 56770b57cec5SDimitry Andric _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 56780b57cec5SDimitry Andric { 56790b57cec5SDimitry Andric __builtin_ia32_storeapd128_mask ((__v2df *) __P, 56800b57cec5SDimitry Andric (__v2df) __A, 56810b57cec5SDimitry Andric (__mmask8) __U); 56820b57cec5SDimitry Andric } 56830b57cec5SDimitry Andric 56840b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 56850b57cec5SDimitry Andric _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 56860b57cec5SDimitry Andric { 56870b57cec5SDimitry Andric __builtin_ia32_storeapd256_mask ((__v4df *) __P, 56880b57cec5SDimitry Andric (__v4df) __A, 56890b57cec5SDimitry Andric (__mmask8) __U); 56900b57cec5SDimitry Andric } 56910b57cec5SDimitry Andric 56920b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 56930b57cec5SDimitry Andric _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 56940b57cec5SDimitry Andric { 56950b57cec5SDimitry Andric __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 56960b57cec5SDimitry Andric (__v4sf) __A, 56970b57cec5SDimitry Andric (__mmask8) __U); 56980b57cec5SDimitry Andric } 56990b57cec5SDimitry Andric 57000b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 57010b57cec5SDimitry Andric _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 57020b57cec5SDimitry Andric { 57030b57cec5SDimitry Andric __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 57040b57cec5SDimitry Andric (__v8sf) __A, 57050b57cec5SDimitry Andric (__mmask8) __U); 57060b57cec5SDimitry Andric } 57070b57cec5SDimitry Andric 57080b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS128 57090b57cec5SDimitry Andric _mm_storeu_epi64 (void *__P, __m128i __A) 57100b57cec5SDimitry 
Andric { 57110b57cec5SDimitry Andric struct __storeu_epi64 { 57120b57cec5SDimitry Andric __m128i_u __v; 57130b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 57140b57cec5SDimitry Andric ((struct __storeu_epi64*)__P)->__v = __A; 57150b57cec5SDimitry Andric } 57160b57cec5SDimitry Andric 57170b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 57180b57cec5SDimitry Andric _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 57190b57cec5SDimitry Andric { 57200b57cec5SDimitry Andric __builtin_ia32_storedqudi128_mask ((__v2di *) __P, 57210b57cec5SDimitry Andric (__v2di) __A, 57220b57cec5SDimitry Andric (__mmask8) __U); 57230b57cec5SDimitry Andric } 57240b57cec5SDimitry Andric 57250b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS256 57260b57cec5SDimitry Andric _mm256_storeu_epi64 (void *__P, __m256i __A) 57270b57cec5SDimitry Andric { 57280b57cec5SDimitry Andric struct __storeu_epi64 { 57290b57cec5SDimitry Andric __m256i_u __v; 57300b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 57310b57cec5SDimitry Andric ((struct __storeu_epi64*)__P)->__v = __A; 57320b57cec5SDimitry Andric } 57330b57cec5SDimitry Andric 57340b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 57350b57cec5SDimitry Andric _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 57360b57cec5SDimitry Andric { 57370b57cec5SDimitry Andric __builtin_ia32_storedqudi256_mask ((__v4di *) __P, 57380b57cec5SDimitry Andric (__v4di) __A, 57390b57cec5SDimitry Andric (__mmask8) __U); 57400b57cec5SDimitry Andric } 57410b57cec5SDimitry Andric 57420b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS128 57430b57cec5SDimitry Andric _mm_storeu_epi32 (void *__P, __m128i __A) 57440b57cec5SDimitry Andric { 57450b57cec5SDimitry Andric struct __storeu_epi32 { 57460b57cec5SDimitry Andric __m128i_u __v; 57470b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 57480b57cec5SDimitry Andric ((struct 
__storeu_epi32*)__P)->__v = __A; 57490b57cec5SDimitry Andric } 57500b57cec5SDimitry Andric 57510b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 57520b57cec5SDimitry Andric _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 57530b57cec5SDimitry Andric { 57540b57cec5SDimitry Andric __builtin_ia32_storedqusi128_mask ((__v4si *) __P, 57550b57cec5SDimitry Andric (__v4si) __A, 57560b57cec5SDimitry Andric (__mmask8) __U); 57570b57cec5SDimitry Andric } 57580b57cec5SDimitry Andric 57590b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS256 57600b57cec5SDimitry Andric _mm256_storeu_epi32 (void *__P, __m256i __A) 57610b57cec5SDimitry Andric { 57620b57cec5SDimitry Andric struct __storeu_epi32 { 57630b57cec5SDimitry Andric __m256i_u __v; 57640b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 57650b57cec5SDimitry Andric ((struct __storeu_epi32*)__P)->__v = __A; 57660b57cec5SDimitry Andric } 57670b57cec5SDimitry Andric 57680b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 57690b57cec5SDimitry Andric _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 57700b57cec5SDimitry Andric { 57710b57cec5SDimitry Andric __builtin_ia32_storedqusi256_mask ((__v8si *) __P, 57720b57cec5SDimitry Andric (__v8si) __A, 57730b57cec5SDimitry Andric (__mmask8) __U); 57740b57cec5SDimitry Andric } 57750b57cec5SDimitry Andric 57760b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 57770b57cec5SDimitry Andric _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 57780b57cec5SDimitry Andric { 57790b57cec5SDimitry Andric __builtin_ia32_storeupd128_mask ((__v2df *) __P, 57800b57cec5SDimitry Andric (__v2df) __A, 57810b57cec5SDimitry Andric (__mmask8) __U); 57820b57cec5SDimitry Andric } 57830b57cec5SDimitry Andric 57840b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 57850b57cec5SDimitry Andric _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 57860b57cec5SDimitry Andric { 
57870b57cec5SDimitry Andric __builtin_ia32_storeupd256_mask ((__v4df *) __P, 57880b57cec5SDimitry Andric (__v4df) __A, 57890b57cec5SDimitry Andric (__mmask8) __U); 57900b57cec5SDimitry Andric } 57910b57cec5SDimitry Andric 57920b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 57930b57cec5SDimitry Andric _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 57940b57cec5SDimitry Andric { 57950b57cec5SDimitry Andric __builtin_ia32_storeups128_mask ((__v4sf *) __P, 57960b57cec5SDimitry Andric (__v4sf) __A, 57970b57cec5SDimitry Andric (__mmask8) __U); 57980b57cec5SDimitry Andric } 57990b57cec5SDimitry Andric 58000b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 58010b57cec5SDimitry Andric _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 58020b57cec5SDimitry Andric { 58030b57cec5SDimitry Andric __builtin_ia32_storeups256_mask ((__v8sf *) __P, 58040b57cec5SDimitry Andric (__v8sf) __A, 58050b57cec5SDimitry Andric (__mmask8) __U); 58060b57cec5SDimitry Andric } 58070b57cec5SDimitry Andric 58080b57cec5SDimitry Andric 58090b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 58100b57cec5SDimitry Andric _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 58110b57cec5SDimitry Andric { 58120b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 58130b57cec5SDimitry Andric (__v2df)_mm_unpackhi_pd(__A, __B), 58140b57cec5SDimitry Andric (__v2df)__W); 58150b57cec5SDimitry Andric } 58160b57cec5SDimitry Andric 58170b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 58180b57cec5SDimitry Andric _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) 58190b57cec5SDimitry Andric { 58200b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 58210b57cec5SDimitry Andric (__v2df)_mm_unpackhi_pd(__A, __B), 58220b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 58230b57cec5SDimitry Andric } 58240b57cec5SDimitry Andric 
58250b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 58260b57cec5SDimitry Andric _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 58270b57cec5SDimitry Andric { 58280b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 58290b57cec5SDimitry Andric (__v4df)_mm256_unpackhi_pd(__A, __B), 58300b57cec5SDimitry Andric (__v4df)__W); 58310b57cec5SDimitry Andric } 58320b57cec5SDimitry Andric 58330b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 58340b57cec5SDimitry Andric _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) 58350b57cec5SDimitry Andric { 58360b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 58370b57cec5SDimitry Andric (__v4df)_mm256_unpackhi_pd(__A, __B), 58380b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 58390b57cec5SDimitry Andric } 58400b57cec5SDimitry Andric 58410b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 58420b57cec5SDimitry Andric _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 58430b57cec5SDimitry Andric { 58440b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 58450b57cec5SDimitry Andric (__v4sf)_mm_unpackhi_ps(__A, __B), 58460b57cec5SDimitry Andric (__v4sf)__W); 58470b57cec5SDimitry Andric } 58480b57cec5SDimitry Andric 58490b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 58500b57cec5SDimitry Andric _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) 58510b57cec5SDimitry Andric { 58520b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 58530b57cec5SDimitry Andric (__v4sf)_mm_unpackhi_ps(__A, __B), 58540b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 58550b57cec5SDimitry Andric } 58560b57cec5SDimitry Andric 58570b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 58580b57cec5SDimitry Andric _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 
__A, __m256 __B) 58590b57cec5SDimitry Andric { 58600b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 58610b57cec5SDimitry Andric (__v8sf)_mm256_unpackhi_ps(__A, __B), 58620b57cec5SDimitry Andric (__v8sf)__W); 58630b57cec5SDimitry Andric } 58640b57cec5SDimitry Andric 58650b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 58660b57cec5SDimitry Andric _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) 58670b57cec5SDimitry Andric { 58680b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 58690b57cec5SDimitry Andric (__v8sf)_mm256_unpackhi_ps(__A, __B), 58700b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 58710b57cec5SDimitry Andric } 58720b57cec5SDimitry Andric 58730b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 58740b57cec5SDimitry Andric _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 58750b57cec5SDimitry Andric { 58760b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 58770b57cec5SDimitry Andric (__v2df)_mm_unpacklo_pd(__A, __B), 58780b57cec5SDimitry Andric (__v2df)__W); 58790b57cec5SDimitry Andric } 58800b57cec5SDimitry Andric 58810b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 58820b57cec5SDimitry Andric _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) 58830b57cec5SDimitry Andric { 58840b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 58850b57cec5SDimitry Andric (__v2df)_mm_unpacklo_pd(__A, __B), 58860b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 58870b57cec5SDimitry Andric } 58880b57cec5SDimitry Andric 58890b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 58900b57cec5SDimitry Andric _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 58910b57cec5SDimitry Andric { 58920b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 58930b57cec5SDimitry 
Andric (__v4df)_mm256_unpacklo_pd(__A, __B), 58940b57cec5SDimitry Andric (__v4df)__W); 58950b57cec5SDimitry Andric } 58960b57cec5SDimitry Andric 58970b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 58980b57cec5SDimitry Andric _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) 58990b57cec5SDimitry Andric { 59000b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 59010b57cec5SDimitry Andric (__v4df)_mm256_unpacklo_pd(__A, __B), 59020b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 59030b57cec5SDimitry Andric } 59040b57cec5SDimitry Andric 59050b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 59060b57cec5SDimitry Andric _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 59070b57cec5SDimitry Andric { 59080b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 59090b57cec5SDimitry Andric (__v4sf)_mm_unpacklo_ps(__A, __B), 59100b57cec5SDimitry Andric (__v4sf)__W); 59110b57cec5SDimitry Andric } 59120b57cec5SDimitry Andric 59130b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 59140b57cec5SDimitry Andric _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) 59150b57cec5SDimitry Andric { 59160b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 59170b57cec5SDimitry Andric (__v4sf)_mm_unpacklo_ps(__A, __B), 59180b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 59190b57cec5SDimitry Andric } 59200b57cec5SDimitry Andric 59210b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 59220b57cec5SDimitry Andric _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 59230b57cec5SDimitry Andric { 59240b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 59250b57cec5SDimitry Andric (__v8sf)_mm256_unpacklo_ps(__A, __B), 59260b57cec5SDimitry Andric (__v8sf)__W); 59270b57cec5SDimitry Andric } 59280b57cec5SDimitry Andric 
59290b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 59300b57cec5SDimitry Andric _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) 59310b57cec5SDimitry Andric { 59320b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 59330b57cec5SDimitry Andric (__v8sf)_mm256_unpacklo_ps(__A, __B), 59340b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 59350b57cec5SDimitry Andric } 59360b57cec5SDimitry Andric 59370b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 59380b57cec5SDimitry Andric _mm_rcp14_pd (__m128d __A) 59390b57cec5SDimitry Andric { 59400b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 59410b57cec5SDimitry Andric (__v2df) 59420b57cec5SDimitry Andric _mm_setzero_pd (), 59430b57cec5SDimitry Andric (__mmask8) -1); 59440b57cec5SDimitry Andric } 59450b57cec5SDimitry Andric 59460b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 59470b57cec5SDimitry Andric _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 59480b57cec5SDimitry Andric { 59490b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 59500b57cec5SDimitry Andric (__v2df) __W, 59510b57cec5SDimitry Andric (__mmask8) __U); 59520b57cec5SDimitry Andric } 59530b57cec5SDimitry Andric 59540b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 59550b57cec5SDimitry Andric _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 59560b57cec5SDimitry Andric { 59570b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 59580b57cec5SDimitry Andric (__v2df) 59590b57cec5SDimitry Andric _mm_setzero_pd (), 59600b57cec5SDimitry Andric (__mmask8) __U); 59610b57cec5SDimitry Andric } 59620b57cec5SDimitry Andric 59630b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 59640b57cec5SDimitry Andric _mm256_rcp14_pd (__m256d __A) 59650b57cec5SDimitry Andric { 59660b57cec5SDimitry Andric return (__m256d) 
__builtin_ia32_rcp14pd256_mask ((__v4df) __A, 59670b57cec5SDimitry Andric (__v4df) 59680b57cec5SDimitry Andric _mm256_setzero_pd (), 59690b57cec5SDimitry Andric (__mmask8) -1); 59700b57cec5SDimitry Andric } 59710b57cec5SDimitry Andric 59720b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 59730b57cec5SDimitry Andric _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) 59740b57cec5SDimitry Andric { 59750b57cec5SDimitry Andric return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 59760b57cec5SDimitry Andric (__v4df) __W, 59770b57cec5SDimitry Andric (__mmask8) __U); 59780b57cec5SDimitry Andric } 59790b57cec5SDimitry Andric 59800b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 59810b57cec5SDimitry Andric _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 59820b57cec5SDimitry Andric { 59830b57cec5SDimitry Andric return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 59840b57cec5SDimitry Andric (__v4df) 59850b57cec5SDimitry Andric _mm256_setzero_pd (), 59860b57cec5SDimitry Andric (__mmask8) __U); 59870b57cec5SDimitry Andric } 59880b57cec5SDimitry Andric 59890b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 59900b57cec5SDimitry Andric _mm_rcp14_ps (__m128 __A) 59910b57cec5SDimitry Andric { 59920b57cec5SDimitry Andric return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 59930b57cec5SDimitry Andric (__v4sf) 59940b57cec5SDimitry Andric _mm_setzero_ps (), 59950b57cec5SDimitry Andric (__mmask8) -1); 59960b57cec5SDimitry Andric } 59970b57cec5SDimitry Andric 59980b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 59990b57cec5SDimitry Andric _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 60000b57cec5SDimitry Andric { 60010b57cec5SDimitry Andric return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 60020b57cec5SDimitry Andric (__v4sf) __W, 60030b57cec5SDimitry Andric (__mmask8) __U); 60040b57cec5SDimitry Andric } 60050b57cec5SDimitry Andric 
60060b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 60070b57cec5SDimitry Andric _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 60080b57cec5SDimitry Andric { 60090b57cec5SDimitry Andric return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 60100b57cec5SDimitry Andric (__v4sf) 60110b57cec5SDimitry Andric _mm_setzero_ps (), 60120b57cec5SDimitry Andric (__mmask8) __U); 60130b57cec5SDimitry Andric } 60140b57cec5SDimitry Andric 60150b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 60160b57cec5SDimitry Andric _mm256_rcp14_ps (__m256 __A) 60170b57cec5SDimitry Andric { 60180b57cec5SDimitry Andric return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 60190b57cec5SDimitry Andric (__v8sf) 60200b57cec5SDimitry Andric _mm256_setzero_ps (), 60210b57cec5SDimitry Andric (__mmask8) -1); 60220b57cec5SDimitry Andric } 60230b57cec5SDimitry Andric 60240b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 60250b57cec5SDimitry Andric _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 60260b57cec5SDimitry Andric { 60270b57cec5SDimitry Andric return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 60280b57cec5SDimitry Andric (__v8sf) __W, 60290b57cec5SDimitry Andric (__mmask8) __U); 60300b57cec5SDimitry Andric } 60310b57cec5SDimitry Andric 60320b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 60330b57cec5SDimitry Andric _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 60340b57cec5SDimitry Andric { 60350b57cec5SDimitry Andric return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 60360b57cec5SDimitry Andric (__v8sf) 60370b57cec5SDimitry Andric _mm256_setzero_ps (), 60380b57cec5SDimitry Andric (__mmask8) __U); 60390b57cec5SDimitry Andric } 60400b57cec5SDimitry Andric 60410b57cec5SDimitry Andric #define _mm_mask_permute_pd(W, U, X, C) \ 6042349cc55cSDimitry Andric ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 60430b57cec5SDimitry Andric (__v2df)_mm_permute_pd((X), 
(C)), \ 6044349cc55cSDimitry Andric (__v2df)(__m128d)(W))) 60450b57cec5SDimitry Andric 60460b57cec5SDimitry Andric #define _mm_maskz_permute_pd(U, X, C) \ 6047349cc55cSDimitry Andric ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 60480b57cec5SDimitry Andric (__v2df)_mm_permute_pd((X), (C)), \ 6049349cc55cSDimitry Andric (__v2df)_mm_setzero_pd())) 60500b57cec5SDimitry Andric 60510b57cec5SDimitry Andric #define _mm256_mask_permute_pd(W, U, X, C) \ 6052349cc55cSDimitry Andric ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 60530b57cec5SDimitry Andric (__v4df)_mm256_permute_pd((X), (C)), \ 6054349cc55cSDimitry Andric (__v4df)(__m256d)(W))) 60550b57cec5SDimitry Andric 60560b57cec5SDimitry Andric #define _mm256_maskz_permute_pd(U, X, C) \ 6057349cc55cSDimitry Andric ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 60580b57cec5SDimitry Andric (__v4df)_mm256_permute_pd((X), (C)), \ 6059349cc55cSDimitry Andric (__v4df)_mm256_setzero_pd())) 60600b57cec5SDimitry Andric 60610b57cec5SDimitry Andric #define _mm_mask_permute_ps(W, U, X, C) \ 6062349cc55cSDimitry Andric ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 60630b57cec5SDimitry Andric (__v4sf)_mm_permute_ps((X), (C)), \ 6064349cc55cSDimitry Andric (__v4sf)(__m128)(W))) 60650b57cec5SDimitry Andric 60660b57cec5SDimitry Andric #define _mm_maskz_permute_ps(U, X, C) \ 6067349cc55cSDimitry Andric ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 60680b57cec5SDimitry Andric (__v4sf)_mm_permute_ps((X), (C)), \ 6069349cc55cSDimitry Andric (__v4sf)_mm_setzero_ps())) 60700b57cec5SDimitry Andric 60710b57cec5SDimitry Andric #define _mm256_mask_permute_ps(W, U, X, C) \ 6072349cc55cSDimitry Andric ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 60730b57cec5SDimitry Andric (__v8sf)_mm256_permute_ps((X), (C)), \ 6074349cc55cSDimitry Andric (__v8sf)(__m256)(W))) 60750b57cec5SDimitry Andric 60760b57cec5SDimitry Andric #define _mm256_maskz_permute_ps(U, X, C) \ 6077349cc55cSDimitry Andric 
((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 60780b57cec5SDimitry Andric (__v8sf)_mm256_permute_ps((X), (C)), \ 6079349cc55cSDimitry Andric (__v8sf)_mm256_setzero_ps())) 60800b57cec5SDimitry Andric 60810b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 60820b57cec5SDimitry Andric _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) 60830b57cec5SDimitry Andric { 60840b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 60850b57cec5SDimitry Andric (__v2df)_mm_permutevar_pd(__A, __C), 60860b57cec5SDimitry Andric (__v2df)__W); 60870b57cec5SDimitry Andric } 60880b57cec5SDimitry Andric 60890b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 60900b57cec5SDimitry Andric _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) 60910b57cec5SDimitry Andric { 60920b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 60930b57cec5SDimitry Andric (__v2df)_mm_permutevar_pd(__A, __C), 60940b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 60950b57cec5SDimitry Andric } 60960b57cec5SDimitry Andric 60970b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 60980b57cec5SDimitry Andric _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) 60990b57cec5SDimitry Andric { 61000b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 61010b57cec5SDimitry Andric (__v4df)_mm256_permutevar_pd(__A, __C), 61020b57cec5SDimitry Andric (__v4df)__W); 61030b57cec5SDimitry Andric } 61040b57cec5SDimitry Andric 61050b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 61060b57cec5SDimitry Andric _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 61070b57cec5SDimitry Andric { 61080b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 61090b57cec5SDimitry Andric (__v4df)_mm256_permutevar_pd(__A, __C), 61100b57cec5SDimitry Andric 
(__v4df)_mm256_setzero_pd()); 61110b57cec5SDimitry Andric } 61120b57cec5SDimitry Andric 61130b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 61140b57cec5SDimitry Andric _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) 61150b57cec5SDimitry Andric { 61160b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 61170b57cec5SDimitry Andric (__v4sf)_mm_permutevar_ps(__A, __C), 61180b57cec5SDimitry Andric (__v4sf)__W); 61190b57cec5SDimitry Andric } 61200b57cec5SDimitry Andric 61210b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 61220b57cec5SDimitry Andric _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) 61230b57cec5SDimitry Andric { 61240b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 61250b57cec5SDimitry Andric (__v4sf)_mm_permutevar_ps(__A, __C), 61260b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 61270b57cec5SDimitry Andric } 61280b57cec5SDimitry Andric 61290b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 61300b57cec5SDimitry Andric _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) 61310b57cec5SDimitry Andric { 61320b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 61330b57cec5SDimitry Andric (__v8sf)_mm256_permutevar_ps(__A, __C), 61340b57cec5SDimitry Andric (__v8sf)__W); 61350b57cec5SDimitry Andric } 61360b57cec5SDimitry Andric 61370b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 61380b57cec5SDimitry Andric _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) 61390b57cec5SDimitry Andric { 61400b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 61410b57cec5SDimitry Andric (__v8sf)_mm256_permutevar_ps(__A, __C), 61420b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 61430b57cec5SDimitry Andric } 61440b57cec5SDimitry Andric 61450b57cec5SDimitry Andric static __inline__ __mmask8 
__DEFAULT_FN_ATTRS128 61460b57cec5SDimitry Andric _mm_test_epi32_mask (__m128i __A, __m128i __B) 61470b57cec5SDimitry Andric { 61480b57cec5SDimitry Andric return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 61490b57cec5SDimitry Andric } 61500b57cec5SDimitry Andric 61510b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 61520b57cec5SDimitry Andric _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 61530b57cec5SDimitry Andric { 61540b57cec5SDimitry Andric return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), 61550b57cec5SDimitry Andric _mm_setzero_si128()); 61560b57cec5SDimitry Andric } 61570b57cec5SDimitry Andric 61580b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 61590b57cec5SDimitry Andric _mm256_test_epi32_mask (__m256i __A, __m256i __B) 61600b57cec5SDimitry Andric { 61610b57cec5SDimitry Andric return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B), 61620b57cec5SDimitry Andric _mm256_setzero_si256()); 61630b57cec5SDimitry Andric } 61640b57cec5SDimitry Andric 61650b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 61660b57cec5SDimitry Andric _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 61670b57cec5SDimitry Andric { 61680b57cec5SDimitry Andric return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 61690b57cec5SDimitry Andric _mm256_setzero_si256()); 61700b57cec5SDimitry Andric } 61710b57cec5SDimitry Andric 61720b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 61730b57cec5SDimitry Andric _mm_test_epi64_mask (__m128i __A, __m128i __B) 61740b57cec5SDimitry Andric { 61750b57cec5SDimitry Andric return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 61760b57cec5SDimitry Andric } 61770b57cec5SDimitry Andric 61780b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 61790b57cec5SDimitry Andric _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, 
__m128i __B) 61800b57cec5SDimitry Andric { 61810b57cec5SDimitry Andric return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), 61820b57cec5SDimitry Andric _mm_setzero_si128()); 61830b57cec5SDimitry Andric } 61840b57cec5SDimitry Andric 61850b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 61860b57cec5SDimitry Andric _mm256_test_epi64_mask (__m256i __A, __m256i __B) 61870b57cec5SDimitry Andric { 61880b57cec5SDimitry Andric return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B), 61890b57cec5SDimitry Andric _mm256_setzero_si256()); 61900b57cec5SDimitry Andric } 61910b57cec5SDimitry Andric 61920b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 61930b57cec5SDimitry Andric _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 61940b57cec5SDimitry Andric { 61950b57cec5SDimitry Andric return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 61960b57cec5SDimitry Andric _mm256_setzero_si256()); 61970b57cec5SDimitry Andric } 61980b57cec5SDimitry Andric 61990b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 62000b57cec5SDimitry Andric _mm_testn_epi32_mask (__m128i __A, __m128i __B) 62010b57cec5SDimitry Andric { 62020b57cec5SDimitry Andric return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 62030b57cec5SDimitry Andric } 62040b57cec5SDimitry Andric 62050b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 62060b57cec5SDimitry Andric _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 62070b57cec5SDimitry Andric { 62080b57cec5SDimitry Andric return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B), 62090b57cec5SDimitry Andric _mm_setzero_si128()); 62100b57cec5SDimitry Andric } 62110b57cec5SDimitry Andric 62120b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 62130b57cec5SDimitry Andric _mm256_testn_epi32_mask (__m256i __A, __m256i __B) 62140b57cec5SDimitry Andric { 
62150b57cec5SDimitry Andric return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B), 62160b57cec5SDimitry Andric _mm256_setzero_si256()); 62170b57cec5SDimitry Andric } 62180b57cec5SDimitry Andric 62190b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 62200b57cec5SDimitry Andric _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 62210b57cec5SDimitry Andric { 62220b57cec5SDimitry Andric return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 62230b57cec5SDimitry Andric _mm256_setzero_si256()); 62240b57cec5SDimitry Andric } 62250b57cec5SDimitry Andric 62260b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 62270b57cec5SDimitry Andric _mm_testn_epi64_mask (__m128i __A, __m128i __B) 62280b57cec5SDimitry Andric { 62290b57cec5SDimitry Andric return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 62300b57cec5SDimitry Andric } 62310b57cec5SDimitry Andric 62320b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 62330b57cec5SDimitry Andric _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 62340b57cec5SDimitry Andric { 62350b57cec5SDimitry Andric return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), 62360b57cec5SDimitry Andric _mm_setzero_si128()); 62370b57cec5SDimitry Andric } 62380b57cec5SDimitry Andric 62390b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 62400b57cec5SDimitry Andric _mm256_testn_epi64_mask (__m256i __A, __m256i __B) 62410b57cec5SDimitry Andric { 62420b57cec5SDimitry Andric return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B), 62430b57cec5SDimitry Andric _mm256_setzero_si256()); 62440b57cec5SDimitry Andric } 62450b57cec5SDimitry Andric 62460b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 62470b57cec5SDimitry Andric _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 62480b57cec5SDimitry Andric { 62490b57cec5SDimitry Andric return 
_mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 62500b57cec5SDimitry Andric _mm256_setzero_si256()); 62510b57cec5SDimitry Andric } 62520b57cec5SDimitry Andric 62530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 62540b57cec5SDimitry Andric _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 62550b57cec5SDimitry Andric { 62560b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 62570b57cec5SDimitry Andric (__v4si)_mm_unpackhi_epi32(__A, __B), 62580b57cec5SDimitry Andric (__v4si)__W); 62590b57cec5SDimitry Andric } 62600b57cec5SDimitry Andric 62610b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 62620b57cec5SDimitry Andric _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) 62630b57cec5SDimitry Andric { 62640b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 62650b57cec5SDimitry Andric (__v4si)_mm_unpackhi_epi32(__A, __B), 62660b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 62670b57cec5SDimitry Andric } 62680b57cec5SDimitry Andric 62690b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 62700b57cec5SDimitry Andric _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 62710b57cec5SDimitry Andric { 62720b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 62730b57cec5SDimitry Andric (__v8si)_mm256_unpackhi_epi32(__A, __B), 62740b57cec5SDimitry Andric (__v8si)__W); 62750b57cec5SDimitry Andric } 62760b57cec5SDimitry Andric 62770b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 62780b57cec5SDimitry Andric _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) 62790b57cec5SDimitry Andric { 62800b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 62810b57cec5SDimitry Andric (__v8si)_mm256_unpackhi_epi32(__A, __B), 62820b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 
62830b57cec5SDimitry Andric } 62840b57cec5SDimitry Andric 62850b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 62860b57cec5SDimitry Andric _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 62870b57cec5SDimitry Andric { 62880b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 62890b57cec5SDimitry Andric (__v2di)_mm_unpackhi_epi64(__A, __B), 62900b57cec5SDimitry Andric (__v2di)__W); 62910b57cec5SDimitry Andric } 62920b57cec5SDimitry Andric 62930b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 62940b57cec5SDimitry Andric _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) 62950b57cec5SDimitry Andric { 62960b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 62970b57cec5SDimitry Andric (__v2di)_mm_unpackhi_epi64(__A, __B), 62980b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 62990b57cec5SDimitry Andric } 63000b57cec5SDimitry Andric 63010b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 63020b57cec5SDimitry Andric _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 63030b57cec5SDimitry Andric { 63040b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 63050b57cec5SDimitry Andric (__v4di)_mm256_unpackhi_epi64(__A, __B), 63060b57cec5SDimitry Andric (__v4di)__W); 63070b57cec5SDimitry Andric } 63080b57cec5SDimitry Andric 63090b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 63100b57cec5SDimitry Andric _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) 63110b57cec5SDimitry Andric { 63120b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 63130b57cec5SDimitry Andric (__v4di)_mm256_unpackhi_epi64(__A, __B), 63140b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 63150b57cec5SDimitry Andric } 63160b57cec5SDimitry Andric 63170b57cec5SDimitry Andric static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 63180b57cec5SDimitry Andric _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 63190b57cec5SDimitry Andric { 63200b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 63210b57cec5SDimitry Andric (__v4si)_mm_unpacklo_epi32(__A, __B), 63220b57cec5SDimitry Andric (__v4si)__W); 63230b57cec5SDimitry Andric } 63240b57cec5SDimitry Andric 63250b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 63260b57cec5SDimitry Andric _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) 63270b57cec5SDimitry Andric { 63280b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 63290b57cec5SDimitry Andric (__v4si)_mm_unpacklo_epi32(__A, __B), 63300b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 63310b57cec5SDimitry Andric } 63320b57cec5SDimitry Andric 63330b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 63340b57cec5SDimitry Andric _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 63350b57cec5SDimitry Andric { 63360b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 63370b57cec5SDimitry Andric (__v8si)_mm256_unpacklo_epi32(__A, __B), 63380b57cec5SDimitry Andric (__v8si)__W); 63390b57cec5SDimitry Andric } 63400b57cec5SDimitry Andric 63410b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 63420b57cec5SDimitry Andric _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) 63430b57cec5SDimitry Andric { 63440b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 63450b57cec5SDimitry Andric (__v8si)_mm256_unpacklo_epi32(__A, __B), 63460b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 63470b57cec5SDimitry Andric } 63480b57cec5SDimitry Andric 63490b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 63500b57cec5SDimitry Andric _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i 
__B) 63510b57cec5SDimitry Andric { 63520b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 63530b57cec5SDimitry Andric (__v2di)_mm_unpacklo_epi64(__A, __B), 63540b57cec5SDimitry Andric (__v2di)__W); 63550b57cec5SDimitry Andric } 63560b57cec5SDimitry Andric 63570b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 63580b57cec5SDimitry Andric _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) 63590b57cec5SDimitry Andric { 63600b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 63610b57cec5SDimitry Andric (__v2di)_mm_unpacklo_epi64(__A, __B), 63620b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 63630b57cec5SDimitry Andric } 63640b57cec5SDimitry Andric 63650b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 63660b57cec5SDimitry Andric _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 63670b57cec5SDimitry Andric { 63680b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 63690b57cec5SDimitry Andric (__v4di)_mm256_unpacklo_epi64(__A, __B), 63700b57cec5SDimitry Andric (__v4di)__W); 63710b57cec5SDimitry Andric } 63720b57cec5SDimitry Andric 63730b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 63740b57cec5SDimitry Andric _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) 63750b57cec5SDimitry Andric { 63760b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 63770b57cec5SDimitry Andric (__v4di)_mm256_unpacklo_epi64(__A, __B), 63780b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 63790b57cec5SDimitry Andric } 63800b57cec5SDimitry Andric 63810b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 63820b57cec5SDimitry Andric _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 63830b57cec5SDimitry Andric { 63840b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 
63850b57cec5SDimitry Andric (__v4si)_mm_sra_epi32(__A, __B), 63860b57cec5SDimitry Andric (__v4si)__W); 63870b57cec5SDimitry Andric } 63880b57cec5SDimitry Andric 63890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 63900b57cec5SDimitry Andric _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) 63910b57cec5SDimitry Andric { 63920b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 63930b57cec5SDimitry Andric (__v4si)_mm_sra_epi32(__A, __B), 63940b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 63950b57cec5SDimitry Andric } 63960b57cec5SDimitry Andric 63970b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 63980b57cec5SDimitry Andric _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 63990b57cec5SDimitry Andric { 64000b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 64010b57cec5SDimitry Andric (__v8si)_mm256_sra_epi32(__A, __B), 64020b57cec5SDimitry Andric (__v8si)__W); 64030b57cec5SDimitry Andric } 64040b57cec5SDimitry Andric 64050b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 64060b57cec5SDimitry Andric _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) 64070b57cec5SDimitry Andric { 64080b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 64090b57cec5SDimitry Andric (__v8si)_mm256_sra_epi32(__A, __B), 64100b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 64110b57cec5SDimitry Andric } 64120b57cec5SDimitry Andric 64130b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 64145ffd83dbSDimitry Andric _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 64150b57cec5SDimitry Andric { 64160b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 641781ad6265SDimitry Andric (__v4si)_mm_srai_epi32(__A, (int)__B), 64180b57cec5SDimitry Andric (__v4si)__W); 64190b57cec5SDimitry Andric } 
64200b57cec5SDimitry Andric 64210b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 64225ffd83dbSDimitry Andric _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 64230b57cec5SDimitry Andric { 64240b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 642581ad6265SDimitry Andric (__v4si)_mm_srai_epi32(__A, (int)__B), 64260b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 64270b57cec5SDimitry Andric } 64280b57cec5SDimitry Andric 64290b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 64305ffd83dbSDimitry Andric _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 64310b57cec5SDimitry Andric { 64320b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 643381ad6265SDimitry Andric (__v8si)_mm256_srai_epi32(__A, (int)__B), 64340b57cec5SDimitry Andric (__v8si)__W); 64350b57cec5SDimitry Andric } 64360b57cec5SDimitry Andric 64370b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 64385ffd83dbSDimitry Andric _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 64390b57cec5SDimitry Andric { 64400b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 644181ad6265SDimitry Andric (__v8si)_mm256_srai_epi32(__A, (int)__B), 64420b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 64430b57cec5SDimitry Andric } 64440b57cec5SDimitry Andric 64450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 64460b57cec5SDimitry Andric _mm_sra_epi64(__m128i __A, __m128i __B) 64470b57cec5SDimitry Andric { 64480b57cec5SDimitry Andric return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); 64490b57cec5SDimitry Andric } 64500b57cec5SDimitry Andric 64510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 64520b57cec5SDimitry Andric _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 64530b57cec5SDimitry Andric { 64540b57cec5SDimitry 
Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 64550b57cec5SDimitry Andric (__v2di)_mm_sra_epi64(__A, __B), \ 64560b57cec5SDimitry Andric (__v2di)__W); 64570b57cec5SDimitry Andric } 64580b57cec5SDimitry Andric 64590b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 64600b57cec5SDimitry Andric _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) 64610b57cec5SDimitry Andric { 64620b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 64630b57cec5SDimitry Andric (__v2di)_mm_sra_epi64(__A, __B), \ 64640b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 64650b57cec5SDimitry Andric } 64660b57cec5SDimitry Andric 64670b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 64680b57cec5SDimitry Andric _mm256_sra_epi64(__m256i __A, __m128i __B) 64690b57cec5SDimitry Andric { 64700b57cec5SDimitry Andric return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); 64710b57cec5SDimitry Andric } 64720b57cec5SDimitry Andric 64730b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 64740b57cec5SDimitry Andric _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 64750b57cec5SDimitry Andric { 64760b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 64770b57cec5SDimitry Andric (__v4di)_mm256_sra_epi64(__A, __B), \ 64780b57cec5SDimitry Andric (__v4di)__W); 64790b57cec5SDimitry Andric } 64800b57cec5SDimitry Andric 64810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 64820b57cec5SDimitry Andric _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) 64830b57cec5SDimitry Andric { 64840b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 64850b57cec5SDimitry Andric (__v4di)_mm256_sra_epi64(__A, __B), \ 64860b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 64870b57cec5SDimitry Andric } 64880b57cec5SDimitry Andric 64890b57cec5SDimitry Andric static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 64905ffd83dbSDimitry Andric _mm_srai_epi64(__m128i __A, unsigned int __imm) 64910b57cec5SDimitry Andric { 649281ad6265SDimitry Andric return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm); 64930b57cec5SDimitry Andric } 64940b57cec5SDimitry Andric 64950b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 64965ffd83dbSDimitry Andric _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) 64970b57cec5SDimitry Andric { 64980b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 64990b57cec5SDimitry Andric (__v2di)_mm_srai_epi64(__A, __imm), \ 65000b57cec5SDimitry Andric (__v2di)__W); 65010b57cec5SDimitry Andric } 65020b57cec5SDimitry Andric 65030b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 65045ffd83dbSDimitry Andric _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) 65050b57cec5SDimitry Andric { 65060b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 65070b57cec5SDimitry Andric (__v2di)_mm_srai_epi64(__A, __imm), \ 65080b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 65090b57cec5SDimitry Andric } 65100b57cec5SDimitry Andric 65110b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 65125ffd83dbSDimitry Andric _mm256_srai_epi64(__m256i __A, unsigned int __imm) 65130b57cec5SDimitry Andric { 651481ad6265SDimitry Andric return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm); 65150b57cec5SDimitry Andric } 65160b57cec5SDimitry Andric 65170b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 65185ffd83dbSDimitry Andric _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, 65195ffd83dbSDimitry Andric unsigned int __imm) 65200b57cec5SDimitry Andric { 65210b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 65220b57cec5SDimitry Andric (__v4di)_mm256_srai_epi64(__A, __imm), \ 
65230b57cec5SDimitry Andric (__v4di)__W); 65240b57cec5SDimitry Andric } 65250b57cec5SDimitry Andric 65260b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 65275ffd83dbSDimitry Andric _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) 65280b57cec5SDimitry Andric { 65290b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 65300b57cec5SDimitry Andric (__v4di)_mm256_srai_epi64(__A, __imm), \ 65310b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 65320b57cec5SDimitry Andric } 65330b57cec5SDimitry Andric 65340b57cec5SDimitry Andric #define _mm_ternarylogic_epi32(A, B, C, imm) \ 653581ad6265SDimitry Andric ((__m128i)__builtin_ia32_pternlogd128_mask( \ 653681ad6265SDimitry Andric (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 653781ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)-1)) 65380b57cec5SDimitry Andric 65390b57cec5SDimitry Andric #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ 654081ad6265SDimitry Andric ((__m128i)__builtin_ia32_pternlogd128_mask( \ 654181ad6265SDimitry Andric (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 654281ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)(U))) 65430b57cec5SDimitry Andric 65440b57cec5SDimitry Andric #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 654581ad6265SDimitry Andric ((__m128i)__builtin_ia32_pternlogd128_maskz( \ 654681ad6265SDimitry Andric (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 654781ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)(U))) 65480b57cec5SDimitry Andric 65490b57cec5SDimitry Andric #define _mm256_ternarylogic_epi32(A, B, C, imm) \ 655081ad6265SDimitry Andric ((__m256i)__builtin_ia32_pternlogd256_mask( \ 655181ad6265SDimitry Andric (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 655281ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)-1)) 65530b57cec5SDimitry Andric 65540b57cec5SDimitry Andric #define 
_mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ 655581ad6265SDimitry Andric ((__m256i)__builtin_ia32_pternlogd256_mask( \ 655681ad6265SDimitry Andric (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 655781ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)(U))) 65580b57cec5SDimitry Andric 65590b57cec5SDimitry Andric #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 656081ad6265SDimitry Andric ((__m256i)__builtin_ia32_pternlogd256_maskz( \ 656181ad6265SDimitry Andric (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 656281ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)(U))) 65630b57cec5SDimitry Andric 65640b57cec5SDimitry Andric #define _mm_ternarylogic_epi64(A, B, C, imm) \ 656581ad6265SDimitry Andric ((__m128i)__builtin_ia32_pternlogq128_mask( \ 656681ad6265SDimitry Andric (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 656781ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)-1)) 65680b57cec5SDimitry Andric 65690b57cec5SDimitry Andric #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ 657081ad6265SDimitry Andric ((__m128i)__builtin_ia32_pternlogq128_mask( \ 657181ad6265SDimitry Andric (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 657281ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)(U))) 65730b57cec5SDimitry Andric 65740b57cec5SDimitry Andric #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 657581ad6265SDimitry Andric ((__m128i)__builtin_ia32_pternlogq128_maskz( \ 657681ad6265SDimitry Andric (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 657781ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)(U))) 65780b57cec5SDimitry Andric 65790b57cec5SDimitry Andric #define _mm256_ternarylogic_epi64(A, B, C, imm) \ 658081ad6265SDimitry Andric ((__m256i)__builtin_ia32_pternlogq256_mask( \ 658181ad6265SDimitry Andric (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 658281ad6265SDimitry Andric (unsigned char)(imm), 
(__mmask8)-1)) 65830b57cec5SDimitry Andric 65840b57cec5SDimitry Andric #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ 658581ad6265SDimitry Andric ((__m256i)__builtin_ia32_pternlogq256_mask( \ 658681ad6265SDimitry Andric (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 658781ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)(U))) 65880b57cec5SDimitry Andric 65890b57cec5SDimitry Andric #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 659081ad6265SDimitry Andric ((__m256i)__builtin_ia32_pternlogq256_maskz( \ 659181ad6265SDimitry Andric (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 659281ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)(U))) 65930b57cec5SDimitry Andric 65940b57cec5SDimitry Andric #define _mm256_shuffle_f32x4(A, B, imm) \ 6595349cc55cSDimitry Andric ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ 6596349cc55cSDimitry Andric (__v8sf)(__m256)(B), (int)(imm))) 65970b57cec5SDimitry Andric 65980b57cec5SDimitry Andric #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \ 6599349cc55cSDimitry Andric ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 66000b57cec5SDimitry Andric (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6601349cc55cSDimitry Andric (__v8sf)(__m256)(W))) 66020b57cec5SDimitry Andric 66030b57cec5SDimitry Andric #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \ 6604349cc55cSDimitry Andric ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 66050b57cec5SDimitry Andric (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6606349cc55cSDimitry Andric (__v8sf)_mm256_setzero_ps())) 66070b57cec5SDimitry Andric 66080b57cec5SDimitry Andric #define _mm256_shuffle_f64x2(A, B, imm) \ 6609349cc55cSDimitry Andric ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \ 6610349cc55cSDimitry Andric (__v4df)(__m256d)(B), (int)(imm))) 66110b57cec5SDimitry Andric 66120b57cec5SDimitry Andric #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ 6613349cc55cSDimitry Andric 
((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 66140b57cec5SDimitry Andric (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6615349cc55cSDimitry Andric (__v4df)(__m256d)(W))) 66160b57cec5SDimitry Andric 66170b57cec5SDimitry Andric #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ 6618349cc55cSDimitry Andric ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 66190b57cec5SDimitry Andric (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6620349cc55cSDimitry Andric (__v4df)_mm256_setzero_pd())) 66210b57cec5SDimitry Andric 66220b57cec5SDimitry Andric #define _mm256_shuffle_i32x4(A, B, imm) \ 6623349cc55cSDimitry Andric ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \ 6624349cc55cSDimitry Andric (__v8si)(__m256i)(B), (int)(imm))) 66250b57cec5SDimitry Andric 66260b57cec5SDimitry Andric #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ 6627349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 66280b57cec5SDimitry Andric (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6629349cc55cSDimitry Andric (__v8si)(__m256i)(W))) 66300b57cec5SDimitry Andric 66310b57cec5SDimitry Andric #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \ 6632349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 66330b57cec5SDimitry Andric (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6634349cc55cSDimitry Andric (__v8si)_mm256_setzero_si256())) 66350b57cec5SDimitry Andric 66360b57cec5SDimitry Andric #define _mm256_shuffle_i64x2(A, B, imm) \ 6637349cc55cSDimitry Andric ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \ 6638349cc55cSDimitry Andric (__v4di)(__m256i)(B), (int)(imm))) 66390b57cec5SDimitry Andric 66400b57cec5SDimitry Andric #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ 6641349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 66420b57cec5SDimitry Andric (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6643349cc55cSDimitry Andric (__v4di)(__m256i)(W))) 66440b57cec5SDimitry 
Andric 66450b57cec5SDimitry Andric 66460b57cec5SDimitry Andric #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ 6647349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 66480b57cec5SDimitry Andric (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6649349cc55cSDimitry Andric (__v4di)_mm256_setzero_si256())) 66500b57cec5SDimitry Andric 66510b57cec5SDimitry Andric #define _mm_mask_shuffle_pd(W, U, A, B, M) \ 6652349cc55cSDimitry Andric ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 66530b57cec5SDimitry Andric (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6654349cc55cSDimitry Andric (__v2df)(__m128d)(W))) 66550b57cec5SDimitry Andric 66560b57cec5SDimitry Andric #define _mm_maskz_shuffle_pd(U, A, B, M) \ 6657349cc55cSDimitry Andric ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 66580b57cec5SDimitry Andric (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6659349cc55cSDimitry Andric (__v2df)_mm_setzero_pd())) 66600b57cec5SDimitry Andric 66610b57cec5SDimitry Andric #define _mm256_mask_shuffle_pd(W, U, A, B, M) \ 6662349cc55cSDimitry Andric ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 66630b57cec5SDimitry Andric (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6664349cc55cSDimitry Andric (__v4df)(__m256d)(W))) 66650b57cec5SDimitry Andric 66660b57cec5SDimitry Andric #define _mm256_maskz_shuffle_pd(U, A, B, M) \ 6667349cc55cSDimitry Andric ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 66680b57cec5SDimitry Andric (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6669349cc55cSDimitry Andric (__v4df)_mm256_setzero_pd())) 66700b57cec5SDimitry Andric 66710b57cec5SDimitry Andric #define _mm_mask_shuffle_ps(W, U, A, B, M) \ 6672349cc55cSDimitry Andric ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 66730b57cec5SDimitry Andric (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6674349cc55cSDimitry Andric (__v4sf)(__m128)(W))) 66750b57cec5SDimitry Andric 66760b57cec5SDimitry Andric #define _mm_maskz_shuffle_ps(U, A, B, M) \ 6677349cc55cSDimitry Andric 
((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 66780b57cec5SDimitry Andric (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6679349cc55cSDimitry Andric (__v4sf)_mm_setzero_ps())) 66800b57cec5SDimitry Andric 66810b57cec5SDimitry Andric #define _mm256_mask_shuffle_ps(W, U, A, B, M) \ 6682349cc55cSDimitry Andric ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 66830b57cec5SDimitry Andric (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6684349cc55cSDimitry Andric (__v8sf)(__m256)(W))) 66850b57cec5SDimitry Andric 66860b57cec5SDimitry Andric #define _mm256_maskz_shuffle_ps(U, A, B, M) \ 6687349cc55cSDimitry Andric ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 66880b57cec5SDimitry Andric (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6689349cc55cSDimitry Andric (__v8sf)_mm256_setzero_ps())) 66900b57cec5SDimitry Andric 66910b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 66920b57cec5SDimitry Andric _mm_rsqrt14_pd (__m128d __A) 66930b57cec5SDimitry Andric { 66940b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 66950b57cec5SDimitry Andric (__v2df) 66960b57cec5SDimitry Andric _mm_setzero_pd (), 66970b57cec5SDimitry Andric (__mmask8) -1); 66980b57cec5SDimitry Andric } 66990b57cec5SDimitry Andric 67000b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 67010b57cec5SDimitry Andric _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 67020b57cec5SDimitry Andric { 67030b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 67040b57cec5SDimitry Andric (__v2df) __W, 67050b57cec5SDimitry Andric (__mmask8) __U); 67060b57cec5SDimitry Andric } 67070b57cec5SDimitry Andric 67080b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 67090b57cec5SDimitry Andric _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 67100b57cec5SDimitry Andric { 67110b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 67120b57cec5SDimitry 
Andric (__v2df) 67130b57cec5SDimitry Andric _mm_setzero_pd (), 67140b57cec5SDimitry Andric (__mmask8) __U); 67150b57cec5SDimitry Andric } 67160b57cec5SDimitry Andric 67170b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 67180b57cec5SDimitry Andric _mm256_rsqrt14_pd (__m256d __A) 67190b57cec5SDimitry Andric { 67200b57cec5SDimitry Andric return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 67210b57cec5SDimitry Andric (__v4df) 67220b57cec5SDimitry Andric _mm256_setzero_pd (), 67230b57cec5SDimitry Andric (__mmask8) -1); 67240b57cec5SDimitry Andric } 67250b57cec5SDimitry Andric 67260b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 67270b57cec5SDimitry Andric _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 67280b57cec5SDimitry Andric { 67290b57cec5SDimitry Andric return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 67300b57cec5SDimitry Andric (__v4df) __W, 67310b57cec5SDimitry Andric (__mmask8) __U); 67320b57cec5SDimitry Andric } 67330b57cec5SDimitry Andric 67340b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 67350b57cec5SDimitry Andric _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 67360b57cec5SDimitry Andric { 67370b57cec5SDimitry Andric return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 67380b57cec5SDimitry Andric (__v4df) 67390b57cec5SDimitry Andric _mm256_setzero_pd (), 67400b57cec5SDimitry Andric (__mmask8) __U); 67410b57cec5SDimitry Andric } 67420b57cec5SDimitry Andric 67430b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 67440b57cec5SDimitry Andric _mm_rsqrt14_ps (__m128 __A) 67450b57cec5SDimitry Andric { 67460b57cec5SDimitry Andric return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 67470b57cec5SDimitry Andric (__v4sf) 67480b57cec5SDimitry Andric _mm_setzero_ps (), 67490b57cec5SDimitry Andric (__mmask8) -1); 67500b57cec5SDimitry Andric } 67510b57cec5SDimitry Andric 67520b57cec5SDimitry Andric static 
__inline__ __m128 __DEFAULT_FN_ATTRS128 67530b57cec5SDimitry Andric _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 67540b57cec5SDimitry Andric { 67550b57cec5SDimitry Andric return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 67560b57cec5SDimitry Andric (__v4sf) __W, 67570b57cec5SDimitry Andric (__mmask8) __U); 67580b57cec5SDimitry Andric } 67590b57cec5SDimitry Andric 67600b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 67610b57cec5SDimitry Andric _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 67620b57cec5SDimitry Andric { 67630b57cec5SDimitry Andric return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 67640b57cec5SDimitry Andric (__v4sf) 67650b57cec5SDimitry Andric _mm_setzero_ps (), 67660b57cec5SDimitry Andric (__mmask8) __U); 67670b57cec5SDimitry Andric } 67680b57cec5SDimitry Andric 67690b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 67700b57cec5SDimitry Andric _mm256_rsqrt14_ps (__m256 __A) 67710b57cec5SDimitry Andric { 67720b57cec5SDimitry Andric return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 67730b57cec5SDimitry Andric (__v8sf) 67740b57cec5SDimitry Andric _mm256_setzero_ps (), 67750b57cec5SDimitry Andric (__mmask8) -1); 67760b57cec5SDimitry Andric } 67770b57cec5SDimitry Andric 67780b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 67790b57cec5SDimitry Andric _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 67800b57cec5SDimitry Andric { 67810b57cec5SDimitry Andric return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 67820b57cec5SDimitry Andric (__v8sf) __W, 67830b57cec5SDimitry Andric (__mmask8) __U); 67840b57cec5SDimitry Andric } 67850b57cec5SDimitry Andric 67860b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 67870b57cec5SDimitry Andric _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 67880b57cec5SDimitry Andric { 67890b57cec5SDimitry Andric return (__m256) __builtin_ia32_rsqrt14ps256_mask 
((__v8sf) __A, 67900b57cec5SDimitry Andric (__v8sf) 67910b57cec5SDimitry Andric _mm256_setzero_ps (), 67920b57cec5SDimitry Andric (__mmask8) __U); 67930b57cec5SDimitry Andric } 67940b57cec5SDimitry Andric 67950b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 67960b57cec5SDimitry Andric _mm256_broadcast_f32x4(__m128 __A) 67970b57cec5SDimitry Andric { 67980b57cec5SDimitry Andric return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 67990b57cec5SDimitry Andric 0, 1, 2, 3, 0, 1, 2, 3); 68000b57cec5SDimitry Andric } 68010b57cec5SDimitry Andric 68020b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 68030b57cec5SDimitry Andric _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) 68040b57cec5SDimitry Andric { 68050b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 68060b57cec5SDimitry Andric (__v8sf)_mm256_broadcast_f32x4(__A), 68070b57cec5SDimitry Andric (__v8sf)__O); 68080b57cec5SDimitry Andric } 68090b57cec5SDimitry Andric 68100b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 68110b57cec5SDimitry Andric _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 68120b57cec5SDimitry Andric { 68130b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 68140b57cec5SDimitry Andric (__v8sf)_mm256_broadcast_f32x4(__A), 68150b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 68160b57cec5SDimitry Andric } 68170b57cec5SDimitry Andric 68180b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 68190b57cec5SDimitry Andric _mm256_broadcast_i32x4(__m128i __A) 68200b57cec5SDimitry Andric { 68210b57cec5SDimitry Andric return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 68220b57cec5SDimitry Andric 0, 1, 2, 3, 0, 1, 2, 3); 68230b57cec5SDimitry Andric } 68240b57cec5SDimitry Andric 68250b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 68260b57cec5SDimitry Andric _mm256_mask_broadcast_i32x4(__m256i 
__O, __mmask8 __M, __m128i __A) 68270b57cec5SDimitry Andric { 68280b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 68290b57cec5SDimitry Andric (__v8si)_mm256_broadcast_i32x4(__A), 68300b57cec5SDimitry Andric (__v8si)__O); 68310b57cec5SDimitry Andric } 68320b57cec5SDimitry Andric 68330b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 68340b57cec5SDimitry Andric _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) 68350b57cec5SDimitry Andric { 68360b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 68370b57cec5SDimitry Andric (__v8si)_mm256_broadcast_i32x4(__A), 68380b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 68390b57cec5SDimitry Andric } 68400b57cec5SDimitry Andric 68410b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 68420b57cec5SDimitry Andric _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 68430b57cec5SDimitry Andric { 68440b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256(__M, 68450b57cec5SDimitry Andric (__v4df) _mm256_broadcastsd_pd(__A), 68460b57cec5SDimitry Andric (__v4df) __O); 68470b57cec5SDimitry Andric } 68480b57cec5SDimitry Andric 68490b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 68500b57cec5SDimitry Andric _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 68510b57cec5SDimitry Andric { 68520b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256(__M, 68530b57cec5SDimitry Andric (__v4df) _mm256_broadcastsd_pd(__A), 68540b57cec5SDimitry Andric (__v4df) _mm256_setzero_pd()); 68550b57cec5SDimitry Andric } 68560b57cec5SDimitry Andric 68570b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 68580b57cec5SDimitry Andric _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 68590b57cec5SDimitry Andric { 68600b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128(__M, 68610b57cec5SDimitry Andric (__v4sf) 
_mm_broadcastss_ps(__A), 68620b57cec5SDimitry Andric (__v4sf) __O); 68630b57cec5SDimitry Andric } 68640b57cec5SDimitry Andric 68650b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 68660b57cec5SDimitry Andric _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 68670b57cec5SDimitry Andric { 68680b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128(__M, 68690b57cec5SDimitry Andric (__v4sf) _mm_broadcastss_ps(__A), 68700b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps()); 68710b57cec5SDimitry Andric } 68720b57cec5SDimitry Andric 68730b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 68740b57cec5SDimitry Andric _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 68750b57cec5SDimitry Andric { 68760b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256(__M, 68770b57cec5SDimitry Andric (__v8sf) _mm256_broadcastss_ps(__A), 68780b57cec5SDimitry Andric (__v8sf) __O); 68790b57cec5SDimitry Andric } 68800b57cec5SDimitry Andric 68810b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 68820b57cec5SDimitry Andric _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 68830b57cec5SDimitry Andric { 68840b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256(__M, 68850b57cec5SDimitry Andric (__v8sf) _mm256_broadcastss_ps(__A), 68860b57cec5SDimitry Andric (__v8sf) _mm256_setzero_ps()); 68870b57cec5SDimitry Andric } 68880b57cec5SDimitry Andric 68890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 68900b57cec5SDimitry Andric _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 68910b57cec5SDimitry Andric { 68920b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__M, 68930b57cec5SDimitry Andric (__v4si) _mm_broadcastd_epi32(__A), 68940b57cec5SDimitry Andric (__v4si) __O); 68950b57cec5SDimitry Andric } 68960b57cec5SDimitry Andric 68970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 68980b57cec5SDimitry Andric 
_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 68990b57cec5SDimitry Andric { 69000b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128(__M, 69010b57cec5SDimitry Andric (__v4si) _mm_broadcastd_epi32(__A), 69020b57cec5SDimitry Andric (__v4si) _mm_setzero_si128()); 69030b57cec5SDimitry Andric } 69040b57cec5SDimitry Andric 69050b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 69060b57cec5SDimitry Andric _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 69070b57cec5SDimitry Andric { 69080b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__M, 69090b57cec5SDimitry Andric (__v8si) _mm256_broadcastd_epi32(__A), 69100b57cec5SDimitry Andric (__v8si) __O); 69110b57cec5SDimitry Andric } 69120b57cec5SDimitry Andric 69130b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 69140b57cec5SDimitry Andric _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 69150b57cec5SDimitry Andric { 69160b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256(__M, 69170b57cec5SDimitry Andric (__v8si) _mm256_broadcastd_epi32(__A), 69180b57cec5SDimitry Andric (__v8si) _mm256_setzero_si256()); 69190b57cec5SDimitry Andric } 69200b57cec5SDimitry Andric 69210b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 69220b57cec5SDimitry Andric _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 69230b57cec5SDimitry Andric { 69240b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__M, 69250b57cec5SDimitry Andric (__v2di) _mm_broadcastq_epi64(__A), 69260b57cec5SDimitry Andric (__v2di) __O); 69270b57cec5SDimitry Andric } 69280b57cec5SDimitry Andric 69290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 69300b57cec5SDimitry Andric _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 69310b57cec5SDimitry Andric { 69320b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128(__M, 69330b57cec5SDimitry Andric (__v2di) 
_mm_broadcastq_epi64(__A), 69340b57cec5SDimitry Andric (__v2di) _mm_setzero_si128()); 69350b57cec5SDimitry Andric } 69360b57cec5SDimitry Andric 69370b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 69380b57cec5SDimitry Andric _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 69390b57cec5SDimitry Andric { 69400b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__M, 69410b57cec5SDimitry Andric (__v4di) _mm256_broadcastq_epi64(__A), 69420b57cec5SDimitry Andric (__v4di) __O); 69430b57cec5SDimitry Andric } 69440b57cec5SDimitry Andric 69450b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 69460b57cec5SDimitry Andric _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 69470b57cec5SDimitry Andric { 69480b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256(__M, 69490b57cec5SDimitry Andric (__v4di) _mm256_broadcastq_epi64(__A), 69500b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256()); 69510b57cec5SDimitry Andric } 69520b57cec5SDimitry Andric 69530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 69540b57cec5SDimitry Andric _mm_cvtsepi32_epi8 (__m128i __A) 69550b57cec5SDimitry Andric { 69560b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 69570b57cec5SDimitry Andric (__v16qi)_mm_undefined_si128(), 69580b57cec5SDimitry Andric (__mmask8) -1); 69590b57cec5SDimitry Andric } 69600b57cec5SDimitry Andric 69610b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 69620b57cec5SDimitry Andric _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 69630b57cec5SDimitry Andric { 69640b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 69650b57cec5SDimitry Andric (__v16qi) __O, __M); 69660b57cec5SDimitry Andric } 69670b57cec5SDimitry Andric 69680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 69690b57cec5SDimitry Andric _mm_maskz_cvtsepi32_epi8 
(__mmask8 __M, __m128i __A) 69700b57cec5SDimitry Andric { 69710b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 69720b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 69730b57cec5SDimitry Andric __M); 69740b57cec5SDimitry Andric } 69750b57cec5SDimitry Andric 69760b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 69770b57cec5SDimitry Andric _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 69780b57cec5SDimitry Andric { 69790b57cec5SDimitry Andric __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 69800b57cec5SDimitry Andric } 69810b57cec5SDimitry Andric 69820b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 69830b57cec5SDimitry Andric _mm256_cvtsepi32_epi8 (__m256i __A) 69840b57cec5SDimitry Andric { 69850b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 69860b57cec5SDimitry Andric (__v16qi)_mm_undefined_si128(), 69870b57cec5SDimitry Andric (__mmask8) -1); 69880b57cec5SDimitry Andric } 69890b57cec5SDimitry Andric 69900b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 69910b57cec5SDimitry Andric _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 69920b57cec5SDimitry Andric { 69930b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 69940b57cec5SDimitry Andric (__v16qi) __O, __M); 69950b57cec5SDimitry Andric } 69960b57cec5SDimitry Andric 69970b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 69980b57cec5SDimitry Andric _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 69990b57cec5SDimitry Andric { 70000b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 70010b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 70020b57cec5SDimitry Andric __M); 70030b57cec5SDimitry Andric } 70040b57cec5SDimitry Andric 70050b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 
70060b57cec5SDimitry Andric _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 70070b57cec5SDimitry Andric { 70080b57cec5SDimitry Andric __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 70090b57cec5SDimitry Andric } 70100b57cec5SDimitry Andric 70110b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 70120b57cec5SDimitry Andric _mm_cvtsepi32_epi16 (__m128i __A) 70130b57cec5SDimitry Andric { 70140b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 70150b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128 (), 70160b57cec5SDimitry Andric (__mmask8) -1); 70170b57cec5SDimitry Andric } 70180b57cec5SDimitry Andric 70190b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 70200b57cec5SDimitry Andric _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 70210b57cec5SDimitry Andric { 70220b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 70230b57cec5SDimitry Andric (__v8hi)__O, 70240b57cec5SDimitry Andric __M); 70250b57cec5SDimitry Andric } 70260b57cec5SDimitry Andric 70270b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 70280b57cec5SDimitry Andric _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) 70290b57cec5SDimitry Andric { 70300b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 70310b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 70320b57cec5SDimitry Andric __M); 70330b57cec5SDimitry Andric } 70340b57cec5SDimitry Andric 70350b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 70360b57cec5SDimitry Andric _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 70370b57cec5SDimitry Andric { 70380b57cec5SDimitry Andric __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 70390b57cec5SDimitry Andric } 70400b57cec5SDimitry Andric 70410b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 
70420b57cec5SDimitry Andric _mm256_cvtsepi32_epi16 (__m256i __A) 70430b57cec5SDimitry Andric { 70440b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 70450b57cec5SDimitry Andric (__v8hi)_mm_undefined_si128(), 70460b57cec5SDimitry Andric (__mmask8) -1); 70470b57cec5SDimitry Andric } 70480b57cec5SDimitry Andric 70490b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 70500b57cec5SDimitry Andric _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 70510b57cec5SDimitry Andric { 70520b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 70530b57cec5SDimitry Andric (__v8hi) __O, __M); 70540b57cec5SDimitry Andric } 70550b57cec5SDimitry Andric 70560b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 70570b57cec5SDimitry Andric _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 70580b57cec5SDimitry Andric { 70590b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 70600b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 70610b57cec5SDimitry Andric __M); 70620b57cec5SDimitry Andric } 70630b57cec5SDimitry Andric 70640b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 70650b57cec5SDimitry Andric _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 70660b57cec5SDimitry Andric { 70670b57cec5SDimitry Andric __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 70680b57cec5SDimitry Andric } 70690b57cec5SDimitry Andric 70700b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 70710b57cec5SDimitry Andric _mm_cvtsepi64_epi8 (__m128i __A) 70720b57cec5SDimitry Andric { 70730b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 70740b57cec5SDimitry Andric (__v16qi)_mm_undefined_si128(), 70750b57cec5SDimitry Andric (__mmask8) -1); 70760b57cec5SDimitry Andric } 70770b57cec5SDimitry Andric 70780b57cec5SDimitry Andric static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 70790b57cec5SDimitry Andric _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 70800b57cec5SDimitry Andric { 70810b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 70820b57cec5SDimitry Andric (__v16qi) __O, __M); 70830b57cec5SDimitry Andric } 70840b57cec5SDimitry Andric 70850b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 70860b57cec5SDimitry Andric _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 70870b57cec5SDimitry Andric { 70880b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 70890b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 70900b57cec5SDimitry Andric __M); 70910b57cec5SDimitry Andric } 70920b57cec5SDimitry Andric 70930b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 70940b57cec5SDimitry Andric _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 70950b57cec5SDimitry Andric { 70960b57cec5SDimitry Andric __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 70970b57cec5SDimitry Andric } 70980b57cec5SDimitry Andric 70990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 71000b57cec5SDimitry Andric _mm256_cvtsepi64_epi8 (__m256i __A) 71010b57cec5SDimitry Andric { 71020b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 71030b57cec5SDimitry Andric (__v16qi)_mm_undefined_si128(), 71040b57cec5SDimitry Andric (__mmask8) -1); 71050b57cec5SDimitry Andric } 71060b57cec5SDimitry Andric 71070b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 71080b57cec5SDimitry Andric _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 71090b57cec5SDimitry Andric { 71100b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 71110b57cec5SDimitry Andric (__v16qi) __O, __M); 71120b57cec5SDimitry Andric } 71130b57cec5SDimitry Andric 71140b57cec5SDimitry 
Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 71150b57cec5SDimitry Andric _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 71160b57cec5SDimitry Andric { 71170b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 71180b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 71190b57cec5SDimitry Andric __M); 71200b57cec5SDimitry Andric } 71210b57cec5SDimitry Andric 71220b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 71230b57cec5SDimitry Andric _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 71240b57cec5SDimitry Andric { 71250b57cec5SDimitry Andric __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 71260b57cec5SDimitry Andric } 71270b57cec5SDimitry Andric 71280b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 71290b57cec5SDimitry Andric _mm_cvtsepi64_epi32 (__m128i __A) 71300b57cec5SDimitry Andric { 71310b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 71320b57cec5SDimitry Andric (__v4si)_mm_undefined_si128(), 71330b57cec5SDimitry Andric (__mmask8) -1); 71340b57cec5SDimitry Andric } 71350b57cec5SDimitry Andric 71360b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 71370b57cec5SDimitry Andric _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 71380b57cec5SDimitry Andric { 71390b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 71400b57cec5SDimitry Andric (__v4si) __O, __M); 71410b57cec5SDimitry Andric } 71420b57cec5SDimitry Andric 71430b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 71440b57cec5SDimitry Andric _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 71450b57cec5SDimitry Andric { 71460b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 71470b57cec5SDimitry Andric (__v4si) _mm_setzero_si128 (), 71480b57cec5SDimitry Andric __M); 71490b57cec5SDimitry Andric } 
71500b57cec5SDimitry Andric 71510b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 71520b57cec5SDimitry Andric _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 71530b57cec5SDimitry Andric { 71540b57cec5SDimitry Andric __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 71550b57cec5SDimitry Andric } 71560b57cec5SDimitry Andric 71570b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 71580b57cec5SDimitry Andric _mm256_cvtsepi64_epi32 (__m256i __A) 71590b57cec5SDimitry Andric { 71600b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 71610b57cec5SDimitry Andric (__v4si)_mm_undefined_si128(), 71620b57cec5SDimitry Andric (__mmask8) -1); 71630b57cec5SDimitry Andric } 71640b57cec5SDimitry Andric 71650b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 71660b57cec5SDimitry Andric _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 71670b57cec5SDimitry Andric { 71680b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 71690b57cec5SDimitry Andric (__v4si)__O, 71700b57cec5SDimitry Andric __M); 71710b57cec5SDimitry Andric } 71720b57cec5SDimitry Andric 71730b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 71740b57cec5SDimitry Andric _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 71750b57cec5SDimitry Andric { 71760b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 71770b57cec5SDimitry Andric (__v4si) _mm_setzero_si128 (), 71780b57cec5SDimitry Andric __M); 71790b57cec5SDimitry Andric } 71800b57cec5SDimitry Andric 71810b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 71820b57cec5SDimitry Andric _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 71830b57cec5SDimitry Andric { 71840b57cec5SDimitry Andric __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 71850b57cec5SDimitry 
Andric } 71860b57cec5SDimitry Andric 71870b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 71880b57cec5SDimitry Andric _mm_cvtsepi64_epi16 (__m128i __A) 71890b57cec5SDimitry Andric { 71900b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 71910b57cec5SDimitry Andric (__v8hi)_mm_undefined_si128(), 71920b57cec5SDimitry Andric (__mmask8) -1); 71930b57cec5SDimitry Andric } 71940b57cec5SDimitry Andric 71950b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 71960b57cec5SDimitry Andric _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 71970b57cec5SDimitry Andric { 71980b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 71990b57cec5SDimitry Andric (__v8hi) __O, __M); 72000b57cec5SDimitry Andric } 72010b57cec5SDimitry Andric 72020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 72030b57cec5SDimitry Andric _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 72040b57cec5SDimitry Andric { 72050b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 72060b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 72070b57cec5SDimitry Andric __M); 72080b57cec5SDimitry Andric } 72090b57cec5SDimitry Andric 72100b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 72110b57cec5SDimitry Andric _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 72120b57cec5SDimitry Andric { 72130b57cec5SDimitry Andric __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 72140b57cec5SDimitry Andric } 72150b57cec5SDimitry Andric 72160b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 72170b57cec5SDimitry Andric _mm256_cvtsepi64_epi16 (__m256i __A) 72180b57cec5SDimitry Andric { 72190b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 72200b57cec5SDimitry Andric (__v8hi)_mm_undefined_si128(), 72210b57cec5SDimitry Andric 
(__mmask8) -1); 72220b57cec5SDimitry Andric } 72230b57cec5SDimitry Andric 72240b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 72250b57cec5SDimitry Andric _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 72260b57cec5SDimitry Andric { 72270b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 72280b57cec5SDimitry Andric (__v8hi) __O, __M); 72290b57cec5SDimitry Andric } 72300b57cec5SDimitry Andric 72310b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 72320b57cec5SDimitry Andric _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 72330b57cec5SDimitry Andric { 72340b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 72350b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 72360b57cec5SDimitry Andric __M); 72370b57cec5SDimitry Andric } 72380b57cec5SDimitry Andric 72390b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 72400b57cec5SDimitry Andric _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 72410b57cec5SDimitry Andric { 72420b57cec5SDimitry Andric __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 72430b57cec5SDimitry Andric } 72440b57cec5SDimitry Andric 72450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 72460b57cec5SDimitry Andric _mm_cvtusepi32_epi8 (__m128i __A) 72470b57cec5SDimitry Andric { 72480b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 72490b57cec5SDimitry Andric (__v16qi)_mm_undefined_si128(), 72500b57cec5SDimitry Andric (__mmask8) -1); 72510b57cec5SDimitry Andric } 72520b57cec5SDimitry Andric 72530b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 72540b57cec5SDimitry Andric _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 72550b57cec5SDimitry Andric { 72560b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 
72570b57cec5SDimitry Andric (__v16qi) __O, 72580b57cec5SDimitry Andric __M); 72590b57cec5SDimitry Andric } 72600b57cec5SDimitry Andric 72610b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 72620b57cec5SDimitry Andric _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 72630b57cec5SDimitry Andric { 72640b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 72650b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 72660b57cec5SDimitry Andric __M); 72670b57cec5SDimitry Andric } 72680b57cec5SDimitry Andric 72690b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 72700b57cec5SDimitry Andric _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 72710b57cec5SDimitry Andric { 72720b57cec5SDimitry Andric __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 72730b57cec5SDimitry Andric } 72740b57cec5SDimitry Andric 72750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 72760b57cec5SDimitry Andric _mm256_cvtusepi32_epi8 (__m256i __A) 72770b57cec5SDimitry Andric { 72780b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 72790b57cec5SDimitry Andric (__v16qi)_mm_undefined_si128(), 72800b57cec5SDimitry Andric (__mmask8) -1); 72810b57cec5SDimitry Andric } 72820b57cec5SDimitry Andric 72830b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 72840b57cec5SDimitry Andric _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 72850b57cec5SDimitry Andric { 72860b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 72870b57cec5SDimitry Andric (__v16qi) __O, 72880b57cec5SDimitry Andric __M); 72890b57cec5SDimitry Andric } 72900b57cec5SDimitry Andric 72910b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 72920b57cec5SDimitry Andric _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 72930b57cec5SDimitry Andric { 72940b57cec5SDimitry 
Andric return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 72950b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 72960b57cec5SDimitry Andric __M); 72970b57cec5SDimitry Andric } 72980b57cec5SDimitry Andric 72990b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 73000b57cec5SDimitry Andric _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 73010b57cec5SDimitry Andric { 73020b57cec5SDimitry Andric __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 73030b57cec5SDimitry Andric } 73040b57cec5SDimitry Andric 73050b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 73060b57cec5SDimitry Andric _mm_cvtusepi32_epi16 (__m128i __A) 73070b57cec5SDimitry Andric { 73080b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 73090b57cec5SDimitry Andric (__v8hi)_mm_undefined_si128(), 73100b57cec5SDimitry Andric (__mmask8) -1); 73110b57cec5SDimitry Andric } 73120b57cec5SDimitry Andric 73130b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 73140b57cec5SDimitry Andric _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 73150b57cec5SDimitry Andric { 73160b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 73170b57cec5SDimitry Andric (__v8hi) __O, __M); 73180b57cec5SDimitry Andric } 73190b57cec5SDimitry Andric 73200b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 73210b57cec5SDimitry Andric _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) 73220b57cec5SDimitry Andric { 73230b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 73240b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 73250b57cec5SDimitry Andric __M); 73260b57cec5SDimitry Andric } 73270b57cec5SDimitry Andric 73280b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 73290b57cec5SDimitry Andric _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 
__M, __m128i __A) 73300b57cec5SDimitry Andric { 73310b57cec5SDimitry Andric __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 73320b57cec5SDimitry Andric } 73330b57cec5SDimitry Andric 73340b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 73350b57cec5SDimitry Andric _mm256_cvtusepi32_epi16 (__m256i __A) 73360b57cec5SDimitry Andric { 73370b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 73380b57cec5SDimitry Andric (__v8hi) _mm_undefined_si128(), 73390b57cec5SDimitry Andric (__mmask8) -1); 73400b57cec5SDimitry Andric } 73410b57cec5SDimitry Andric 73420b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 73430b57cec5SDimitry Andric _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 73440b57cec5SDimitry Andric { 73450b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 73460b57cec5SDimitry Andric (__v8hi) __O, __M); 73470b57cec5SDimitry Andric } 73480b57cec5SDimitry Andric 73490b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 73500b57cec5SDimitry Andric _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 73510b57cec5SDimitry Andric { 73520b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 73530b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 73540b57cec5SDimitry Andric __M); 73550b57cec5SDimitry Andric } 73560b57cec5SDimitry Andric 73570b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 73580b57cec5SDimitry Andric _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 73590b57cec5SDimitry Andric { 73600b57cec5SDimitry Andric __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 73610b57cec5SDimitry Andric } 73620b57cec5SDimitry Andric 73630b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 73640b57cec5SDimitry Andric _mm_cvtusepi64_epi8 (__m128i __A) 73650b57cec5SDimitry Andric 
{ 73660b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 73670b57cec5SDimitry Andric (__v16qi)_mm_undefined_si128(), 73680b57cec5SDimitry Andric (__mmask8) -1); 73690b57cec5SDimitry Andric } 73700b57cec5SDimitry Andric 73710b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 73720b57cec5SDimitry Andric _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 73730b57cec5SDimitry Andric { 73740b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 73750b57cec5SDimitry Andric (__v16qi) __O, 73760b57cec5SDimitry Andric __M); 73770b57cec5SDimitry Andric } 73780b57cec5SDimitry Andric 73790b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 73800b57cec5SDimitry Andric _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 73810b57cec5SDimitry Andric { 73820b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 73830b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 73840b57cec5SDimitry Andric __M); 73850b57cec5SDimitry Andric } 73860b57cec5SDimitry Andric 73870b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 73880b57cec5SDimitry Andric _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 73890b57cec5SDimitry Andric { 73900b57cec5SDimitry Andric __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 73910b57cec5SDimitry Andric } 73920b57cec5SDimitry Andric 73930b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 73940b57cec5SDimitry Andric _mm256_cvtusepi64_epi8 (__m256i __A) 73950b57cec5SDimitry Andric { 73960b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 73970b57cec5SDimitry Andric (__v16qi)_mm_undefined_si128(), 73980b57cec5SDimitry Andric (__mmask8) -1); 73990b57cec5SDimitry Andric } 74000b57cec5SDimitry Andric 74010b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 74020b57cec5SDimitry 
Andric _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 74030b57cec5SDimitry Andric { 74040b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 74050b57cec5SDimitry Andric (__v16qi) __O, 74060b57cec5SDimitry Andric __M); 74070b57cec5SDimitry Andric } 74080b57cec5SDimitry Andric 74090b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 74100b57cec5SDimitry Andric _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 74110b57cec5SDimitry Andric { 74120b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 74130b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 74140b57cec5SDimitry Andric __M); 74150b57cec5SDimitry Andric } 74160b57cec5SDimitry Andric 74170b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 74180b57cec5SDimitry Andric _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 74190b57cec5SDimitry Andric { 74200b57cec5SDimitry Andric __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 74210b57cec5SDimitry Andric } 74220b57cec5SDimitry Andric 74230b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 74240b57cec5SDimitry Andric _mm_cvtusepi64_epi32 (__m128i __A) 74250b57cec5SDimitry Andric { 74260b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 74270b57cec5SDimitry Andric (__v4si)_mm_undefined_si128(), 74280b57cec5SDimitry Andric (__mmask8) -1); 74290b57cec5SDimitry Andric } 74300b57cec5SDimitry Andric 74310b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 74320b57cec5SDimitry Andric _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 74330b57cec5SDimitry Andric { 74340b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 74350b57cec5SDimitry Andric (__v4si) __O, __M); 74360b57cec5SDimitry Andric } 74370b57cec5SDimitry Andric 74380b57cec5SDimitry Andric static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 74390b57cec5SDimitry Andric _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 74400b57cec5SDimitry Andric { 74410b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 74420b57cec5SDimitry Andric (__v4si) _mm_setzero_si128 (), 74430b57cec5SDimitry Andric __M); 74440b57cec5SDimitry Andric } 74450b57cec5SDimitry Andric 74460b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 74470b57cec5SDimitry Andric _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 74480b57cec5SDimitry Andric { 74490b57cec5SDimitry Andric __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 74500b57cec5SDimitry Andric } 74510b57cec5SDimitry Andric 74520b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 74530b57cec5SDimitry Andric _mm256_cvtusepi64_epi32 (__m256i __A) 74540b57cec5SDimitry Andric { 74550b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 74560b57cec5SDimitry Andric (__v4si)_mm_undefined_si128(), 74570b57cec5SDimitry Andric (__mmask8) -1); 74580b57cec5SDimitry Andric } 74590b57cec5SDimitry Andric 74600b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 74610b57cec5SDimitry Andric _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 74620b57cec5SDimitry Andric { 74630b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 74640b57cec5SDimitry Andric (__v4si) __O, __M); 74650b57cec5SDimitry Andric } 74660b57cec5SDimitry Andric 74670b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 74680b57cec5SDimitry Andric _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 74690b57cec5SDimitry Andric { 74700b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 74710b57cec5SDimitry Andric (__v4si) _mm_setzero_si128 (), 74720b57cec5SDimitry Andric __M); 74730b57cec5SDimitry Andric } 
74740b57cec5SDimitry Andric 74750b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 74760b57cec5SDimitry Andric _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 74770b57cec5SDimitry Andric { 74780b57cec5SDimitry Andric __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 74790b57cec5SDimitry Andric } 74800b57cec5SDimitry Andric 74810b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 74820b57cec5SDimitry Andric _mm_cvtusepi64_epi16 (__m128i __A) 74830b57cec5SDimitry Andric { 74840b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 74850b57cec5SDimitry Andric (__v8hi)_mm_undefined_si128(), 74860b57cec5SDimitry Andric (__mmask8) -1); 74870b57cec5SDimitry Andric } 74880b57cec5SDimitry Andric 74890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 74900b57cec5SDimitry Andric _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 74910b57cec5SDimitry Andric { 74920b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 74930b57cec5SDimitry Andric (__v8hi) __O, __M); 74940b57cec5SDimitry Andric } 74950b57cec5SDimitry Andric 74960b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 74970b57cec5SDimitry Andric _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 74980b57cec5SDimitry Andric { 74990b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 75000b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 75010b57cec5SDimitry Andric __M); 75020b57cec5SDimitry Andric } 75030b57cec5SDimitry Andric 75040b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 75050b57cec5SDimitry Andric _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 75060b57cec5SDimitry Andric { 75070b57cec5SDimitry Andric __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 75080b57cec5SDimitry Andric } 
75090b57cec5SDimitry Andric 75100b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 75110b57cec5SDimitry Andric _mm256_cvtusepi64_epi16 (__m256i __A) 75120b57cec5SDimitry Andric { 75130b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 75140b57cec5SDimitry Andric (__v8hi)_mm_undefined_si128(), 75150b57cec5SDimitry Andric (__mmask8) -1); 75160b57cec5SDimitry Andric } 75170b57cec5SDimitry Andric 75180b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 75190b57cec5SDimitry Andric _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 75200b57cec5SDimitry Andric { 75210b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 75220b57cec5SDimitry Andric (__v8hi) __O, __M); 75230b57cec5SDimitry Andric } 75240b57cec5SDimitry Andric 75250b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 75260b57cec5SDimitry Andric _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 75270b57cec5SDimitry Andric { 75280b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 75290b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 75300b57cec5SDimitry Andric __M); 75310b57cec5SDimitry Andric } 75320b57cec5SDimitry Andric 75330b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 75340b57cec5SDimitry Andric _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 75350b57cec5SDimitry Andric { 75360b57cec5SDimitry Andric __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 75370b57cec5SDimitry Andric } 75380b57cec5SDimitry Andric 75390b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 75400b57cec5SDimitry Andric _mm_cvtepi32_epi8 (__m128i __A) 75410b57cec5SDimitry Andric { 75420b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector( 75430b57cec5SDimitry Andric __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 
75440b57cec5SDimitry Andric 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 75450b57cec5SDimitry Andric } 75460b57cec5SDimitry Andric 75470b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 75480b57cec5SDimitry Andric _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 75490b57cec5SDimitry Andric { 75500b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 75510b57cec5SDimitry Andric (__v16qi) __O, __M); 75520b57cec5SDimitry Andric } 75530b57cec5SDimitry Andric 75540b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 75550b57cec5SDimitry Andric _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 75560b57cec5SDimitry Andric { 75570b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 75580b57cec5SDimitry Andric (__v16qi) 75590b57cec5SDimitry Andric _mm_setzero_si128 (), 75600b57cec5SDimitry Andric __M); 75610b57cec5SDimitry Andric } 75620b57cec5SDimitry Andric 75630b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 75640b57cec5SDimitry Andric _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 75650b57cec5SDimitry Andric { 75660b57cec5SDimitry Andric __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 75670b57cec5SDimitry Andric } 75680b57cec5SDimitry Andric 75690b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 75700b57cec5SDimitry Andric _mm256_cvtepi32_epi8 (__m256i __A) 75710b57cec5SDimitry Andric { 75720b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector( 75730b57cec5SDimitry Andric __builtin_convertvector((__v8si)__A, __v8qi), 75740b57cec5SDimitry Andric (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 75750b57cec5SDimitry Andric 12, 13, 14, 15); 75760b57cec5SDimitry Andric } 75770b57cec5SDimitry Andric 75780b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 75790b57cec5SDimitry Andric _mm256_mask_cvtepi32_epi8 (__m128i __O, 
__mmask8 __M, __m256i __A) 75800b57cec5SDimitry Andric { 75810b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 75820b57cec5SDimitry Andric (__v16qi) __O, __M); 75830b57cec5SDimitry Andric } 75840b57cec5SDimitry Andric 75850b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 75860b57cec5SDimitry Andric _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 75870b57cec5SDimitry Andric { 75880b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 75890b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 75900b57cec5SDimitry Andric __M); 75910b57cec5SDimitry Andric } 75920b57cec5SDimitry Andric 75930b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 75940b57cec5SDimitry Andric _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 75950b57cec5SDimitry Andric { 75960b57cec5SDimitry Andric __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 75970b57cec5SDimitry Andric } 75980b57cec5SDimitry Andric 75990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 76000b57cec5SDimitry Andric _mm_cvtepi32_epi16 (__m128i __A) 76010b57cec5SDimitry Andric { 76020b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector( 76030b57cec5SDimitry Andric __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 76040b57cec5SDimitry Andric 2, 3, 4, 5, 6, 7); 76050b57cec5SDimitry Andric } 76060b57cec5SDimitry Andric 76070b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 76080b57cec5SDimitry Andric _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 76090b57cec5SDimitry Andric { 76100b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 76110b57cec5SDimitry Andric (__v8hi) __O, __M); 76120b57cec5SDimitry Andric } 76130b57cec5SDimitry Andric 76140b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 76150b57cec5SDimitry Andric 
_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 76160b57cec5SDimitry Andric { 76170b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 76180b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 76190b57cec5SDimitry Andric __M); 76200b57cec5SDimitry Andric } 76210b57cec5SDimitry Andric 76220b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 76230b57cec5SDimitry Andric _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 76240b57cec5SDimitry Andric { 76250b57cec5SDimitry Andric __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 76260b57cec5SDimitry Andric } 76270b57cec5SDimitry Andric 76280b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 76290b57cec5SDimitry Andric _mm256_cvtepi32_epi16 (__m256i __A) 76300b57cec5SDimitry Andric { 76310b57cec5SDimitry Andric return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); 76320b57cec5SDimitry Andric } 76330b57cec5SDimitry Andric 76340b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 76350b57cec5SDimitry Andric _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 76360b57cec5SDimitry Andric { 76370b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 76380b57cec5SDimitry Andric (__v8hi) __O, __M); 76390b57cec5SDimitry Andric } 76400b57cec5SDimitry Andric 76410b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 76420b57cec5SDimitry Andric _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 76430b57cec5SDimitry Andric { 76440b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 76450b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 76460b57cec5SDimitry Andric __M); 76470b57cec5SDimitry Andric } 76480b57cec5SDimitry Andric 76490b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 76500b57cec5SDimitry Andric _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i 
__A) 76510b57cec5SDimitry Andric { 76520b57cec5SDimitry Andric __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 76530b57cec5SDimitry Andric } 76540b57cec5SDimitry Andric 76550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 76560b57cec5SDimitry Andric _mm_cvtepi64_epi8 (__m128i __A) 76570b57cec5SDimitry Andric { 76580b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector( 76590b57cec5SDimitry Andric __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, 76600b57cec5SDimitry Andric 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); 76610b57cec5SDimitry Andric } 76620b57cec5SDimitry Andric 76630b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 76640b57cec5SDimitry Andric _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 76650b57cec5SDimitry Andric { 76660b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 76670b57cec5SDimitry Andric (__v16qi) __O, __M); 76680b57cec5SDimitry Andric } 76690b57cec5SDimitry Andric 76700b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 76710b57cec5SDimitry Andric _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 76720b57cec5SDimitry Andric { 76730b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 76740b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 76750b57cec5SDimitry Andric __M); 76760b57cec5SDimitry Andric } 76770b57cec5SDimitry Andric 76780b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 76790b57cec5SDimitry Andric _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 76800b57cec5SDimitry Andric { 76810b57cec5SDimitry Andric __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 76820b57cec5SDimitry Andric } 76830b57cec5SDimitry Andric 76840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 76850b57cec5SDimitry Andric _mm256_cvtepi64_epi8 (__m256i __A) 76860b57cec5SDimitry Andric 
{ 76870b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector( 76880b57cec5SDimitry Andric __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 76890b57cec5SDimitry Andric 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 76900b57cec5SDimitry Andric } 76910b57cec5SDimitry Andric 76920b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 76930b57cec5SDimitry Andric _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 76940b57cec5SDimitry Andric { 76950b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 76960b57cec5SDimitry Andric (__v16qi) __O, __M); 76970b57cec5SDimitry Andric } 76980b57cec5SDimitry Andric 76990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 77000b57cec5SDimitry Andric _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 77010b57cec5SDimitry Andric { 77020b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 77030b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 77040b57cec5SDimitry Andric __M); 77050b57cec5SDimitry Andric } 77060b57cec5SDimitry Andric 77070b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 77080b57cec5SDimitry Andric _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 77090b57cec5SDimitry Andric { 77100b57cec5SDimitry Andric __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 77110b57cec5SDimitry Andric } 77120b57cec5SDimitry Andric 77130b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 77140b57cec5SDimitry Andric _mm_cvtepi64_epi32 (__m128i __A) 77150b57cec5SDimitry Andric { 77160b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector( 77170b57cec5SDimitry Andric __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); 77180b57cec5SDimitry Andric } 77190b57cec5SDimitry Andric 77200b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 77210b57cec5SDimitry Andric 
_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 77220b57cec5SDimitry Andric { 77230b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 77240b57cec5SDimitry Andric (__v4si) __O, __M); 77250b57cec5SDimitry Andric } 77260b57cec5SDimitry Andric 77270b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 77280b57cec5SDimitry Andric _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 77290b57cec5SDimitry Andric { 77300b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 77310b57cec5SDimitry Andric (__v4si) _mm_setzero_si128 (), 77320b57cec5SDimitry Andric __M); 77330b57cec5SDimitry Andric } 77340b57cec5SDimitry Andric 77350b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 77360b57cec5SDimitry Andric _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 77370b57cec5SDimitry Andric { 77380b57cec5SDimitry Andric __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 77390b57cec5SDimitry Andric } 77400b57cec5SDimitry Andric 77410b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 77420b57cec5SDimitry Andric _mm256_cvtepi64_epi32 (__m256i __A) 77430b57cec5SDimitry Andric { 77440b57cec5SDimitry Andric return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); 77450b57cec5SDimitry Andric } 77460b57cec5SDimitry Andric 77470b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 77480b57cec5SDimitry Andric _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 77490b57cec5SDimitry Andric { 77500b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 77510b57cec5SDimitry Andric (__v4si)_mm256_cvtepi64_epi32(__A), 77520b57cec5SDimitry Andric (__v4si)__O); 77530b57cec5SDimitry Andric } 77540b57cec5SDimitry Andric 77550b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 77560b57cec5SDimitry Andric _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, 
__m256i __A) 77570b57cec5SDimitry Andric { 77580b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 77590b57cec5SDimitry Andric (__v4si)_mm256_cvtepi64_epi32(__A), 77600b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 77610b57cec5SDimitry Andric } 77620b57cec5SDimitry Andric 77630b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 77640b57cec5SDimitry Andric _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 77650b57cec5SDimitry Andric { 77660b57cec5SDimitry Andric __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 77670b57cec5SDimitry Andric } 77680b57cec5SDimitry Andric 77690b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 77700b57cec5SDimitry Andric _mm_cvtepi64_epi16 (__m128i __A) 77710b57cec5SDimitry Andric { 77720b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector( 77730b57cec5SDimitry Andric __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, 77740b57cec5SDimitry Andric 3, 3, 3, 3); 77750b57cec5SDimitry Andric } 77760b57cec5SDimitry Andric 77770b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 77780b57cec5SDimitry Andric _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 77790b57cec5SDimitry Andric { 77800b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 77810b57cec5SDimitry Andric (__v8hi)__O, 77820b57cec5SDimitry Andric __M); 77830b57cec5SDimitry Andric } 77840b57cec5SDimitry Andric 77850b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 77860b57cec5SDimitry Andric _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 77870b57cec5SDimitry Andric { 77880b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 77890b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 77900b57cec5SDimitry Andric __M); 77910b57cec5SDimitry Andric } 77920b57cec5SDimitry Andric 77930b57cec5SDimitry Andric static 
__inline__ void __DEFAULT_FN_ATTRS128 77940b57cec5SDimitry Andric _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 77950b57cec5SDimitry Andric { 77960b57cec5SDimitry Andric __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 77970b57cec5SDimitry Andric } 77980b57cec5SDimitry Andric 77990b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 78000b57cec5SDimitry Andric _mm256_cvtepi64_epi16 (__m256i __A) 78010b57cec5SDimitry Andric { 78020b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector( 78030b57cec5SDimitry Andric __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 78040b57cec5SDimitry Andric 2, 3, 4, 5, 6, 7); 78050b57cec5SDimitry Andric } 78060b57cec5SDimitry Andric 78070b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 78080b57cec5SDimitry Andric _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 78090b57cec5SDimitry Andric { 78100b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 78110b57cec5SDimitry Andric (__v8hi) __O, __M); 78120b57cec5SDimitry Andric } 78130b57cec5SDimitry Andric 78140b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS256 78150b57cec5SDimitry Andric _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 78160b57cec5SDimitry Andric { 78170b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 78180b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 78190b57cec5SDimitry Andric __M); 78200b57cec5SDimitry Andric } 78210b57cec5SDimitry Andric 78220b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS256 78230b57cec5SDimitry Andric _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 78240b57cec5SDimitry Andric { 78250b57cec5SDimitry Andric __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 78260b57cec5SDimitry Andric } 78270b57cec5SDimitry Andric 78280b57cec5SDimitry Andric #define 
_mm256_extractf32x4_ps(A, imm) \ 7829349cc55cSDimitry Andric ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 78300b57cec5SDimitry Andric (int)(imm), \ 78310b57cec5SDimitry Andric (__v4sf)_mm_undefined_ps(), \ 7832349cc55cSDimitry Andric (__mmask8)-1)) 78330b57cec5SDimitry Andric 78340b57cec5SDimitry Andric #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ 7835349cc55cSDimitry Andric ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 78360b57cec5SDimitry Andric (int)(imm), \ 78370b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 7838349cc55cSDimitry Andric (__mmask8)(U))) 78390b57cec5SDimitry Andric 78400b57cec5SDimitry Andric #define _mm256_maskz_extractf32x4_ps(U, A, imm) \ 7841349cc55cSDimitry Andric ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 78420b57cec5SDimitry Andric (int)(imm), \ 78430b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 7844349cc55cSDimitry Andric (__mmask8)(U))) 78450b57cec5SDimitry Andric 78460b57cec5SDimitry Andric #define _mm256_extracti32x4_epi32(A, imm) \ 7847349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 78480b57cec5SDimitry Andric (int)(imm), \ 78490b57cec5SDimitry Andric (__v4si)_mm_undefined_si128(), \ 7850349cc55cSDimitry Andric (__mmask8)-1)) 78510b57cec5SDimitry Andric 78520b57cec5SDimitry Andric #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ 7853349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 78540b57cec5SDimitry Andric (int)(imm), \ 78550b57cec5SDimitry Andric (__v4si)(__m128i)(W), \ 7856349cc55cSDimitry Andric (__mmask8)(U))) 78570b57cec5SDimitry Andric 78580b57cec5SDimitry Andric #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \ 7859349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 78600b57cec5SDimitry Andric (int)(imm), \ 78610b57cec5SDimitry Andric (__v4si)_mm_setzero_si128(), \ 7862349cc55cSDimitry Andric 
(__mmask8)(U))) 78630b57cec5SDimitry Andric 78640b57cec5SDimitry Andric #define _mm256_insertf32x4(A, B, imm) \ 7865349cc55cSDimitry Andric ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ 7866349cc55cSDimitry Andric (__v4sf)(__m128)(B), (int)(imm))) 78670b57cec5SDimitry Andric 78680b57cec5SDimitry Andric #define _mm256_mask_insertf32x4(W, U, A, B, imm) \ 7869349cc55cSDimitry Andric ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 78700b57cec5SDimitry Andric (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7871349cc55cSDimitry Andric (__v8sf)(__m256)(W))) 78720b57cec5SDimitry Andric 78730b57cec5SDimitry Andric #define _mm256_maskz_insertf32x4(U, A, B, imm) \ 7874349cc55cSDimitry Andric ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 78750b57cec5SDimitry Andric (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7876349cc55cSDimitry Andric (__v8sf)_mm256_setzero_ps())) 78770b57cec5SDimitry Andric 78780b57cec5SDimitry Andric #define _mm256_inserti32x4(A, B, imm) \ 7879349cc55cSDimitry Andric ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ 7880349cc55cSDimitry Andric (__v4si)(__m128i)(B), (int)(imm))) 78810b57cec5SDimitry Andric 78820b57cec5SDimitry Andric #define _mm256_mask_inserti32x4(W, U, A, B, imm) \ 7883349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 78840b57cec5SDimitry Andric (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 7885349cc55cSDimitry Andric (__v8si)(__m256i)(W))) 78860b57cec5SDimitry Andric 78870b57cec5SDimitry Andric #define _mm256_maskz_inserti32x4(U, A, B, imm) \ 7888349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 78890b57cec5SDimitry Andric (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 7890349cc55cSDimitry Andric (__v8si)_mm256_setzero_si256())) 78910b57cec5SDimitry Andric 78920b57cec5SDimitry Andric #define _mm_getmant_pd(A, B, C) \ 7893349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 78940b57cec5SDimitry 
Andric (int)(((C)<<2) | (B)), \ 78950b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 7896349cc55cSDimitry Andric (__mmask8)-1)) 78970b57cec5SDimitry Andric 78980b57cec5SDimitry Andric #define _mm_mask_getmant_pd(W, U, A, B, C) \ 7899349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 79000b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79010b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 7902349cc55cSDimitry Andric (__mmask8)(U))) 79030b57cec5SDimitry Andric 79040b57cec5SDimitry Andric #define _mm_maskz_getmant_pd(U, A, B, C) \ 7905349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 79060b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79070b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 7908349cc55cSDimitry Andric (__mmask8)(U))) 79090b57cec5SDimitry Andric 79100b57cec5SDimitry Andric #define _mm256_getmant_pd(A, B, C) \ 7911349cc55cSDimitry Andric ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 79120b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79130b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd(), \ 7914349cc55cSDimitry Andric (__mmask8)-1)) 79150b57cec5SDimitry Andric 79160b57cec5SDimitry Andric #define _mm256_mask_getmant_pd(W, U, A, B, C) \ 7917349cc55cSDimitry Andric ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 79180b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79190b57cec5SDimitry Andric (__v4df)(__m256d)(W), \ 7920349cc55cSDimitry Andric (__mmask8)(U))) 79210b57cec5SDimitry Andric 79220b57cec5SDimitry Andric #define _mm256_maskz_getmant_pd(U, A, B, C) \ 7923349cc55cSDimitry Andric ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 79240b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79250b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd(), \ 7926349cc55cSDimitry Andric (__mmask8)(U))) 79270b57cec5SDimitry Andric 79280b57cec5SDimitry Andric #define _mm_getmant_ps(A, B, C) \ 7929349cc55cSDimitry Andric 
((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 79300b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79310b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 7932349cc55cSDimitry Andric (__mmask8)-1)) 79330b57cec5SDimitry Andric 79340b57cec5SDimitry Andric #define _mm_mask_getmant_ps(W, U, A, B, C) \ 7935349cc55cSDimitry Andric ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 79360b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79370b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 7938349cc55cSDimitry Andric (__mmask8)(U))) 79390b57cec5SDimitry Andric 79400b57cec5SDimitry Andric #define _mm_maskz_getmant_ps(U, A, B, C) \ 7941349cc55cSDimitry Andric ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 79420b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79430b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 7944349cc55cSDimitry Andric (__mmask8)(U))) 79450b57cec5SDimitry Andric 79460b57cec5SDimitry Andric #define _mm256_getmant_ps(A, B, C) \ 7947349cc55cSDimitry Andric ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 79480b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79490b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 7950349cc55cSDimitry Andric (__mmask8)-1)) 79510b57cec5SDimitry Andric 79520b57cec5SDimitry Andric #define _mm256_mask_getmant_ps(W, U, A, B, C) \ 7953349cc55cSDimitry Andric ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 79540b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79550b57cec5SDimitry Andric (__v8sf)(__m256)(W), \ 7956349cc55cSDimitry Andric (__mmask8)(U))) 79570b57cec5SDimitry Andric 79580b57cec5SDimitry Andric #define _mm256_maskz_getmant_ps(U, A, B, C) \ 7959349cc55cSDimitry Andric ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 79600b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 79610b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 7962349cc55cSDimitry Andric (__mmask8)(U))) 79630b57cec5SDimitry Andric 79640b57cec5SDimitry Andric 
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7965349cc55cSDimitry Andric ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ 79660b57cec5SDimitry Andric (void const *)(addr), \ 79670b57cec5SDimitry Andric (__v2di)(__m128i)(index), \ 7968349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 79690b57cec5SDimitry Andric 79700b57cec5SDimitry Andric #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7971349cc55cSDimitry Andric ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ 79720b57cec5SDimitry Andric (void const *)(addr), \ 79730b57cec5SDimitry Andric (__v2di)(__m128i)(index), \ 7974349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 79750b57cec5SDimitry Andric 79760b57cec5SDimitry Andric #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7977349cc55cSDimitry Andric ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 79780b57cec5SDimitry Andric (void const *)(addr), \ 79790b57cec5SDimitry Andric (__v4di)(__m256i)(index), \ 7980349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 79810b57cec5SDimitry Andric 79820b57cec5SDimitry Andric #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7983349cc55cSDimitry Andric ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ 79840b57cec5SDimitry Andric (void const *)(addr), \ 79850b57cec5SDimitry Andric (__v4di)(__m256i)(index), \ 7986349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 79870b57cec5SDimitry Andric 79880b57cec5SDimitry Andric #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7989349cc55cSDimitry Andric ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ 79900b57cec5SDimitry Andric (void const *)(addr), \ 79910b57cec5SDimitry Andric (__v2di)(__m128i)(index), \ 7992349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 79930b57cec5SDimitry Andric 79940b57cec5SDimitry Andric #define _mm_mmask_i64gather_epi32(v1_old, mask, 
index, addr, scale) \ 7995349cc55cSDimitry Andric ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ 79960b57cec5SDimitry Andric (void const *)(addr), \ 79970b57cec5SDimitry Andric (__v2di)(__m128i)(index), \ 7998349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 79990b57cec5SDimitry Andric 80000b57cec5SDimitry Andric #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 8001349cc55cSDimitry Andric ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ 80020b57cec5SDimitry Andric (void const *)(addr), \ 80030b57cec5SDimitry Andric (__v4di)(__m256i)(index), \ 8004349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 80050b57cec5SDimitry Andric 80060b57cec5SDimitry Andric #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 8007349cc55cSDimitry Andric ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ 80080b57cec5SDimitry Andric (void const *)(addr), \ 80090b57cec5SDimitry Andric (__v4di)(__m256i)(index), \ 8010349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 80110b57cec5SDimitry Andric 80120b57cec5SDimitry Andric #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8013349cc55cSDimitry Andric ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ 80140b57cec5SDimitry Andric (void const *)(addr), \ 80150b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 8016349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 80170b57cec5SDimitry Andric 80180b57cec5SDimitry Andric #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8019349cc55cSDimitry Andric ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ 80200b57cec5SDimitry Andric (void const *)(addr), \ 80210b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 8022349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 80230b57cec5SDimitry Andric 80240b57cec5SDimitry Andric #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8025349cc55cSDimitry 
Andric ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ 80260b57cec5SDimitry Andric (void const *)(addr), \ 80270b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 8028349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 80290b57cec5SDimitry Andric 80300b57cec5SDimitry Andric #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8031349cc55cSDimitry Andric ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ 80320b57cec5SDimitry Andric (void const *)(addr), \ 80330b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 8034349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 80350b57cec5SDimitry Andric 80360b57cec5SDimitry Andric #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8037349cc55cSDimitry Andric ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ 80380b57cec5SDimitry Andric (void const *)(addr), \ 80390b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 8040349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 80410b57cec5SDimitry Andric 80420b57cec5SDimitry Andric #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8043349cc55cSDimitry Andric ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ 80440b57cec5SDimitry Andric (void const *)(addr), \ 80450b57cec5SDimitry Andric (__v4si)(__m128i)(index), \ 8046349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 80470b57cec5SDimitry Andric 80480b57cec5SDimitry Andric #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8049349cc55cSDimitry Andric ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ 80500b57cec5SDimitry Andric (void const *)(addr), \ 80510b57cec5SDimitry Andric (__v8si)(__m256i)(index), \ 8052349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 80530b57cec5SDimitry Andric 80540b57cec5SDimitry Andric #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8055349cc55cSDimitry Andric 
((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ 80560b57cec5SDimitry Andric (void const *)(addr), \ 80570b57cec5SDimitry Andric (__v8si)(__m256i)(index), \ 8058349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 80590b57cec5SDimitry Andric 80600b57cec5SDimitry Andric #define _mm256_permutex_pd(X, C) \ 8061349cc55cSDimitry Andric ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C))) 80620b57cec5SDimitry Andric 80630b57cec5SDimitry Andric #define _mm256_mask_permutex_pd(W, U, X, C) \ 8064349cc55cSDimitry Andric ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 80650b57cec5SDimitry Andric (__v4df)_mm256_permutex_pd((X), (C)), \ 8066349cc55cSDimitry Andric (__v4df)(__m256d)(W))) 80670b57cec5SDimitry Andric 80680b57cec5SDimitry Andric #define _mm256_maskz_permutex_pd(U, X, C) \ 8069349cc55cSDimitry Andric ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 80700b57cec5SDimitry Andric (__v4df)_mm256_permutex_pd((X), (C)), \ 8071349cc55cSDimitry Andric (__v4df)_mm256_setzero_pd())) 80720b57cec5SDimitry Andric 80730b57cec5SDimitry Andric #define _mm256_permutex_epi64(X, C) \ 8074349cc55cSDimitry Andric ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C))) 80750b57cec5SDimitry Andric 80760b57cec5SDimitry Andric #define _mm256_mask_permutex_epi64(W, U, X, C) \ 8077349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 80780b57cec5SDimitry Andric (__v4di)_mm256_permutex_epi64((X), (C)), \ 8079349cc55cSDimitry Andric (__v4di)(__m256i)(W))) 80800b57cec5SDimitry Andric 80810b57cec5SDimitry Andric #define _mm256_maskz_permutex_epi64(U, X, C) \ 8082349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 80830b57cec5SDimitry Andric (__v4di)_mm256_permutex_epi64((X), (C)), \ 8084349cc55cSDimitry Andric (__v4di)_mm256_setzero_si256())) 80850b57cec5SDimitry Andric 80860b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 80870b57cec5SDimitry Andric 
_mm256_permutexvar_pd (__m256i __X, __m256d __Y) 80880b57cec5SDimitry Andric { 80890b57cec5SDimitry Andric return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); 80900b57cec5SDimitry Andric } 80910b57cec5SDimitry Andric 80920b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 80930b57cec5SDimitry Andric _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 80940b57cec5SDimitry Andric __m256d __Y) 80950b57cec5SDimitry Andric { 80960b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 80970b57cec5SDimitry Andric (__v4df)_mm256_permutexvar_pd(__X, __Y), 80980b57cec5SDimitry Andric (__v4df)__W); 80990b57cec5SDimitry Andric } 81000b57cec5SDimitry Andric 81010b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 81020b57cec5SDimitry Andric _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 81030b57cec5SDimitry Andric { 81040b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 81050b57cec5SDimitry Andric (__v4df)_mm256_permutexvar_pd(__X, __Y), 81060b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 81070b57cec5SDimitry Andric } 81080b57cec5SDimitry Andric 81090b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 81100b57cec5SDimitry Andric _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) 81110b57cec5SDimitry Andric { 81120b57cec5SDimitry Andric return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); 81130b57cec5SDimitry Andric } 81140b57cec5SDimitry Andric 81150b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 81160b57cec5SDimitry Andric _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 81170b57cec5SDimitry Andric { 81180b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 81190b57cec5SDimitry Andric (__v4di)_mm256_permutexvar_epi64(__X, __Y), 81200b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 
81210b57cec5SDimitry Andric } 81220b57cec5SDimitry Andric 81230b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 81240b57cec5SDimitry Andric _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 81250b57cec5SDimitry Andric __m256i __Y) 81260b57cec5SDimitry Andric { 81270b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 81280b57cec5SDimitry Andric (__v4di)_mm256_permutexvar_epi64(__X, __Y), 81290b57cec5SDimitry Andric (__v4di)__W); 81300b57cec5SDimitry Andric } 81310b57cec5SDimitry Andric 81320b57cec5SDimitry Andric #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) 81330b57cec5SDimitry Andric 81340b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 81350b57cec5SDimitry Andric _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) 81360b57cec5SDimitry Andric { 81370b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 81380b57cec5SDimitry Andric (__v8sf)_mm256_permutexvar_ps(__X, __Y), 81390b57cec5SDimitry Andric (__v8sf)__W); 81400b57cec5SDimitry Andric } 81410b57cec5SDimitry Andric 81420b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 81430b57cec5SDimitry Andric _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) 81440b57cec5SDimitry Andric { 81450b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 81460b57cec5SDimitry Andric (__v8sf)_mm256_permutexvar_ps(__X, __Y), 81470b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 81480b57cec5SDimitry Andric } 81490b57cec5SDimitry Andric 81500b57cec5SDimitry Andric #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) 81510b57cec5SDimitry Andric 81520b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 81530b57cec5SDimitry Andric _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, 81540b57cec5SDimitry Andric __m256i __Y) 81550b57cec5SDimitry 
Andric { 81560b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 81570b57cec5SDimitry Andric (__v8si)_mm256_permutexvar_epi32(__X, __Y), 81580b57cec5SDimitry Andric (__v8si)__W); 81590b57cec5SDimitry Andric } 81600b57cec5SDimitry Andric 81610b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 81620b57cec5SDimitry Andric _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 81630b57cec5SDimitry Andric { 81640b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 81650b57cec5SDimitry Andric (__v8si)_mm256_permutexvar_epi32(__X, __Y), 81660b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 81670b57cec5SDimitry Andric } 81680b57cec5SDimitry Andric 81690b57cec5SDimitry Andric #define _mm_alignr_epi32(A, B, imm) \ 8170349cc55cSDimitry Andric ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ 8171349cc55cSDimitry Andric (__v4si)(__m128i)(B), (int)(imm))) 81720b57cec5SDimitry Andric 81730b57cec5SDimitry Andric #define _mm_mask_alignr_epi32(W, U, A, B, imm) \ 8174349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 81750b57cec5SDimitry Andric (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8176349cc55cSDimitry Andric (__v4si)(__m128i)(W))) 81770b57cec5SDimitry Andric 81780b57cec5SDimitry Andric #define _mm_maskz_alignr_epi32(U, A, B, imm) \ 8179349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 81800b57cec5SDimitry Andric (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8181349cc55cSDimitry Andric (__v4si)_mm_setzero_si128())) 81820b57cec5SDimitry Andric 81830b57cec5SDimitry Andric #define _mm256_alignr_epi32(A, B, imm) \ 8184349cc55cSDimitry Andric ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ 8185349cc55cSDimitry Andric (__v8si)(__m256i)(B), (int)(imm))) 81860b57cec5SDimitry Andric 81870b57cec5SDimitry Andric #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ 8188349cc55cSDimitry Andric 
((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 81890b57cec5SDimitry Andric (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8190349cc55cSDimitry Andric (__v8si)(__m256i)(W))) 81910b57cec5SDimitry Andric 81920b57cec5SDimitry Andric #define _mm256_maskz_alignr_epi32(U, A, B, imm) \ 8193349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 81940b57cec5SDimitry Andric (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8195349cc55cSDimitry Andric (__v8si)_mm256_setzero_si256())) 81960b57cec5SDimitry Andric 81970b57cec5SDimitry Andric #define _mm_alignr_epi64(A, B, imm) \ 8198349cc55cSDimitry Andric ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ 8199349cc55cSDimitry Andric (__v2di)(__m128i)(B), (int)(imm))) 82000b57cec5SDimitry Andric 82010b57cec5SDimitry Andric #define _mm_mask_alignr_epi64(W, U, A, B, imm) \ 8202349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 82030b57cec5SDimitry Andric (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8204349cc55cSDimitry Andric (__v2di)(__m128i)(W))) 82050b57cec5SDimitry Andric 82060b57cec5SDimitry Andric #define _mm_maskz_alignr_epi64(U, A, B, imm) \ 8207349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 82080b57cec5SDimitry Andric (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8209349cc55cSDimitry Andric (__v2di)_mm_setzero_si128())) 82100b57cec5SDimitry Andric 82110b57cec5SDimitry Andric #define _mm256_alignr_epi64(A, B, imm) \ 8212349cc55cSDimitry Andric ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ 8213349cc55cSDimitry Andric (__v4di)(__m256i)(B), (int)(imm))) 82140b57cec5SDimitry Andric 82150b57cec5SDimitry Andric #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ 8216349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 82170b57cec5SDimitry Andric (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8218349cc55cSDimitry Andric (__v4di)(__m256i)(W))) 82190b57cec5SDimitry Andric 82200b57cec5SDimitry Andric 
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \ 8221349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 82220b57cec5SDimitry Andric (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8223349cc55cSDimitry Andric (__v4di)_mm256_setzero_si256())) 82240b57cec5SDimitry Andric 82250b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 82260b57cec5SDimitry Andric _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 82270b57cec5SDimitry Andric { 82280b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 82290b57cec5SDimitry Andric (__v4sf)_mm_movehdup_ps(__A), 82300b57cec5SDimitry Andric (__v4sf)__W); 82310b57cec5SDimitry Andric } 82320b57cec5SDimitry Andric 82330b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 82340b57cec5SDimitry Andric _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 82350b57cec5SDimitry Andric { 82360b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 82370b57cec5SDimitry Andric (__v4sf)_mm_movehdup_ps(__A), 82380b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 82390b57cec5SDimitry Andric } 82400b57cec5SDimitry Andric 82410b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 82420b57cec5SDimitry Andric _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 82430b57cec5SDimitry Andric { 82440b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 82450b57cec5SDimitry Andric (__v8sf)_mm256_movehdup_ps(__A), 82460b57cec5SDimitry Andric (__v8sf)__W); 82470b57cec5SDimitry Andric } 82480b57cec5SDimitry Andric 82490b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 82500b57cec5SDimitry Andric _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 82510b57cec5SDimitry Andric { 82520b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 82530b57cec5SDimitry Andric (__v8sf)_mm256_movehdup_ps(__A), 82540b57cec5SDimitry Andric 
(__v8sf)_mm256_setzero_ps()); 82550b57cec5SDimitry Andric } 82560b57cec5SDimitry Andric 82570b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 82580b57cec5SDimitry Andric _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 82590b57cec5SDimitry Andric { 82600b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 82610b57cec5SDimitry Andric (__v4sf)_mm_moveldup_ps(__A), 82620b57cec5SDimitry Andric (__v4sf)__W); 82630b57cec5SDimitry Andric } 82640b57cec5SDimitry Andric 82650b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 82660b57cec5SDimitry Andric _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 82670b57cec5SDimitry Andric { 82680b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 82690b57cec5SDimitry Andric (__v4sf)_mm_moveldup_ps(__A), 82700b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 82710b57cec5SDimitry Andric } 82720b57cec5SDimitry Andric 82730b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 82740b57cec5SDimitry Andric _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 82750b57cec5SDimitry Andric { 82760b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 82770b57cec5SDimitry Andric (__v8sf)_mm256_moveldup_ps(__A), 82780b57cec5SDimitry Andric (__v8sf)__W); 82790b57cec5SDimitry Andric } 82800b57cec5SDimitry Andric 82810b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 82820b57cec5SDimitry Andric _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 82830b57cec5SDimitry Andric { 82840b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 82850b57cec5SDimitry Andric (__v8sf)_mm256_moveldup_ps(__A), 82860b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 82870b57cec5SDimitry Andric } 82880b57cec5SDimitry Andric 82890b57cec5SDimitry Andric #define _mm256_mask_shuffle_epi32(W, U, A, I) \ 8290349cc55cSDimitry Andric 
((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 82910b57cec5SDimitry Andric (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8292349cc55cSDimitry Andric (__v8si)(__m256i)(W))) 82930b57cec5SDimitry Andric 82940b57cec5SDimitry Andric #define _mm256_maskz_shuffle_epi32(U, A, I) \ 8295349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 82960b57cec5SDimitry Andric (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8297349cc55cSDimitry Andric (__v8si)_mm256_setzero_si256())) 82980b57cec5SDimitry Andric 82990b57cec5SDimitry Andric #define _mm_mask_shuffle_epi32(W, U, A, I) \ 8300349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 83010b57cec5SDimitry Andric (__v4si)_mm_shuffle_epi32((A), (I)), \ 8302349cc55cSDimitry Andric (__v4si)(__m128i)(W))) 83030b57cec5SDimitry Andric 83040b57cec5SDimitry Andric #define _mm_maskz_shuffle_epi32(U, A, I) \ 8305349cc55cSDimitry Andric ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 83060b57cec5SDimitry Andric (__v4si)_mm_shuffle_epi32((A), (I)), \ 8307349cc55cSDimitry Andric (__v4si)_mm_setzero_si128())) 83080b57cec5SDimitry Andric 83090b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 83100b57cec5SDimitry Andric _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 83110b57cec5SDimitry Andric { 83120b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 83130b57cec5SDimitry Andric (__v2df) __A, 83140b57cec5SDimitry Andric (__v2df) __W); 83150b57cec5SDimitry Andric } 83160b57cec5SDimitry Andric 83170b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 83180b57cec5SDimitry Andric _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 83190b57cec5SDimitry Andric { 83200b57cec5SDimitry Andric return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 83210b57cec5SDimitry Andric (__v2df) __A, 83220b57cec5SDimitry Andric (__v2df) _mm_setzero_pd ()); 83230b57cec5SDimitry Andric } 83240b57cec5SDimitry Andric 83250b57cec5SDimitry 
Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 83260b57cec5SDimitry Andric _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 83270b57cec5SDimitry Andric { 83280b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 83290b57cec5SDimitry Andric (__v4df) __A, 83300b57cec5SDimitry Andric (__v4df) __W); 83310b57cec5SDimitry Andric } 83320b57cec5SDimitry Andric 83330b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 83340b57cec5SDimitry Andric _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 83350b57cec5SDimitry Andric { 83360b57cec5SDimitry Andric return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 83370b57cec5SDimitry Andric (__v4df) __A, 83380b57cec5SDimitry Andric (__v4df) _mm256_setzero_pd ()); 83390b57cec5SDimitry Andric } 83400b57cec5SDimitry Andric 83410b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 83420b57cec5SDimitry Andric _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 83430b57cec5SDimitry Andric { 83440b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 83450b57cec5SDimitry Andric (__v4sf) __A, 83460b57cec5SDimitry Andric (__v4sf) __W); 83470b57cec5SDimitry Andric } 83480b57cec5SDimitry Andric 83490b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 83500b57cec5SDimitry Andric _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 83510b57cec5SDimitry Andric { 83520b57cec5SDimitry Andric return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 83530b57cec5SDimitry Andric (__v4sf) __A, 83540b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps ()); 83550b57cec5SDimitry Andric } 83560b57cec5SDimitry Andric 83570b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 83580b57cec5SDimitry Andric _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 83590b57cec5SDimitry Andric { 83600b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 83610b57cec5SDimitry Andric (__v8sf) 
__A, 83620b57cec5SDimitry Andric (__v8sf) __W); 83630b57cec5SDimitry Andric } 83640b57cec5SDimitry Andric 83650b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 83660b57cec5SDimitry Andric _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 83670b57cec5SDimitry Andric { 83680b57cec5SDimitry Andric return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 83690b57cec5SDimitry Andric (__v8sf) __A, 83700b57cec5SDimitry Andric (__v8sf) _mm256_setzero_ps ()); 83710b57cec5SDimitry Andric } 83720b57cec5SDimitry Andric 83730b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 83740b57cec5SDimitry Andric _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 83750b57cec5SDimitry Andric { 83760b57cec5SDimitry Andric return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 83770b57cec5SDimitry Andric (__v4sf) __W, 83780b57cec5SDimitry Andric (__mmask8) __U); 83790b57cec5SDimitry Andric } 83800b57cec5SDimitry Andric 83810b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 83820b57cec5SDimitry Andric _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 83830b57cec5SDimitry Andric { 83840b57cec5SDimitry Andric return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 83850b57cec5SDimitry Andric (__v4sf) 83860b57cec5SDimitry Andric _mm_setzero_ps (), 83870b57cec5SDimitry Andric (__mmask8) __U); 83880b57cec5SDimitry Andric } 83890b57cec5SDimitry Andric 83900b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 83910b57cec5SDimitry Andric _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 83920b57cec5SDimitry Andric { 83930b57cec5SDimitry Andric return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 83940b57cec5SDimitry Andric (__v8sf) __W, 83950b57cec5SDimitry Andric (__mmask8) __U); 83960b57cec5SDimitry Andric } 83970b57cec5SDimitry Andric 83980b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 83990b57cec5SDimitry Andric _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 
84000b57cec5SDimitry Andric { 84010b57cec5SDimitry Andric return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 84020b57cec5SDimitry Andric (__v8sf) 84030b57cec5SDimitry Andric _mm256_setzero_ps (), 84040b57cec5SDimitry Andric (__mmask8) __U); 84050b57cec5SDimitry Andric } 84060b57cec5SDimitry Andric 84070b57cec5SDimitry Andric #define _mm_mask_cvt_roundps_ph(W, U, A, I) \ 8408349cc55cSDimitry Andric ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 84090b57cec5SDimitry Andric (__v8hi)(__m128i)(W), \ 8410349cc55cSDimitry Andric (__mmask8)(U))) 84110b57cec5SDimitry Andric 84120b57cec5SDimitry Andric #define _mm_maskz_cvt_roundps_ph(U, A, I) \ 8413349cc55cSDimitry Andric ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 84140b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128(), \ 8415349cc55cSDimitry Andric (__mmask8)(U))) 84160b57cec5SDimitry Andric 84170b57cec5SDimitry Andric #define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph 84180b57cec5SDimitry Andric #define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph 84190b57cec5SDimitry Andric 84200b57cec5SDimitry Andric #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ 8421349cc55cSDimitry Andric ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 84220b57cec5SDimitry Andric (__v8hi)(__m128i)(W), \ 8423349cc55cSDimitry Andric (__mmask8)(U))) 84240b57cec5SDimitry Andric 84250b57cec5SDimitry Andric #define _mm256_maskz_cvt_roundps_ph(U, A, I) \ 8426349cc55cSDimitry Andric ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 84270b57cec5SDimitry Andric (__v8hi)_mm_setzero_si128(), \ 8428349cc55cSDimitry Andric (__mmask8)(U))) 84290b57cec5SDimitry Andric 84300b57cec5SDimitry Andric #define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph 84310b57cec5SDimitry Andric #define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph 84320b57cec5SDimitry Andric 84330b57cec5SDimitry Andric 84340b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 
84350b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256 84360b57cec5SDimitry Andric 84370b57cec5SDimitry Andric #endif /* __AVX512VLINTRIN_H */ 8438