10b57cec5SDimitry Andric /*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __AVX512VLDQINTRIN_H 150b57cec5SDimitry Andric #define __AVX512VLDQINTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 18*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS128 \ 19*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 20*5f757f3fSDimitry Andric __target__("avx512vl,avx512dq,no-evex512"), \ 21*5f757f3fSDimitry Andric __min_vector_width__(128))) 22*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS256 \ 23*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 24*5f757f3fSDimitry Andric __target__("avx512vl,avx512dq,no-evex512"), \ 25*5f757f3fSDimitry Andric __min_vector_width__(256))) 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 280b57cec5SDimitry Andric _mm256_mullo_epi64 (__m256i __A, __m256i __B) { 290b57cec5SDimitry Andric return (__m256i) ((__v4du) __A * (__v4du) __B); 300b57cec5SDimitry Andric } 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 330b57cec5SDimitry Andric _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 340b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 350b57cec5SDimitry Andric (__v4di)_mm256_mullo_epi64(__A, __B), 360b57cec5SDimitry Andric (__v4di)__W); 370b57cec5SDimitry Andric } 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 400b57cec5SDimitry Andric _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { 410b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 420b57cec5SDimitry Andric (__v4di)_mm256_mullo_epi64(__A, __B), 430b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 440b57cec5SDimitry Andric } 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 470b57cec5SDimitry Andric _mm_mullo_epi64 (__m128i __A, __m128i __B) { 480b57cec5SDimitry Andric return (__m128i) ((__v2du) __A * (__v2du) __B); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 520b57cec5SDimitry Andric _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 530b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 540b57cec5SDimitry Andric (__v2di)_mm_mullo_epi64(__A, __B), 550b57cec5SDimitry Andric (__v2di)__W); 560b57cec5SDimitry Andric } 570b57cec5SDimitry Andric 580b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 590b57cec5SDimitry Andric _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { 600b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 610b57cec5SDimitry Andric (__v2di)_mm_mullo_epi64(__A, __B), 620b57cec5SDimitry Andric (__v2di)_mm_setzero_si128()); 630b57cec5SDimitry Andric } 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 660b57cec5SDimitry Andric _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 670b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 680b57cec5SDimitry Andric (__v4df)_mm256_andnot_pd(__A, __B), 690b57cec5SDimitry Andric (__v4df)__W); 700b57cec5SDimitry Andric } 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 730b57cec5SDimitry Andric _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) { 740b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 750b57cec5SDimitry Andric (__v4df)_mm256_andnot_pd(__A, __B), 760b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 800b57cec5SDimitry Andric _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 810b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 820b57cec5SDimitry Andric (__v2df)_mm_andnot_pd(__A, __B), 830b57cec5SDimitry Andric (__v2df)__W); 840b57cec5SDimitry Andric } 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 870b57cec5SDimitry Andric _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) { 880b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 890b57cec5SDimitry Andric (__v2df)_mm_andnot_pd(__A, __B), 900b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 910b57cec5SDimitry Andric } 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 940b57cec5SDimitry Andric _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 950b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 960b57cec5SDimitry Andric (__v8sf)_mm256_andnot_ps(__A, __B), 970b57cec5SDimitry Andric (__v8sf)__W); 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 1010b57cec5SDimitry Andric _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) { 1020b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1030b57cec5SDimitry Andric (__v8sf)_mm256_andnot_ps(__A, __B), 1040b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 1050b57cec5SDimitry Andric } 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 1080b57cec5SDimitry Andric _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 1090b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1100b57cec5SDimitry Andric (__v4sf)_mm_andnot_ps(__A, __B), 1110b57cec5SDimitry Andric (__v4sf)__W); 1120b57cec5SDimitry Andric } 1130b57cec5SDimitry Andric 1140b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 1150b57cec5SDimitry Andric _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) { 1160b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1170b57cec5SDimitry Andric (__v4sf)_mm_andnot_ps(__A, __B), 1180b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 1190b57cec5SDimitry Andric } 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 1220b57cec5SDimitry Andric _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 1230b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1240b57cec5SDimitry Andric (__v4df)_mm256_and_pd(__A, __B), 1250b57cec5SDimitry Andric (__v4df)__W); 1260b57cec5SDimitry Andric } 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 1290b57cec5SDimitry Andric _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) { 1300b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1310b57cec5SDimitry Andric (__v4df)_mm256_and_pd(__A, __B), 1320b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 1360b57cec5SDimitry Andric _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 1370b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1380b57cec5SDimitry Andric (__v2df)_mm_and_pd(__A, __B), 1390b57cec5SDimitry Andric (__v2df)__W); 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 1430b57cec5SDimitry Andric _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) { 1440b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1450b57cec5SDimitry Andric (__v2df)_mm_and_pd(__A, __B), 1460b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 1500b57cec5SDimitry Andric _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 1510b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1520b57cec5SDimitry Andric (__v8sf)_mm256_and_ps(__A, __B), 1530b57cec5SDimitry Andric (__v8sf)__W); 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 1570b57cec5SDimitry Andric _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) { 1580b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1590b57cec5SDimitry Andric (__v8sf)_mm256_and_ps(__A, __B), 1600b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 1610b57cec5SDimitry Andric } 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 1640b57cec5SDimitry Andric _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 1650b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1660b57cec5SDimitry Andric (__v4sf)_mm_and_ps(__A, __B), 1670b57cec5SDimitry Andric (__v4sf)__W); 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 1710b57cec5SDimitry Andric _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) { 1720b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1730b57cec5SDimitry Andric (__v4sf)_mm_and_ps(__A, __B), 1740b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 1780b57cec5SDimitry Andric _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 1790b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1800b57cec5SDimitry Andric (__v4df)_mm256_xor_pd(__A, __B), 1810b57cec5SDimitry Andric (__v4df)__W); 1820b57cec5SDimitry Andric } 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 1850b57cec5SDimitry Andric _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) { 1860b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1870b57cec5SDimitry Andric (__v4df)_mm256_xor_pd(__A, __B), 1880b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 1890b57cec5SDimitry Andric } 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 1920b57cec5SDimitry Andric _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 1930b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1940b57cec5SDimitry Andric (__v2df)_mm_xor_pd(__A, __B), 1950b57cec5SDimitry Andric (__v2df)__W); 1960b57cec5SDimitry Andric } 1970b57cec5SDimitry Andric 1980b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 1990b57cec5SDimitry Andric _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 2000b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2010b57cec5SDimitry Andric (__v2df)_mm_xor_pd(__A, __B), 2020b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 2060b57cec5SDimitry Andric _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2070b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2080b57cec5SDimitry Andric (__v8sf)_mm256_xor_ps(__A, __B), 2090b57cec5SDimitry Andric (__v8sf)__W); 2100b57cec5SDimitry Andric } 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 2130b57cec5SDimitry Andric _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2140b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2150b57cec5SDimitry Andric (__v8sf)_mm256_xor_ps(__A, __B), 2160b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 2200b57cec5SDimitry Andric _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2210b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2220b57cec5SDimitry Andric (__v4sf)_mm_xor_ps(__A, __B), 2230b57cec5SDimitry Andric (__v4sf)__W); 2240b57cec5SDimitry Andric } 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 2270b57cec5SDimitry Andric _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2280b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2290b57cec5SDimitry Andric (__v4sf)_mm_xor_ps(__A, __B), 2300b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 2310b57cec5SDimitry Andric } 2320b57cec5SDimitry Andric 2330b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 2340b57cec5SDimitry Andric _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2350b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2360b57cec5SDimitry Andric (__v4df)_mm256_or_pd(__A, __B), 2370b57cec5SDimitry Andric (__v4df)__W); 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 2410b57cec5SDimitry Andric _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2420b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2430b57cec5SDimitry Andric (__v4df)_mm256_or_pd(__A, __B), 2440b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 2450b57cec5SDimitry Andric } 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 2480b57cec5SDimitry Andric _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2490b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2500b57cec5SDimitry Andric (__v2df)_mm_or_pd(__A, __B), 2510b57cec5SDimitry Andric (__v2df)__W); 2520b57cec5SDimitry Andric } 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 2550b57cec5SDimitry Andric _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2560b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2570b57cec5SDimitry Andric (__v2df)_mm_or_pd(__A, __B), 2580b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 2620b57cec5SDimitry Andric _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2630b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2640b57cec5SDimitry Andric (__v8sf)_mm256_or_ps(__A, __B), 2650b57cec5SDimitry Andric (__v8sf)__W); 2660b57cec5SDimitry Andric } 2670b57cec5SDimitry Andric 2680b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 2690b57cec5SDimitry Andric _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2700b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2710b57cec5SDimitry Andric (__v8sf)_mm256_or_ps(__A, __B), 2720b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 2760b57cec5SDimitry Andric _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2770b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2780b57cec5SDimitry Andric (__v4sf)_mm_or_ps(__A, __B), 2790b57cec5SDimitry Andric (__v4sf)__W); 2800b57cec5SDimitry Andric } 2810b57cec5SDimitry Andric 2820b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 2830b57cec5SDimitry Andric _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2840b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2850b57cec5SDimitry Andric (__v4sf)_mm_or_ps(__A, __B), 2860b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 2900b57cec5SDimitry Andric _mm_cvtpd_epi64 (__m128d __A) { 2910b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 2920b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 2930b57cec5SDimitry Andric (__mmask8) -1); 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 2970b57cec5SDimitry Andric _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 2980b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 2990b57cec5SDimitry Andric (__v2di) __W, 3000b57cec5SDimitry Andric (__mmask8) __U); 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3040b57cec5SDimitry Andric _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { 3050b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 3060b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 3070b57cec5SDimitry Andric (__mmask8) __U); 3080b57cec5SDimitry Andric } 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3110b57cec5SDimitry Andric _mm256_cvtpd_epi64 (__m256d __A) { 3120b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 3130b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 3140b57cec5SDimitry Andric (__mmask8) -1); 3150b57cec5SDimitry Andric } 3160b57cec5SDimitry Andric 3170b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3180b57cec5SDimitry Andric _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 3190b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 3200b57cec5SDimitry Andric (__v4di) __W, 3210b57cec5SDimitry Andric (__mmask8) __U); 3220b57cec5SDimitry Andric } 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3250b57cec5SDimitry Andric _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { 3260b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 3270b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 3280b57cec5SDimitry Andric (__mmask8) __U); 3290b57cec5SDimitry Andric } 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3320b57cec5SDimitry Andric _mm_cvtpd_epu64 (__m128d __A) { 3330b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 3340b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 3350b57cec5SDimitry Andric (__mmask8) -1); 3360b57cec5SDimitry Andric } 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3390b57cec5SDimitry Andric _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 3400b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 3410b57cec5SDimitry Andric (__v2di) __W, 3420b57cec5SDimitry Andric (__mmask8) __U); 3430b57cec5SDimitry Andric } 3440b57cec5SDimitry Andric 3450b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3460b57cec5SDimitry Andric _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { 3470b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 3480b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 3490b57cec5SDimitry Andric (__mmask8) __U); 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3530b57cec5SDimitry Andric _mm256_cvtpd_epu64 (__m256d __A) { 3540b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 3550b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 3560b57cec5SDimitry Andric (__mmask8) -1); 3570b57cec5SDimitry Andric } 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3600b57cec5SDimitry Andric _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 3610b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 3620b57cec5SDimitry Andric (__v4di) __W, 3630b57cec5SDimitry Andric (__mmask8) __U); 3640b57cec5SDimitry Andric } 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3670b57cec5SDimitry Andric _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { 3680b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 3690b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 3700b57cec5SDimitry Andric (__mmask8) __U); 3710b57cec5SDimitry Andric } 3720b57cec5SDimitry Andric 3730b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3740b57cec5SDimitry Andric _mm_cvtps_epi64 (__m128 __A) { 3750b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 3760b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 3770b57cec5SDimitry Andric (__mmask8) -1); 3780b57cec5SDimitry Andric } 3790b57cec5SDimitry Andric 3800b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3810b57cec5SDimitry Andric _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 3820b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 3830b57cec5SDimitry Andric (__v2di) __W, 3840b57cec5SDimitry Andric (__mmask8) __U); 3850b57cec5SDimitry Andric } 3860b57cec5SDimitry Andric 3870b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 3880b57cec5SDimitry Andric _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 3890b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 3900b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 3910b57cec5SDimitry Andric (__mmask8) __U); 3920b57cec5SDimitry Andric } 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 3950b57cec5SDimitry Andric _mm256_cvtps_epi64 (__m128 __A) { 3960b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 3970b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 3980b57cec5SDimitry Andric (__mmask8) -1); 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4020b57cec5SDimitry Andric _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 4030b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 4040b57cec5SDimitry Andric (__v4di) __W, 4050b57cec5SDimitry Andric (__mmask8) __U); 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4090b57cec5SDimitry Andric _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 4100b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 4110b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 4120b57cec5SDimitry Andric (__mmask8) __U); 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4160b57cec5SDimitry Andric _mm_cvtps_epu64 (__m128 __A) { 4170b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 4180b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 4190b57cec5SDimitry Andric (__mmask8) -1); 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4230b57cec5SDimitry Andric _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 4240b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 4250b57cec5SDimitry Andric (__v2di) __W, 4260b57cec5SDimitry Andric (__mmask8) __U); 4270b57cec5SDimitry Andric } 4280b57cec5SDimitry Andric 4290b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 4300b57cec5SDimitry Andric _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 4310b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 4320b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 4330b57cec5SDimitry Andric (__mmask8) __U); 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4370b57cec5SDimitry Andric _mm256_cvtps_epu64 (__m128 __A) { 4380b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 4390b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 4400b57cec5SDimitry Andric (__mmask8) -1); 4410b57cec5SDimitry Andric } 4420b57cec5SDimitry Andric 4430b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4440b57cec5SDimitry Andric _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 4450b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 4460b57cec5SDimitry Andric (__v4di) __W, 4470b57cec5SDimitry Andric (__mmask8) __U); 4480b57cec5SDimitry Andric } 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 4510b57cec5SDimitry Andric _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 4520b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 4530b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 4540b57cec5SDimitry Andric (__mmask8) __U); 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 4580b57cec5SDimitry Andric _mm_cvtepi64_pd (__m128i __A) { 4590b57cec5SDimitry Andric return (__m128d)__builtin_convertvector((__v2di)__A, __v2df); 4600b57cec5SDimitry Andric } 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 4630b57cec5SDimitry Andric _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 4640b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 4650b57cec5SDimitry Andric (__v2df)_mm_cvtepi64_pd(__A), 4660b57cec5SDimitry Andric (__v2df)__W); 4670b57cec5SDimitry Andric } 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 4700b57cec5SDimitry Andric _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { 4710b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 4720b57cec5SDimitry Andric (__v2df)_mm_cvtepi64_pd(__A), 4730b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 4740b57cec5SDimitry Andric } 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 4770b57cec5SDimitry Andric _mm256_cvtepi64_pd (__m256i __A) { 4780b57cec5SDimitry Andric return (__m256d)__builtin_convertvector((__v4di)__A, __v4df); 4790b57cec5SDimitry Andric } 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 4820b57cec5SDimitry Andric _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 4830b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 4840b57cec5SDimitry Andric (__v4df)_mm256_cvtepi64_pd(__A), 4850b57cec5SDimitry Andric (__v4df)__W); 4860b57cec5SDimitry Andric } 4870b57cec5SDimitry Andric 4880b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 4890b57cec5SDimitry Andric _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { 4900b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 4910b57cec5SDimitry Andric (__v4df)_mm256_cvtepi64_pd(__A), 4920b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 4930b57cec5SDimitry Andric } 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 4960b57cec5SDimitry Andric _mm_cvtepi64_ps (__m128i __A) { 4970b57cec5SDimitry Andric return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 4980b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps(), 4990b57cec5SDimitry Andric (__mmask8) -1); 5000b57cec5SDimitry Andric } 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 5030b57cec5SDimitry Andric _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 5040b57cec5SDimitry Andric return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 5050b57cec5SDimitry Andric (__v4sf) __W, 5060b57cec5SDimitry Andric (__mmask8) __U); 5070b57cec5SDimitry Andric } 5080b57cec5SDimitry Andric 5090b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 5100b57cec5SDimitry Andric _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { 5110b57cec5SDimitry Andric return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 5120b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps(), 5130b57cec5SDimitry Andric (__mmask8) __U); 5140b57cec5SDimitry Andric } 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS256 5170b57cec5SDimitry Andric _mm256_cvtepi64_ps (__m256i __A) { 5180b57cec5SDimitry Andric return (__m128)__builtin_convertvector((__v4di)__A, __v4sf); 5190b57cec5SDimitry Andric } 5200b57cec5SDimitry Andric 5210b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS256 5220b57cec5SDimitry Andric _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 5230b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5240b57cec5SDimitry Andric (__v4sf)_mm256_cvtepi64_ps(__A), 5250b57cec5SDimitry Andric (__v4sf)__W); 5260b57cec5SDimitry Andric } 5270b57cec5SDimitry Andric 5280b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS256 5290b57cec5SDimitry Andric _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { 5300b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5310b57cec5SDimitry Andric (__v4sf)_mm256_cvtepi64_ps(__A), 5320b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 5330b57cec5SDimitry Andric } 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5360b57cec5SDimitry Andric _mm_cvttpd_epi64 (__m128d __A) { 5370b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 5380b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 5390b57cec5SDimitry Andric (__mmask8) -1); 5400b57cec5SDimitry Andric } 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5430b57cec5SDimitry Andric _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 5440b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 5450b57cec5SDimitry Andric (__v2di) __W, 5460b57cec5SDimitry Andric (__mmask8) __U); 5470b57cec5SDimitry Andric } 5480b57cec5SDimitry Andric 5490b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5500b57cec5SDimitry Andric _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { 5510b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 5520b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 5530b57cec5SDimitry Andric (__mmask8) __U); 5540b57cec5SDimitry Andric } 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5570b57cec5SDimitry Andric _mm256_cvttpd_epi64 (__m256d __A) { 5580b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 5590b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 5600b57cec5SDimitry Andric (__mmask8) -1); 5610b57cec5SDimitry Andric } 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5640b57cec5SDimitry Andric _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 5650b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 5660b57cec5SDimitry Andric (__v4di) __W, 5670b57cec5SDimitry Andric (__mmask8) __U); 5680b57cec5SDimitry Andric } 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5710b57cec5SDimitry Andric _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { 5720b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 5730b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 5740b57cec5SDimitry Andric (__mmask8) __U); 5750b57cec5SDimitry Andric } 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5780b57cec5SDimitry Andric _mm_cvttpd_epu64 (__m128d __A) { 5790b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 5800b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 5810b57cec5SDimitry Andric (__mmask8) -1); 5820b57cec5SDimitry Andric } 5830b57cec5SDimitry Andric 5840b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5850b57cec5SDimitry Andric _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 5860b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 5870b57cec5SDimitry Andric (__v2di) __W, 5880b57cec5SDimitry Andric (__mmask8) __U); 5890b57cec5SDimitry Andric } 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 5920b57cec5SDimitry Andric _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { 5930b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 5940b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 5950b57cec5SDimitry Andric (__mmask8) __U); 5960b57cec5SDimitry Andric } 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 5990b57cec5SDimitry Andric _mm256_cvttpd_epu64 (__m256d __A) { 6000b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 6010b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 6020b57cec5SDimitry Andric (__mmask8) -1); 6030b57cec5SDimitry Andric } 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6060b57cec5SDimitry Andric _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 6070b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 6080b57cec5SDimitry Andric (__v4di) __W, 6090b57cec5SDimitry Andric (__mmask8) __U); 6100b57cec5SDimitry Andric } 6110b57cec5SDimitry Andric 6120b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6130b57cec5SDimitry Andric _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { 6140b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 6150b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 6160b57cec5SDimitry Andric (__mmask8) __U); 6170b57cec5SDimitry Andric } 6180b57cec5SDimitry Andric 6190b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6200b57cec5SDimitry Andric _mm_cvttps_epi64 (__m128 __A) { 6210b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 6220b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 6230b57cec5SDimitry Andric (__mmask8) -1); 6240b57cec5SDimitry Andric } 6250b57cec5SDimitry Andric 6260b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6270b57cec5SDimitry Andric _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 6280b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 6290b57cec5SDimitry Andric (__v2di) __W, 6300b57cec5SDimitry Andric (__mmask8) __U); 6310b57cec5SDimitry Andric } 6320b57cec5SDimitry Andric 6330b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6340b57cec5SDimitry Andric _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 6350b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 6360b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 6370b57cec5SDimitry Andric (__mmask8) __U); 6380b57cec5SDimitry Andric } 6390b57cec5SDimitry Andric 6400b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6410b57cec5SDimitry Andric _mm256_cvttps_epi64 (__m128 __A) { 6420b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 6430b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 6440b57cec5SDimitry Andric (__mmask8) -1); 6450b57cec5SDimitry Andric } 6460b57cec5SDimitry Andric 6470b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6480b57cec5SDimitry Andric _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 6490b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 6500b57cec5SDimitry Andric (__v4di) __W, 6510b57cec5SDimitry Andric (__mmask8) __U); 6520b57cec5SDimitry Andric } 6530b57cec5SDimitry Andric 6540b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6550b57cec5SDimitry Andric _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 6560b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 6570b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 6580b57cec5SDimitry Andric (__mmask8) __U); 6590b57cec5SDimitry Andric } 6600b57cec5SDimitry Andric 6610b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6620b57cec5SDimitry Andric _mm_cvttps_epu64 (__m128 __A) { 6630b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 6640b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 6650b57cec5SDimitry Andric (__mmask8) -1); 6660b57cec5SDimitry Andric } 6670b57cec5SDimitry Andric 6680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6690b57cec5SDimitry Andric _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 6700b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 6710b57cec5SDimitry Andric (__v2di) __W, 6720b57cec5SDimitry Andric (__mmask8) __U); 6730b57cec5SDimitry Andric } 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 6760b57cec5SDimitry Andric _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 6770b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 6780b57cec5SDimitry Andric (__v2di) _mm_setzero_si128(), 6790b57cec5SDimitry Andric (__mmask8) __U); 6800b57cec5SDimitry Andric } 6810b57cec5SDimitry Andric 6820b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6830b57cec5SDimitry Andric _mm256_cvttps_epu64 (__m128 __A) { 6840b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 6850b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 6860b57cec5SDimitry Andric (__mmask8) -1); 6870b57cec5SDimitry Andric } 6880b57cec5SDimitry Andric 6890b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6900b57cec5SDimitry Andric _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 6910b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 6920b57cec5SDimitry Andric (__v4di) __W, 6930b57cec5SDimitry Andric (__mmask8) __U); 6940b57cec5SDimitry Andric } 6950b57cec5SDimitry Andric 6960b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 6970b57cec5SDimitry Andric _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 6980b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 6990b57cec5SDimitry Andric (__v4di) _mm256_setzero_si256(), 7000b57cec5SDimitry Andric (__mmask8) __U); 7010b57cec5SDimitry Andric } 7020b57cec5SDimitry Andric 7030b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 7040b57cec5SDimitry Andric _mm_cvtepu64_pd (__m128i __A) { 7050b57cec5SDimitry Andric return (__m128d)__builtin_convertvector((__v2du)__A, __v2df); 7060b57cec5SDimitry Andric } 7070b57cec5SDimitry Andric 7080b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 7090b57cec5SDimitry Andric _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 7100b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 7110b57cec5SDimitry Andric (__v2df)_mm_cvtepu64_pd(__A), 7120b57cec5SDimitry Andric (__v2df)__W); 7130b57cec5SDimitry Andric } 7140b57cec5SDimitry Andric 7150b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 7160b57cec5SDimitry Andric _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { 7170b57cec5SDimitry Andric return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 7180b57cec5SDimitry Andric (__v2df)_mm_cvtepu64_pd(__A), 7190b57cec5SDimitry Andric (__v2df)_mm_setzero_pd()); 7200b57cec5SDimitry Andric } 7210b57cec5SDimitry Andric 7220b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 7230b57cec5SDimitry Andric _mm256_cvtepu64_pd (__m256i __A) { 7240b57cec5SDimitry Andric return (__m256d)__builtin_convertvector((__v4du)__A, __v4df); 7250b57cec5SDimitry Andric } 7260b57cec5SDimitry Andric 7270b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 7280b57cec5SDimitry Andric _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 7290b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 7300b57cec5SDimitry Andric (__v4df)_mm256_cvtepu64_pd(__A), 7310b57cec5SDimitry Andric (__v4df)__W); 7320b57cec5SDimitry Andric } 7330b57cec5SDimitry Andric 7340b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 7350b57cec5SDimitry Andric _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { 7360b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 7370b57cec5SDimitry Andric (__v4df)_mm256_cvtepu64_pd(__A), 7380b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 7390b57cec5SDimitry Andric } 7400b57cec5SDimitry Andric 7410b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 7420b57cec5SDimitry Andric _mm_cvtepu64_ps (__m128i __A) { 7430b57cec5SDimitry Andric return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 7440b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps(), 7450b57cec5SDimitry Andric (__mmask8) -1); 7460b57cec5SDimitry Andric } 7470b57cec5SDimitry Andric 7480b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 7490b57cec5SDimitry Andric _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 7500b57cec5SDimitry Andric return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 7510b57cec5SDimitry Andric (__v4sf) __W, 7520b57cec5SDimitry Andric (__mmask8) __U); 7530b57cec5SDimitry Andric } 7540b57cec5SDimitry Andric 7550b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 7560b57cec5SDimitry Andric _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { 7570b57cec5SDimitry Andric return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 7580b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps(), 7590b57cec5SDimitry Andric (__mmask8) __U); 7600b57cec5SDimitry Andric } 7610b57cec5SDimitry Andric 7620b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS256 7630b57cec5SDimitry Andric _mm256_cvtepu64_ps (__m256i __A) { 7640b57cec5SDimitry Andric return (__m128)__builtin_convertvector((__v4du)__A, __v4sf); 7650b57cec5SDimitry Andric } 7660b57cec5SDimitry Andric 7670b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS256 7680b57cec5SDimitry Andric _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 7690b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 7700b57cec5SDimitry Andric (__v4sf)_mm256_cvtepu64_ps(__A), 7710b57cec5SDimitry Andric (__v4sf)__W); 7720b57cec5SDimitry Andric } 7730b57cec5SDimitry Andric 7740b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS256 7750b57cec5SDimitry Andric _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { 7760b57cec5SDimitry Andric return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 7770b57cec5SDimitry Andric (__v4sf)_mm256_cvtepu64_ps(__A), 7780b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps()); 7790b57cec5SDimitry Andric } 7800b57cec5SDimitry Andric 7810b57cec5SDimitry Andric #define _mm_range_pd(A, B, C) \ 782349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 7830b57cec5SDimitry Andric (__v2df)(__m128d)(B), (int)(C), \ 7840b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 785349cc55cSDimitry Andric (__mmask8)-1)) 7860b57cec5SDimitry Andric 7870b57cec5SDimitry Andric #define _mm_mask_range_pd(W, U, A, B, C) \ 788349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 7890b57cec5SDimitry Andric (__v2df)(__m128d)(B), (int)(C), \ 7900b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 791349cc55cSDimitry Andric (__mmask8)(U))) 7920b57cec5SDimitry Andric 7930b57cec5SDimitry Andric #define _mm_maskz_range_pd(U, A, B, C) \ 794349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 7950b57cec5SDimitry Andric (__v2df)(__m128d)(B), (int)(C), \ 7960b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 797349cc55cSDimitry Andric (__mmask8)(U))) 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric #define _mm256_range_pd(A, B, C) \ 800349cc55cSDimitry Andric ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 8010b57cec5SDimitry Andric (__v4df)(__m256d)(B), (int)(C), \ 8020b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd(), \ 803349cc55cSDimitry Andric (__mmask8)-1)) 8040b57cec5SDimitry Andric 8050b57cec5SDimitry Andric #define _mm256_mask_range_pd(W, U, A, B, C) \ 806349cc55cSDimitry Andric ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 8070b57cec5SDimitry Andric (__v4df)(__m256d)(B), (int)(C), \ 8080b57cec5SDimitry Andric (__v4df)(__m256d)(W), \ 809349cc55cSDimitry Andric (__mmask8)(U))) 8100b57cec5SDimitry Andric 8110b57cec5SDimitry Andric #define _mm256_maskz_range_pd(U, A, B, C) \ 812349cc55cSDimitry Andric ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 8130b57cec5SDimitry Andric (__v4df)(__m256d)(B), (int)(C), \ 8140b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd(), \ 815349cc55cSDimitry Andric (__mmask8)(U))) 8160b57cec5SDimitry Andric 8170b57cec5SDimitry Andric #define _mm_range_ps(A, B, C) \ 818349cc55cSDimitry Andric ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 8190b57cec5SDimitry Andric (__v4sf)(__m128)(B), (int)(C), \ 8200b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 821349cc55cSDimitry Andric (__mmask8)-1)) 8220b57cec5SDimitry Andric 8230b57cec5SDimitry Andric #define _mm_mask_range_ps(W, U, A, B, C) \ 824349cc55cSDimitry Andric ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 8250b57cec5SDimitry Andric (__v4sf)(__m128)(B), (int)(C), \ 826349cc55cSDimitry Andric (__v4sf)(__m128)(W), (__mmask8)(U))) 8270b57cec5SDimitry Andric 8280b57cec5SDimitry Andric #define _mm_maskz_range_ps(U, A, B, C) \ 829349cc55cSDimitry Andric ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 8300b57cec5SDimitry Andric (__v4sf)(__m128)(B), (int)(C), \ 8310b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 832349cc55cSDimitry Andric (__mmask8)(U))) 8330b57cec5SDimitry Andric 8340b57cec5SDimitry Andric #define _mm256_range_ps(A, B, C) \ 835349cc55cSDimitry Andric ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 8360b57cec5SDimitry Andric (__v8sf)(__m256)(B), (int)(C), \ 8370b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 838349cc55cSDimitry Andric (__mmask8)-1)) 8390b57cec5SDimitry Andric 8400b57cec5SDimitry Andric #define _mm256_mask_range_ps(W, U, A, B, C) \ 841349cc55cSDimitry Andric ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 8420b57cec5SDimitry Andric (__v8sf)(__m256)(B), (int)(C), \ 843349cc55cSDimitry Andric (__v8sf)(__m256)(W), (__mmask8)(U))) 8440b57cec5SDimitry Andric 8450b57cec5SDimitry Andric #define _mm256_maskz_range_ps(U, A, B, C) \ 846349cc55cSDimitry Andric ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 8470b57cec5SDimitry Andric (__v8sf)(__m256)(B), (int)(C), \ 8480b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 849349cc55cSDimitry Andric (__mmask8)(U))) 8500b57cec5SDimitry Andric 8510b57cec5SDimitry Andric #define _mm_reduce_pd(A, B) \ 852349cc55cSDimitry Andric ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 8530b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 854349cc55cSDimitry Andric (__mmask8)-1)) 8550b57cec5SDimitry Andric 8560b57cec5SDimitry Andric #define _mm_mask_reduce_pd(W, U, A, B) \ 857349cc55cSDimitry Andric ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 8580b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 859349cc55cSDimitry Andric (__mmask8)(U))) 8600b57cec5SDimitry Andric 8610b57cec5SDimitry Andric #define _mm_maskz_reduce_pd(U, A, B) \ 862349cc55cSDimitry Andric ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 8630b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 864349cc55cSDimitry Andric (__mmask8)(U))) 8650b57cec5SDimitry Andric 8660b57cec5SDimitry Andric #define _mm256_reduce_pd(A, B) \ 867349cc55cSDimitry Andric ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 8680b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd(), \ 869349cc55cSDimitry Andric (__mmask8)-1)) 8700b57cec5SDimitry Andric 8710b57cec5SDimitry Andric #define _mm256_mask_reduce_pd(W, U, A, B) \ 872349cc55cSDimitry Andric ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 8730b57cec5SDimitry Andric (__v4df)(__m256d)(W), \ 874349cc55cSDimitry Andric (__mmask8)(U))) 8750b57cec5SDimitry Andric 8760b57cec5SDimitry Andric #define _mm256_maskz_reduce_pd(U, A, B) \ 877349cc55cSDimitry Andric ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 8780b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd(), \ 879349cc55cSDimitry Andric (__mmask8)(U))) 8800b57cec5SDimitry Andric 8810b57cec5SDimitry Andric #define _mm_reduce_ps(A, B) \ 882349cc55cSDimitry Andric ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 8830b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 884349cc55cSDimitry Andric (__mmask8)-1)) 8850b57cec5SDimitry Andric 8860b57cec5SDimitry Andric #define _mm_mask_reduce_ps(W, U, A, B) \ 887349cc55cSDimitry Andric ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 8880b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 889349cc55cSDimitry Andric (__mmask8)(U))) 8900b57cec5SDimitry Andric 8910b57cec5SDimitry Andric #define _mm_maskz_reduce_ps(U, A, B) \ 892349cc55cSDimitry Andric ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 8930b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 894349cc55cSDimitry Andric (__mmask8)(U))) 8950b57cec5SDimitry Andric 8960b57cec5SDimitry Andric #define _mm256_reduce_ps(A, B) \ 897349cc55cSDimitry Andric ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 8980b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 899349cc55cSDimitry Andric (__mmask8)-1)) 9000b57cec5SDimitry Andric 9010b57cec5SDimitry Andric #define _mm256_mask_reduce_ps(W, U, A, B) \ 902349cc55cSDimitry Andric ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 9030b57cec5SDimitry Andric (__v8sf)(__m256)(W), \ 904349cc55cSDimitry Andric (__mmask8)(U))) 9050b57cec5SDimitry Andric 9060b57cec5SDimitry Andric #define _mm256_maskz_reduce_ps(U, A, B) \ 907349cc55cSDimitry Andric ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 9080b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 909349cc55cSDimitry Andric (__mmask8)(U))) 9100b57cec5SDimitry Andric 9110b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 9120b57cec5SDimitry Andric _mm_movepi32_mask (__m128i __A) 9130b57cec5SDimitry Andric { 9140b57cec5SDimitry Andric return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); 9150b57cec5SDimitry Andric } 9160b57cec5SDimitry Andric 9170b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 9180b57cec5SDimitry Andric _mm256_movepi32_mask (__m256i __A) 9190b57cec5SDimitry Andric { 9200b57cec5SDimitry Andric return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); 9210b57cec5SDimitry Andric } 9220b57cec5SDimitry Andric 9230b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 9240b57cec5SDimitry Andric _mm_movm_epi32 (__mmask8 __A) 9250b57cec5SDimitry Andric { 9260b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtmask2d128 (__A); 9270b57cec5SDimitry Andric } 9280b57cec5SDimitry Andric 9290b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 9300b57cec5SDimitry Andric _mm256_movm_epi32 (__mmask8 __A) 9310b57cec5SDimitry Andric { 9320b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtmask2d256 (__A); 9330b57cec5SDimitry Andric } 9340b57cec5SDimitry Andric 9350b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 9360b57cec5SDimitry Andric _mm_movm_epi64 (__mmask8 __A) 9370b57cec5SDimitry Andric { 9380b57cec5SDimitry Andric return (__m128i) __builtin_ia32_cvtmask2q128 (__A); 9390b57cec5SDimitry Andric } 9400b57cec5SDimitry Andric 9410b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 9420b57cec5SDimitry Andric _mm256_movm_epi64 (__mmask8 __A) 9430b57cec5SDimitry Andric { 9440b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtmask2q256 (__A); 9450b57cec5SDimitry Andric } 9460b57cec5SDimitry Andric 9470b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 9480b57cec5SDimitry Andric _mm_movepi64_mask (__m128i __A) 9490b57cec5SDimitry Andric { 9500b57cec5SDimitry Andric return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); 9510b57cec5SDimitry Andric } 9520b57cec5SDimitry Andric 9530b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 9540b57cec5SDimitry Andric _mm256_movepi64_mask (__m256i __A) 9550b57cec5SDimitry Andric { 9560b57cec5SDimitry Andric return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); 9570b57cec5SDimitry Andric } 9580b57cec5SDimitry Andric 9590b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 9600b57cec5SDimitry Andric _mm256_broadcast_f32x2 (__m128 __A) 9610b57cec5SDimitry Andric { 9620b57cec5SDimitry Andric return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 9630b57cec5SDimitry Andric 0, 1, 0, 1, 0, 1, 0, 1); 9640b57cec5SDimitry Andric } 9650b57cec5SDimitry Andric 9660b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 9670b57cec5SDimitry Andric _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) 9680b57cec5SDimitry Andric { 9690b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 9700b57cec5SDimitry Andric (__v8sf)_mm256_broadcast_f32x2(__A), 9710b57cec5SDimitry Andric (__v8sf)__O); 9720b57cec5SDimitry Andric } 9730b57cec5SDimitry Andric 9740b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS256 9750b57cec5SDimitry Andric _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) 9760b57cec5SDimitry Andric { 9770b57cec5SDimitry Andric return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 9780b57cec5SDimitry Andric (__v8sf)_mm256_broadcast_f32x2(__A), 9790b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps()); 9800b57cec5SDimitry Andric } 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 9830b57cec5SDimitry Andric _mm256_broadcast_f64x2(__m128d __A) 9840b57cec5SDimitry Andric { 9850b57cec5SDimitry Andric return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 9860b57cec5SDimitry Andric 0, 1, 0, 1); 9870b57cec5SDimitry Andric } 9880b57cec5SDimitry Andric 9890b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 9900b57cec5SDimitry Andric _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) 9910b57cec5SDimitry Andric { 9920b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, 9930b57cec5SDimitry Andric (__v4df)_mm256_broadcast_f64x2(__A), 9940b57cec5SDimitry Andric (__v4df)__O); 9950b57cec5SDimitry Andric } 9960b57cec5SDimitry Andric 9970b57cec5SDimitry Andric static __inline__ __m256d __DEFAULT_FN_ATTRS256 9980b57cec5SDimitry Andric _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 9990b57cec5SDimitry Andric { 10000b57cec5SDimitry Andric return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, 10010b57cec5SDimitry Andric (__v4df)_mm256_broadcast_f64x2(__A), 10020b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd()); 10030b57cec5SDimitry Andric } 10040b57cec5SDimitry Andric 10050b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 10060b57cec5SDimitry Andric _mm_broadcast_i32x2 (__m128i __A) 10070b57cec5SDimitry Andric { 10080b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 10090b57cec5SDimitry Andric 0, 1, 0, 1); 10100b57cec5SDimitry Andric } 10110b57cec5SDimitry Andric 10120b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 10130b57cec5SDimitry Andric _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) 10140b57cec5SDimitry Andric { 10150b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 10160b57cec5SDimitry Andric (__v4si)_mm_broadcast_i32x2(__A), 10170b57cec5SDimitry Andric (__v4si)__O); 10180b57cec5SDimitry Andric } 10190b57cec5SDimitry Andric 10200b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS128 10210b57cec5SDimitry Andric _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 10220b57cec5SDimitry Andric { 10230b57cec5SDimitry Andric return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 10240b57cec5SDimitry Andric (__v4si)_mm_broadcast_i32x2(__A), 10250b57cec5SDimitry Andric (__v4si)_mm_setzero_si128()); 10260b57cec5SDimitry Andric } 10270b57cec5SDimitry Andric 10280b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 10290b57cec5SDimitry Andric _mm256_broadcast_i32x2 (__m128i __A) 10300b57cec5SDimitry Andric { 10310b57cec5SDimitry Andric return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 10320b57cec5SDimitry Andric 0, 1, 0, 1, 0, 1, 0, 1); 10330b57cec5SDimitry Andric } 10340b57cec5SDimitry Andric 10350b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 10360b57cec5SDimitry Andric _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) 10370b57cec5SDimitry Andric { 10380b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 10390b57cec5SDimitry Andric (__v8si)_mm256_broadcast_i32x2(__A), 10400b57cec5SDimitry Andric (__v8si)__O); 10410b57cec5SDimitry Andric } 10420b57cec5SDimitry Andric 10430b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 10440b57cec5SDimitry Andric _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 10450b57cec5SDimitry Andric { 10460b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 10470b57cec5SDimitry Andric (__v8si)_mm256_broadcast_i32x2(__A), 10480b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256()); 10490b57cec5SDimitry Andric } 10500b57cec5SDimitry Andric 10510b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 10520b57cec5SDimitry Andric _mm256_broadcast_i64x2(__m128i __A) 10530b57cec5SDimitry Andric { 10540b57cec5SDimitry Andric return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 10550b57cec5SDimitry Andric 0, 1, 0, 1); 10560b57cec5SDimitry Andric } 10570b57cec5SDimitry Andric 10580b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 10590b57cec5SDimitry Andric _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) 10600b57cec5SDimitry Andric { 10610b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 10620b57cec5SDimitry Andric (__v4di)_mm256_broadcast_i64x2(__A), 10630b57cec5SDimitry Andric (__v4di)__O); 10640b57cec5SDimitry Andric } 10650b57cec5SDimitry Andric 10660b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS256 10670b57cec5SDimitry Andric _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 10680b57cec5SDimitry Andric { 10690b57cec5SDimitry Andric return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 10700b57cec5SDimitry Andric (__v4di)_mm256_broadcast_i64x2(__A), 10710b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256()); 10720b57cec5SDimitry Andric } 10730b57cec5SDimitry Andric 10740b57cec5SDimitry Andric #define _mm256_extractf64x2_pd(A, imm) \ 1075349cc55cSDimitry Andric ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 10760b57cec5SDimitry Andric (int)(imm), \ 10770b57cec5SDimitry Andric (__v2df)_mm_undefined_pd(), \ 1078349cc55cSDimitry Andric (__mmask8)-1)) 10790b57cec5SDimitry Andric 10800b57cec5SDimitry Andric #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ 1081349cc55cSDimitry Andric ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 10820b57cec5SDimitry Andric (int)(imm), \ 10830b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 1084349cc55cSDimitry Andric (__mmask8)(U))) 10850b57cec5SDimitry Andric 10860b57cec5SDimitry Andric #define _mm256_maskz_extractf64x2_pd(U, A, imm) \ 1087349cc55cSDimitry Andric ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 10880b57cec5SDimitry Andric (int)(imm), \ 10890b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 1090349cc55cSDimitry Andric (__mmask8)(U))) 10910b57cec5SDimitry Andric 10920b57cec5SDimitry Andric #define _mm256_extracti64x2_epi64(A, imm) \ 1093349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 10940b57cec5SDimitry Andric (int)(imm), \ 10950b57cec5SDimitry Andric (__v2di)_mm_undefined_si128(), \ 1096349cc55cSDimitry Andric (__mmask8)-1)) 10970b57cec5SDimitry Andric 10980b57cec5SDimitry Andric #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ 1099349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 11000b57cec5SDimitry Andric (int)(imm), \ 11010b57cec5SDimitry Andric (__v2di)(__m128i)(W), \ 1102349cc55cSDimitry Andric (__mmask8)(U))) 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric #define _mm256_maskz_extracti64x2_epi64(U, A, imm) \ 1105349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 11060b57cec5SDimitry Andric (int)(imm), \ 11070b57cec5SDimitry Andric (__v2di)_mm_setzero_si128(), \ 1108349cc55cSDimitry Andric (__mmask8)(U))) 11090b57cec5SDimitry Andric 11100b57cec5SDimitry Andric #define _mm256_insertf64x2(A, B, imm) \ 1111349cc55cSDimitry Andric ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \ 1112349cc55cSDimitry Andric (__v2df)(__m128d)(B), (int)(imm))) 11130b57cec5SDimitry Andric 11140b57cec5SDimitry Andric #define _mm256_mask_insertf64x2(W, U, A, B, imm) \ 1115349cc55cSDimitry Andric ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 11160b57cec5SDimitry Andric (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ 1117349cc55cSDimitry Andric (__v4df)(__m256d)(W))) 11180b57cec5SDimitry Andric 11190b57cec5SDimitry Andric #define _mm256_maskz_insertf64x2(U, A, B, imm) \ 1120349cc55cSDimitry Andric ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 11210b57cec5SDimitry Andric (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ 1122349cc55cSDimitry Andric (__v4df)_mm256_setzero_pd())) 11230b57cec5SDimitry Andric 11240b57cec5SDimitry Andric #define _mm256_inserti64x2(A, B, imm) \ 1125349cc55cSDimitry Andric ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \ 1126349cc55cSDimitry Andric (__v2di)(__m128i)(B), (int)(imm))) 11270b57cec5SDimitry Andric 11280b57cec5SDimitry Andric #define _mm256_mask_inserti64x2(W, U, A, B, imm) \ 1129349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 11300b57cec5SDimitry Andric (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ 1131349cc55cSDimitry Andric (__v4di)(__m256i)(W))) 11320b57cec5SDimitry Andric 11330b57cec5SDimitry Andric #define _mm256_maskz_inserti64x2(U, A, B, imm) \ 1134349cc55cSDimitry Andric ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 11350b57cec5SDimitry Andric (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ 1136349cc55cSDimitry Andric (__v4di)_mm256_setzero_si256())) 11370b57cec5SDimitry Andric 11380b57cec5SDimitry Andric #define _mm_mask_fpclass_pd_mask(U, A, imm) \ 1139349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1140349cc55cSDimitry Andric (__mmask8)(U))) 11410b57cec5SDimitry Andric 11420b57cec5SDimitry Andric #define _mm_fpclass_pd_mask(A, imm) \ 1143349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1144349cc55cSDimitry Andric (__mmask8)-1)) 11450b57cec5SDimitry Andric 11460b57cec5SDimitry Andric #define _mm256_mask_fpclass_pd_mask(U, A, imm) \ 1147349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1148349cc55cSDimitry Andric (__mmask8)(U))) 11490b57cec5SDimitry Andric 11500b57cec5SDimitry Andric #define _mm256_fpclass_pd_mask(A, imm) \ 1151349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1152349cc55cSDimitry Andric (__mmask8)-1)) 11530b57cec5SDimitry Andric 11540b57cec5SDimitry Andric #define _mm_mask_fpclass_ps_mask(U, A, imm) \ 1155349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1156349cc55cSDimitry Andric (__mmask8)(U))) 11570b57cec5SDimitry Andric 11580b57cec5SDimitry Andric #define _mm_fpclass_ps_mask(A, imm) \ 1159349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1160349cc55cSDimitry Andric (__mmask8)-1)) 11610b57cec5SDimitry Andric 11620b57cec5SDimitry Andric #define _mm256_mask_fpclass_ps_mask(U, A, imm) \ 1163349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1164349cc55cSDimitry Andric (__mmask8)(U))) 11650b57cec5SDimitry Andric 11660b57cec5SDimitry Andric #define _mm256_fpclass_ps_mask(A, imm) \ 1167349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1168349cc55cSDimitry Andric (__mmask8)-1)) 11690b57cec5SDimitry Andric 11700b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 11710b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256 11720b57cec5SDimitry Andric 11730b57cec5SDimitry Andric #endif 1174