10b57cec5SDimitry Andric /*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 110b57cec5SDimitry Andric #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." 120b57cec5SDimitry Andric #endif 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #ifndef __AVX512DQINTRIN_H 150b57cec5SDimitry Andric #define __AVX512DQINTRIN_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 18*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512))) 19*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS \ 20*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 21*5f757f3fSDimitry Andric __target__("avx512dq,no-evex512"))) 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric static __inline __mmask8 __DEFAULT_FN_ATTRS 240b57cec5SDimitry Andric _knot_mask8(__mmask8 __M) 250b57cec5SDimitry Andric { 260b57cec5SDimitry Andric return __builtin_ia32_knotqi(__M); 270b57cec5SDimitry Andric } 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS 300b57cec5SDimitry Andric _kand_mask8(__mmask8 __A, __mmask8 __B) 310b57cec5SDimitry Andric { 320b57cec5SDimitry Andric return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B); 330b57cec5SDimitry Andric } 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS 360b57cec5SDimitry Andric _kandn_mask8(__mmask8 __A, __mmask8 __B) 370b57cec5SDimitry Andric { 380b57cec5SDimitry Andric return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B); 390b57cec5SDimitry Andric } 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS 420b57cec5SDimitry Andric _kor_mask8(__mmask8 __A, __mmask8 __B) 430b57cec5SDimitry Andric { 440b57cec5SDimitry Andric return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B); 450b57cec5SDimitry Andric } 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS 480b57cec5SDimitry Andric _kxnor_mask8(__mmask8 __A, __mmask8 __B) 490b57cec5SDimitry Andric { 500b57cec5SDimitry Andric return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B); 510b57cec5SDimitry Andric } 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS 540b57cec5SDimitry Andric _kxor_mask8(__mmask8 __A, __mmask8 __B) 550b57cec5SDimitry Andric { 560b57cec5SDimitry Andric return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 600b57cec5SDimitry Andric _kortestc_mask8_u8(__mmask8 __A, __mmask8 __B) 610b57cec5SDimitry Andric { 620b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestcqi(__A, __B); 630b57cec5SDimitry Andric } 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 660b57cec5SDimitry Andric _kortestz_mask8_u8(__mmask8 __A, __mmask8 __B) 670b57cec5SDimitry Andric { 680b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); 690b57cec5SDimitry Andric } 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 720b57cec5SDimitry Andric _kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { 730b57cec5SDimitry Andric *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B); 740b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestzqi(__A, __B); 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 780b57cec5SDimitry Andric _ktestc_mask8_u8(__mmask8 __A, __mmask8 __B) 790b57cec5SDimitry Andric { 800b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestcqi(__A, __B); 810b57cec5SDimitry Andric } 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 840b57cec5SDimitry Andric _ktestz_mask8_u8(__mmask8 __A, __mmask8 __B) 850b57cec5SDimitry Andric { 860b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestzqi(__A, __B); 870b57cec5SDimitry Andric } 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 900b57cec5SDimitry Andric _ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) { 910b57cec5SDimitry Andric *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B); 920b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestzqi(__A, __B); 930b57cec5SDimitry Andric } 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 960b57cec5SDimitry Andric _ktestc_mask16_u8(__mmask16 __A, __mmask16 __B) 970b57cec5SDimitry Andric { 980b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestchi(__A, __B); 990b57cec5SDimitry Andric } 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 1020b57cec5SDimitry Andric _ktestz_mask16_u8(__mmask16 __A, __mmask16 __B) 1030b57cec5SDimitry Andric { 1040b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); 1050b57cec5SDimitry Andric } 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 1080b57cec5SDimitry Andric _ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { 1090b57cec5SDimitry Andric *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B); 1100b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_ktestzhi(__A, __B); 1110b57cec5SDimitry Andric } 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS 1140b57cec5SDimitry Andric _kadd_mask8(__mmask8 __A, __mmask8 __B) 1150b57cec5SDimitry Andric { 1160b57cec5SDimitry Andric return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B); 1170b57cec5SDimitry Andric } 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 1200b57cec5SDimitry Andric _kadd_mask16(__mmask16 __A, __mmask16 __B) 1210b57cec5SDimitry Andric { 1220b57cec5SDimitry Andric return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B); 1230b57cec5SDimitry Andric } 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric #define _kshiftli_mask8(A, I) \ 126349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I))) 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric #define _kshiftri_mask8(A, I) \ 129349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))) 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS 1320b57cec5SDimitry Andric _cvtmask8_u32(__mmask8 __A) { 1330b57cec5SDimitry Andric return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A); 1340b57cec5SDimitry Andric } 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS 1370b57cec5SDimitry Andric _cvtu32_mask8(unsigned int __A) { 1380b57cec5SDimitry Andric return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A); 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS 1420b57cec5SDimitry Andric _load_mask8(__mmask8 *__A) { 1430b57cec5SDimitry Andric return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A); 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 1470b57cec5SDimitry Andric _store_mask8(__mmask8 *__A, __mmask8 __B) { 1480b57cec5SDimitry Andric *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B); 1490b57cec5SDimitry Andric } 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 1520b57cec5SDimitry Andric _mm512_mullo_epi64 (__m512i __A, __m512i __B) { 1530b57cec5SDimitry Andric return (__m512i) ((__v8du) __A * (__v8du) __B); 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 1570b57cec5SDimitry Andric _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { 1580b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 1590b57cec5SDimitry Andric (__v8di)_mm512_mullo_epi64(__A, __B), 1600b57cec5SDimitry Andric (__v8di)__W); 1610b57cec5SDimitry Andric } 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 1640b57cec5SDimitry Andric _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { 1650b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 1660b57cec5SDimitry Andric (__v8di)_mm512_mullo_epi64(__A, __B), 1670b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 1710b57cec5SDimitry Andric _mm512_xor_pd(__m512d __A, __m512d __B) { 1720b57cec5SDimitry Andric return (__m512d)((__v8du)__A ^ (__v8du)__B); 1730b57cec5SDimitry Andric } 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 1760b57cec5SDimitry Andric _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1770b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 1780b57cec5SDimitry Andric (__v8df)_mm512_xor_pd(__A, __B), 1790b57cec5SDimitry Andric (__v8df)__W); 1800b57cec5SDimitry Andric } 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 1830b57cec5SDimitry Andric _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) { 1840b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 1850b57cec5SDimitry Andric (__v8df)_mm512_xor_pd(__A, __B), 1860b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 1900b57cec5SDimitry Andric _mm512_xor_ps (__m512 __A, __m512 __B) { 1910b57cec5SDimitry Andric return (__m512)((__v16su)__A ^ (__v16su)__B); 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 1950b57cec5SDimitry Andric _mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 1960b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 1970b57cec5SDimitry Andric (__v16sf)_mm512_xor_ps(__A, __B), 1980b57cec5SDimitry Andric (__v16sf)__W); 1990b57cec5SDimitry Andric } 2000b57cec5SDimitry Andric 2010b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 2020b57cec5SDimitry Andric _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2030b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2040b57cec5SDimitry Andric (__v16sf)_mm512_xor_ps(__A, __B), 2050b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 2060b57cec5SDimitry Andric } 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 2090b57cec5SDimitry Andric _mm512_or_pd(__m512d __A, __m512d __B) { 2100b57cec5SDimitry Andric return (__m512d)((__v8du)__A | (__v8du)__B); 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 2140b57cec5SDimitry Andric _mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2150b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2160b57cec5SDimitry Andric (__v8df)_mm512_or_pd(__A, __B), 2170b57cec5SDimitry Andric (__v8df)__W); 2180b57cec5SDimitry Andric } 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 2210b57cec5SDimitry Andric _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2220b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2230b57cec5SDimitry Andric (__v8df)_mm512_or_pd(__A, __B), 2240b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 2250b57cec5SDimitry Andric } 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 2280b57cec5SDimitry Andric _mm512_or_ps(__m512 __A, __m512 __B) { 2290b57cec5SDimitry Andric return (__m512)((__v16su)__A | (__v16su)__B); 2300b57cec5SDimitry Andric } 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 2330b57cec5SDimitry Andric _mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2340b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2350b57cec5SDimitry Andric (__v16sf)_mm512_or_ps(__A, __B), 2360b57cec5SDimitry Andric (__v16sf)__W); 2370b57cec5SDimitry Andric } 2380b57cec5SDimitry Andric 2390b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 2400b57cec5SDimitry Andric _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2410b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2420b57cec5SDimitry Andric (__v16sf)_mm512_or_ps(__A, __B), 2430b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 2440b57cec5SDimitry Andric } 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 2470b57cec5SDimitry Andric _mm512_and_pd(__m512d __A, __m512d __B) { 2480b57cec5SDimitry Andric return (__m512d)((__v8du)__A & (__v8du)__B); 2490b57cec5SDimitry Andric } 2500b57cec5SDimitry Andric 2510b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 2520b57cec5SDimitry Andric _mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2530b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2540b57cec5SDimitry Andric (__v8df)_mm512_and_pd(__A, __B), 2550b57cec5SDimitry Andric (__v8df)__W); 2560b57cec5SDimitry Andric } 2570b57cec5SDimitry Andric 2580b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 2590b57cec5SDimitry Andric _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2600b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2610b57cec5SDimitry Andric (__v8df)_mm512_and_pd(__A, __B), 2620b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 2630b57cec5SDimitry Andric } 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 2660b57cec5SDimitry Andric _mm512_and_ps(__m512 __A, __m512 __B) { 2670b57cec5SDimitry Andric return (__m512)((__v16su)__A & (__v16su)__B); 2680b57cec5SDimitry Andric } 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 2710b57cec5SDimitry Andric _mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2720b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2730b57cec5SDimitry Andric (__v16sf)_mm512_and_ps(__A, __B), 2740b57cec5SDimitry Andric (__v16sf)__W); 2750b57cec5SDimitry Andric } 2760b57cec5SDimitry Andric 2770b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 2780b57cec5SDimitry Andric _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2790b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 2800b57cec5SDimitry Andric (__v16sf)_mm512_and_ps(__A, __B), 2810b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 2820b57cec5SDimitry Andric } 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 2850b57cec5SDimitry Andric _mm512_andnot_pd(__m512d __A, __m512d __B) { 2860b57cec5SDimitry Andric return (__m512d)(~(__v8du)__A & (__v8du)__B); 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 2900b57cec5SDimitry Andric _mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2910b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2920b57cec5SDimitry Andric (__v8df)_mm512_andnot_pd(__A, __B), 2930b57cec5SDimitry Andric (__v8df)__W); 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 2970b57cec5SDimitry Andric _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2980b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 2990b57cec5SDimitry Andric (__v8df)_mm512_andnot_pd(__A, __B), 3000b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 3040b57cec5SDimitry Andric _mm512_andnot_ps(__m512 __A, __m512 __B) { 3050b57cec5SDimitry Andric return (__m512)(~(__v16su)__A & (__v16su)__B); 3060b57cec5SDimitry Andric } 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 3090b57cec5SDimitry Andric _mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 3100b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 3110b57cec5SDimitry Andric (__v16sf)_mm512_andnot_ps(__A, __B), 3120b57cec5SDimitry Andric (__v16sf)__W); 3130b57cec5SDimitry Andric } 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 3160b57cec5SDimitry Andric _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) { 3170b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 3180b57cec5SDimitry Andric (__v16sf)_mm512_andnot_ps(__A, __B), 3190b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3230b57cec5SDimitry Andric _mm512_cvtpd_epi64 (__m512d __A) { 3240b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 3250b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 3260b57cec5SDimitry Andric (__mmask8) -1, 3270b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 3280b57cec5SDimitry Andric } 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3310b57cec5SDimitry Andric _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { 3320b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 3330b57cec5SDimitry Andric (__v8di) __W, 3340b57cec5SDimitry Andric (__mmask8) __U, 3350b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 3360b57cec5SDimitry Andric } 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3390b57cec5SDimitry Andric _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) { 3400b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 3410b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 3420b57cec5SDimitry Andric (__mmask8) __U, 3430b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 3440b57cec5SDimitry Andric } 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric #define _mm512_cvt_roundpd_epi64(A, R) \ 347349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 3480b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 349349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric #define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \ 352349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 3530b57cec5SDimitry Andric (__v8di)(__m512i)(W), \ 354349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \ 357349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 3580b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 359349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 3600b57cec5SDimitry Andric 3610b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3620b57cec5SDimitry Andric _mm512_cvtpd_epu64 (__m512d __A) { 3630b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 3640b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 3650b57cec5SDimitry Andric (__mmask8) -1, 3660b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 3670b57cec5SDimitry Andric } 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3700b57cec5SDimitry Andric _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { 3710b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 3720b57cec5SDimitry Andric (__v8di) __W, 3730b57cec5SDimitry Andric (__mmask8) __U, 3740b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 3780b57cec5SDimitry Andric _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) { 3790b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 3800b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 3810b57cec5SDimitry Andric (__mmask8) __U, 3820b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric #define _mm512_cvt_roundpd_epu64(A, R) \ 386349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 3870b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 388349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 3890b57cec5SDimitry Andric 3900b57cec5SDimitry Andric #define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \ 391349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 3920b57cec5SDimitry Andric (__v8di)(__m512i)(W), \ 393349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \ 396349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 3970b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 398349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4010b57cec5SDimitry Andric _mm512_cvtps_epi64 (__m256 __A) { 4020b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 4030b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 4040b57cec5SDimitry Andric (__mmask8) -1, 4050b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4090b57cec5SDimitry Andric _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { 4100b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 4110b57cec5SDimitry Andric (__v8di) __W, 4120b57cec5SDimitry Andric (__mmask8) __U, 4130b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 4140b57cec5SDimitry Andric } 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4170b57cec5SDimitry Andric _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) { 4180b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 4190b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 4200b57cec5SDimitry Andric (__mmask8) __U, 4210b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 4220b57cec5SDimitry Andric } 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric #define _mm512_cvt_roundps_epi64(A, R) \ 425349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 4260b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 427349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 4280b57cec5SDimitry Andric 4290b57cec5SDimitry Andric #define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \ 430349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 4310b57cec5SDimitry Andric (__v8di)(__m512i)(W), \ 432349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 4330b57cec5SDimitry Andric 4340b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundps_epi64(U, A, R) \ 435349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 4360b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 437349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 4380b57cec5SDimitry Andric 4390b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4400b57cec5SDimitry Andric _mm512_cvtps_epu64 (__m256 __A) { 4410b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 4420b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 4430b57cec5SDimitry Andric (__mmask8) -1, 4440b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric 4470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4480b57cec5SDimitry Andric _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { 4490b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 4500b57cec5SDimitry Andric (__v8di) __W, 4510b57cec5SDimitry Andric (__mmask8) __U, 4520b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 4530b57cec5SDimitry Andric } 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4560b57cec5SDimitry Andric _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { 4570b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 4580b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 4590b57cec5SDimitry Andric (__mmask8) __U, 4600b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 4610b57cec5SDimitry Andric } 4620b57cec5SDimitry Andric 4630b57cec5SDimitry Andric #define _mm512_cvt_roundps_epu64(A, R) \ 464349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 4650b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 466349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 4670b57cec5SDimitry Andric 4680b57cec5SDimitry Andric #define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \ 469349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 4700b57cec5SDimitry Andric (__v8di)(__m512i)(W), \ 471349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 4720b57cec5SDimitry Andric 4730b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundps_epu64(U, A, R) \ 474349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 4750b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 476349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric 4790b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 4800b57cec5SDimitry Andric _mm512_cvtepi64_pd (__m512i __A) { 4810b57cec5SDimitry Andric return (__m512d)__builtin_convertvector((__v8di)__A, __v8df); 4820b57cec5SDimitry Andric } 4830b57cec5SDimitry Andric 4840b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 4850b57cec5SDimitry Andric _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) { 4860b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 4870b57cec5SDimitry Andric (__v8df)_mm512_cvtepi64_pd(__A), 4880b57cec5SDimitry Andric (__v8df)__W); 4890b57cec5SDimitry Andric } 4900b57cec5SDimitry Andric 4910b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 4920b57cec5SDimitry Andric _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) { 4930b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 4940b57cec5SDimitry Andric (__v8df)_mm512_cvtepi64_pd(__A), 4950b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 4960b57cec5SDimitry Andric } 4970b57cec5SDimitry Andric 4980b57cec5SDimitry Andric #define _mm512_cvt_roundepi64_pd(A, R) \ 499349cc55cSDimitry Andric ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 5000b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 501349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric #define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \ 504349cc55cSDimitry Andric ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 5050b57cec5SDimitry Andric (__v8df)(__m512d)(W), \ 506349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 5070b57cec5SDimitry Andric 5080b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \ 509349cc55cSDimitry Andric ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 5100b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 511349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512 5140b57cec5SDimitry Andric _mm512_cvtepi64_ps (__m512i __A) { 5150b57cec5SDimitry Andric return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 5160b57cec5SDimitry Andric (__v8sf) _mm256_setzero_ps(), 5170b57cec5SDimitry Andric (__mmask8) -1, 5180b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 5190b57cec5SDimitry Andric } 5200b57cec5SDimitry Andric 5210b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512 5220b57cec5SDimitry Andric _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) { 5230b57cec5SDimitry Andric return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 5240b57cec5SDimitry Andric (__v8sf) __W, 5250b57cec5SDimitry Andric (__mmask8) __U, 5260b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512 5300b57cec5SDimitry Andric _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) { 5310b57cec5SDimitry Andric return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 5320b57cec5SDimitry Andric (__v8sf) _mm256_setzero_ps(), 5330b57cec5SDimitry Andric (__mmask8) __U, 5340b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 5350b57cec5SDimitry Andric } 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric #define _mm512_cvt_roundepi64_ps(A, R) \ 538349cc55cSDimitry Andric ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 5390b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 540349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric #define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \ 543349cc55cSDimitry Andric ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 5440b57cec5SDimitry Andric (__v8sf)(__m256)(W), (__mmask8)(U), \ 545349cc55cSDimitry Andric (int)(R))) 5460b57cec5SDimitry Andric 5470b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \ 548349cc55cSDimitry Andric ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 5490b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 550349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 5510b57cec5SDimitry Andric 5520b57cec5SDimitry Andric 5530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5540b57cec5SDimitry Andric _mm512_cvttpd_epi64 (__m512d __A) { 5550b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 5560b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 5570b57cec5SDimitry Andric (__mmask8) -1, 5580b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 5590b57cec5SDimitry Andric } 5600b57cec5SDimitry Andric 5610b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5620b57cec5SDimitry Andric _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { 5630b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 5640b57cec5SDimitry Andric (__v8di) __W, 5650b57cec5SDimitry Andric (__mmask8) __U, 5660b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5700b57cec5SDimitry Andric _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) { 5710b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 5720b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 5730b57cec5SDimitry Andric (__mmask8) __U, 5740b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 5750b57cec5SDimitry Andric } 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric #define _mm512_cvtt_roundpd_epi64(A, R) \ 578349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 5790b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 580349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 5810b57cec5SDimitry Andric 5820b57cec5SDimitry Andric #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \ 583349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 5840b57cec5SDimitry Andric (__v8di)(__m512i)(W), \ 585349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 5860b57cec5SDimitry Andric 5870b57cec5SDimitry Andric #define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \ 588349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 5890b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 590349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 5930b57cec5SDimitry Andric _mm512_cvttpd_epu64 (__m512d __A) { 5940b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 5950b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 5960b57cec5SDimitry Andric (__mmask8) -1, 5970b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 5980b57cec5SDimitry Andric } 5990b57cec5SDimitry Andric 6000b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6010b57cec5SDimitry Andric _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { 6020b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 6030b57cec5SDimitry Andric (__v8di) __W, 6040b57cec5SDimitry Andric (__mmask8) __U, 6050b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 6060b57cec5SDimitry Andric } 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6090b57cec5SDimitry Andric _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) { 6100b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 6110b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 6120b57cec5SDimitry Andric (__mmask8) __U, 6130b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 6140b57cec5SDimitry Andric } 6150b57cec5SDimitry Andric 6160b57cec5SDimitry Andric #define _mm512_cvtt_roundpd_epu64(A, R) \ 617349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 6180b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 619349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 6200b57cec5SDimitry Andric 6210b57cec5SDimitry Andric #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \ 622349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 6230b57cec5SDimitry Andric (__v8di)(__m512i)(W), \ 624349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 6250b57cec5SDimitry Andric 6260b57cec5SDimitry Andric #define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \ 627349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 6280b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 629349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 6300b57cec5SDimitry Andric 6310b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6320b57cec5SDimitry Andric _mm512_cvttps_epi64 (__m256 __A) { 6330b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 6340b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 6350b57cec5SDimitry Andric (__mmask8) -1, 6360b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 6370b57cec5SDimitry Andric } 6380b57cec5SDimitry Andric 6390b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6400b57cec5SDimitry Andric _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { 6410b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 6420b57cec5SDimitry Andric (__v8di) __W, 6430b57cec5SDimitry Andric (__mmask8) __U, 6440b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 6450b57cec5SDimitry Andric } 6460b57cec5SDimitry Andric 6470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6480b57cec5SDimitry Andric _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) { 6490b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 6500b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 6510b57cec5SDimitry Andric (__mmask8) __U, 6520b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 6530b57cec5SDimitry Andric } 6540b57cec5SDimitry Andric 6550b57cec5SDimitry Andric #define _mm512_cvtt_roundps_epi64(A, R) \ 656349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 6570b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 658349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 6590b57cec5SDimitry Andric 6600b57cec5SDimitry Andric #define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \ 661349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 6620b57cec5SDimitry Andric (__v8di)(__m512i)(W), \ 663349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 6640b57cec5SDimitry Andric 6650b57cec5SDimitry Andric #define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \ 666349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 6670b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 668349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6710b57cec5SDimitry Andric _mm512_cvttps_epu64 (__m256 __A) { 6720b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 6730b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 6740b57cec5SDimitry Andric (__mmask8) -1, 6750b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 6760b57cec5SDimitry Andric } 6770b57cec5SDimitry Andric 6780b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6790b57cec5SDimitry Andric _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { 6800b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 6810b57cec5SDimitry Andric (__v8di) __W, 6820b57cec5SDimitry Andric (__mmask8) __U, 6830b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 6840b57cec5SDimitry Andric } 6850b57cec5SDimitry Andric 6860b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6870b57cec5SDimitry Andric _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { 6880b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 6890b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512(), 6900b57cec5SDimitry Andric (__mmask8) __U, 6910b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 6920b57cec5SDimitry Andric } 6930b57cec5SDimitry Andric 6940b57cec5SDimitry Andric #define _mm512_cvtt_roundps_epu64(A, R) \ 695349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 6960b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 697349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 6980b57cec5SDimitry Andric 6990b57cec5SDimitry Andric #define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \ 700349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 7010b57cec5SDimitry Andric (__v8di)(__m512i)(W), \ 702349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 7030b57cec5SDimitry Andric 7040b57cec5SDimitry Andric #define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \ 705349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 7060b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512(), \ 707349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 7080b57cec5SDimitry Andric 7090b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 7100b57cec5SDimitry Andric _mm512_cvtepu64_pd (__m512i __A) { 7110b57cec5SDimitry Andric return (__m512d)__builtin_convertvector((__v8du)__A, __v8df); 7120b57cec5SDimitry Andric } 7130b57cec5SDimitry Andric 7140b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 7150b57cec5SDimitry Andric _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) { 7160b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 7170b57cec5SDimitry Andric (__v8df)_mm512_cvtepu64_pd(__A), 7180b57cec5SDimitry Andric (__v8df)__W); 7190b57cec5SDimitry Andric } 7200b57cec5SDimitry Andric 7210b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 7220b57cec5SDimitry Andric _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { 7230b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 7240b57cec5SDimitry Andric (__v8df)_mm512_cvtepu64_pd(__A), 7250b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 7260b57cec5SDimitry Andric } 7270b57cec5SDimitry Andric 7280b57cec5SDimitry Andric #define _mm512_cvt_roundepu64_pd(A, R) \ 729349cc55cSDimitry Andric ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 7300b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 731349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 7320b57cec5SDimitry Andric 7330b57cec5SDimitry Andric #define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \ 734349cc55cSDimitry Andric ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 7350b57cec5SDimitry Andric (__v8df)(__m512d)(W), \ 736349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 7370b57cec5SDimitry Andric 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \ 740349cc55cSDimitry Andric ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 7410b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 742349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 7430b57cec5SDimitry Andric 7440b57cec5SDimitry Andric 7450b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512 7460b57cec5SDimitry Andric _mm512_cvtepu64_ps (__m512i __A) { 7470b57cec5SDimitry Andric return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 7480b57cec5SDimitry Andric (__v8sf) _mm256_setzero_ps(), 7490b57cec5SDimitry Andric (__mmask8) -1, 7500b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 7510b57cec5SDimitry Andric } 7520b57cec5SDimitry Andric 7530b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512 7540b57cec5SDimitry Andric _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) { 7550b57cec5SDimitry Andric return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 7560b57cec5SDimitry Andric (__v8sf) __W, 7570b57cec5SDimitry Andric (__mmask8) __U, 7580b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 7590b57cec5SDimitry Andric } 7600b57cec5SDimitry Andric 7610b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512 7620b57cec5SDimitry Andric _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { 7630b57cec5SDimitry Andric return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 7640b57cec5SDimitry Andric (__v8sf) _mm256_setzero_ps(), 7650b57cec5SDimitry Andric (__mmask8) __U, 7660b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 7670b57cec5SDimitry Andric } 7680b57cec5SDimitry Andric 7690b57cec5SDimitry Andric #define _mm512_cvt_roundepu64_ps(A, R) \ 770349cc55cSDimitry Andric ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 7710b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 772349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 7730b57cec5SDimitry Andric 7740b57cec5SDimitry Andric #define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \ 775349cc55cSDimitry Andric ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 7760b57cec5SDimitry Andric (__v8sf)(__m256)(W), (__mmask8)(U), \ 777349cc55cSDimitry Andric (int)(R))) 7780b57cec5SDimitry Andric 7790b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \ 780349cc55cSDimitry Andric ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 7810b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 782349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 7830b57cec5SDimitry Andric 7840b57cec5SDimitry Andric #define _mm512_range_pd(A, B, C) \ 785349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 7860b57cec5SDimitry Andric (__v8df)(__m512d)(B), (int)(C), \ 7870b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 7880b57cec5SDimitry Andric (__mmask8)-1, \ 789349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 7900b57cec5SDimitry Andric 7910b57cec5SDimitry Andric #define _mm512_mask_range_pd(W, U, A, B, C) \ 792349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 7930b57cec5SDimitry Andric (__v8df)(__m512d)(B), (int)(C), \ 7940b57cec5SDimitry Andric (__v8df)(__m512d)(W), (__mmask8)(U), \ 795349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 7960b57cec5SDimitry Andric 7970b57cec5SDimitry Andric #define _mm512_maskz_range_pd(U, A, B, C) \ 798349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 7990b57cec5SDimitry Andric (__v8df)(__m512d)(B), (int)(C), \ 8000b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 8010b57cec5SDimitry Andric (__mmask8)(U), \ 802349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 8030b57cec5SDimitry Andric 8040b57cec5SDimitry Andric #define _mm512_range_round_pd(A, B, C, R) \ 805349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 8060b57cec5SDimitry Andric (__v8df)(__m512d)(B), (int)(C), \ 8070b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 808349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 8090b57cec5SDimitry Andric 8100b57cec5SDimitry Andric #define _mm512_mask_range_round_pd(W, U, A, B, C, R) \ 811349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 8120b57cec5SDimitry Andric (__v8df)(__m512d)(B), (int)(C), \ 8130b57cec5SDimitry Andric (__v8df)(__m512d)(W), (__mmask8)(U), \ 814349cc55cSDimitry Andric (int)(R))) 8150b57cec5SDimitry Andric 8160b57cec5SDimitry Andric #define _mm512_maskz_range_round_pd(U, A, B, C, R) \ 817349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 8180b57cec5SDimitry Andric (__v8df)(__m512d)(B), (int)(C), \ 8190b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 820349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 8210b57cec5SDimitry Andric 8220b57cec5SDimitry Andric #define _mm512_range_ps(A, B, C) \ 823349cc55cSDimitry Andric ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 8240b57cec5SDimitry Andric (__v16sf)(__m512)(B), (int)(C), \ 8250b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 8260b57cec5SDimitry Andric (__mmask16)-1, \ 827349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 8280b57cec5SDimitry Andric 8290b57cec5SDimitry Andric #define _mm512_mask_range_ps(W, U, A, B, C) \ 830349cc55cSDimitry Andric ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 8310b57cec5SDimitry Andric (__v16sf)(__m512)(B), (int)(C), \ 8320b57cec5SDimitry Andric (__v16sf)(__m512)(W), (__mmask16)(U), \ 833349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 8340b57cec5SDimitry Andric 8350b57cec5SDimitry Andric #define _mm512_maskz_range_ps(U, A, B, C) \ 836349cc55cSDimitry Andric ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 8370b57cec5SDimitry Andric (__v16sf)(__m512)(B), (int)(C), \ 8380b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 8390b57cec5SDimitry Andric (__mmask16)(U), \ 840349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 8410b57cec5SDimitry Andric 8420b57cec5SDimitry Andric #define _mm512_range_round_ps(A, B, C, R) \ 843349cc55cSDimitry Andric ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 8440b57cec5SDimitry Andric (__v16sf)(__m512)(B), (int)(C), \ 8450b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 846349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 8470b57cec5SDimitry Andric 8480b57cec5SDimitry Andric #define _mm512_mask_range_round_ps(W, U, A, B, C, R) \ 849349cc55cSDimitry Andric ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 8500b57cec5SDimitry Andric (__v16sf)(__m512)(B), (int)(C), \ 8510b57cec5SDimitry Andric (__v16sf)(__m512)(W), (__mmask16)(U), \ 852349cc55cSDimitry Andric (int)(R))) 8530b57cec5SDimitry Andric 8540b57cec5SDimitry Andric #define _mm512_maskz_range_round_ps(U, A, B, C, R) \ 855349cc55cSDimitry Andric ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 8560b57cec5SDimitry Andric (__v16sf)(__m512)(B), (int)(C), \ 8570b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 858349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 8590b57cec5SDimitry Andric 8600b57cec5SDimitry Andric #define _mm_range_round_ss(A, B, C, R) \ 861349cc55cSDimitry Andric ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 8620b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 8630b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 8640b57cec5SDimitry Andric (__mmask8) -1, (int)(C),\ 865349cc55cSDimitry Andric (int)(R))) 8660b57cec5SDimitry Andric 8670b57cec5SDimitry Andric #define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) 8680b57cec5SDimitry Andric 8690b57cec5SDimitry Andric #define _mm_mask_range_round_ss(W, U, A, B, C, R) \ 870349cc55cSDimitry Andric ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 8710b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 8720b57cec5SDimitry Andric (__v4sf)(__m128)(W),\ 8730b57cec5SDimitry Andric (__mmask8)(U), (int)(C),\ 874349cc55cSDimitry Andric (int)(R))) 8750b57cec5SDimitry Andric 8760b57cec5SDimitry Andric #define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) 8770b57cec5SDimitry Andric 8780b57cec5SDimitry Andric #define _mm_maskz_range_round_ss(U, A, B, C, R) \ 879349cc55cSDimitry Andric ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 8800b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 8810b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 8820b57cec5SDimitry Andric (__mmask8)(U), (int)(C),\ 883349cc55cSDimitry Andric (int)(R))) 8840b57cec5SDimitry Andric 8850b57cec5SDimitry Andric #define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) 8860b57cec5SDimitry Andric 8870b57cec5SDimitry Andric #define _mm_range_round_sd(A, B, C, R) \ 888349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ 8890b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 8900b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 8910b57cec5SDimitry Andric (__mmask8) -1, (int)(C),\ 892349cc55cSDimitry Andric (int)(R))) 8930b57cec5SDimitry Andric 8940b57cec5SDimitry Andric #define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION) 8950b57cec5SDimitry Andric 8960b57cec5SDimitry Andric #define _mm_mask_range_round_sd(W, U, A, B, C, R) \ 897349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ 8980b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 8990b57cec5SDimitry Andric (__v2df)(__m128d)(W),\ 9000b57cec5SDimitry Andric (__mmask8)(U), (int)(C),\ 901349cc55cSDimitry Andric (int)(R))) 9020b57cec5SDimitry Andric 9030b57cec5SDimitry Andric #define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION) 9040b57cec5SDimitry Andric 9050b57cec5SDimitry Andric #define _mm_maskz_range_round_sd(U, A, B, C, R) \ 906349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ 9070b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 9080b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 9090b57cec5SDimitry Andric (__mmask8)(U), (int)(C),\ 910349cc55cSDimitry Andric (int)(R))) 9110b57cec5SDimitry Andric 9120b57cec5SDimitry Andric #define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) 9130b57cec5SDimitry Andric 9140b57cec5SDimitry Andric #define _mm512_reduce_pd(A, B) \ 915349cc55cSDimitry Andric ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 9160b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 9170b57cec5SDimitry Andric (__mmask8)-1, \ 918349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 9190b57cec5SDimitry Andric 9200b57cec5SDimitry Andric #define _mm512_mask_reduce_pd(W, U, A, B) \ 921349cc55cSDimitry Andric ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 9220b57cec5SDimitry Andric (__v8df)(__m512d)(W), \ 9230b57cec5SDimitry Andric (__mmask8)(U), \ 924349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 9250b57cec5SDimitry Andric 9260b57cec5SDimitry Andric #define _mm512_maskz_reduce_pd(U, A, B) \ 927349cc55cSDimitry Andric ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 9280b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 9290b57cec5SDimitry Andric (__mmask8)(U), \ 930349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 9310b57cec5SDimitry Andric 9320b57cec5SDimitry Andric #define _mm512_reduce_ps(A, B) \ 933349cc55cSDimitry Andric ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 9340b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 9350b57cec5SDimitry Andric (__mmask16)-1, \ 936349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 9370b57cec5SDimitry Andric 9380b57cec5SDimitry Andric #define _mm512_mask_reduce_ps(W, U, A, B) \ 939349cc55cSDimitry Andric ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 9400b57cec5SDimitry Andric (__v16sf)(__m512)(W), \ 9410b57cec5SDimitry Andric (__mmask16)(U), \ 942349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 9430b57cec5SDimitry Andric 9440b57cec5SDimitry Andric #define _mm512_maskz_reduce_ps(U, A, B) \ 945349cc55cSDimitry Andric ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 9460b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 9470b57cec5SDimitry Andric (__mmask16)(U), \ 948349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 9490b57cec5SDimitry Andric 9500b57cec5SDimitry Andric #define _mm512_reduce_round_pd(A, B, R) \ 951349cc55cSDimitry Andric ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 9520b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 953349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 9540b57cec5SDimitry Andric 9550b57cec5SDimitry Andric #define _mm512_mask_reduce_round_pd(W, U, A, B, R) \ 956349cc55cSDimitry Andric ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 9570b57cec5SDimitry Andric (__v8df)(__m512d)(W), \ 958349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 9590b57cec5SDimitry Andric 9600b57cec5SDimitry Andric #define _mm512_maskz_reduce_round_pd(U, A, B, R) \ 961349cc55cSDimitry Andric ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 9620b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 963349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 9640b57cec5SDimitry Andric 9650b57cec5SDimitry Andric #define _mm512_reduce_round_ps(A, B, R) \ 966349cc55cSDimitry Andric ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 9670b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 968349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 9690b57cec5SDimitry Andric 9700b57cec5SDimitry Andric #define _mm512_mask_reduce_round_ps(W, U, A, B, R) \ 971349cc55cSDimitry Andric ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 9720b57cec5SDimitry Andric (__v16sf)(__m512)(W), \ 973349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 9740b57cec5SDimitry Andric 9750b57cec5SDimitry Andric #define _mm512_maskz_reduce_round_ps(U, A, B, R) \ 976349cc55cSDimitry Andric ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 9770b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 978349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 9790b57cec5SDimitry Andric 9800b57cec5SDimitry Andric #define _mm_reduce_ss(A, B, C) \ 981349cc55cSDimitry Andric ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 9820b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 9830b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ 984349cc55cSDimitry Andric (int)(C), _MM_FROUND_CUR_DIRECTION)) 9850b57cec5SDimitry Andric 9860b57cec5SDimitry Andric #define _mm_mask_reduce_ss(W, U, A, B, C) \ 987349cc55cSDimitry Andric ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 9880b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 9890b57cec5SDimitry Andric (__v4sf)(__m128)(W), (__mmask8)(U), \ 990349cc55cSDimitry Andric (int)(C), _MM_FROUND_CUR_DIRECTION)) 9910b57cec5SDimitry Andric 9920b57cec5SDimitry Andric #define _mm_maskz_reduce_ss(U, A, B, C) \ 993349cc55cSDimitry Andric ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 9940b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 9950b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 9960b57cec5SDimitry Andric (__mmask8)(U), (int)(C), \ 997349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 9980b57cec5SDimitry Andric 9990b57cec5SDimitry Andric #define _mm_reduce_round_ss(A, B, C, R) \ 1000349cc55cSDimitry Andric ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 10010b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 10020b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ 1003349cc55cSDimitry Andric (int)(C), (int)(R))) 10040b57cec5SDimitry Andric 10050b57cec5SDimitry Andric #define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ 1006349cc55cSDimitry Andric ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 10070b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 10080b57cec5SDimitry Andric (__v4sf)(__m128)(W), (__mmask8)(U), \ 1009349cc55cSDimitry Andric (int)(C), (int)(R))) 10100b57cec5SDimitry Andric 10110b57cec5SDimitry Andric #define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ 1012349cc55cSDimitry Andric ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 10130b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 10140b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 1015349cc55cSDimitry Andric (__mmask8)(U), (int)(C), (int)(R))) 10160b57cec5SDimitry Andric 10170b57cec5SDimitry Andric #define _mm_reduce_sd(A, B, C) \ 1018349cc55cSDimitry Andric ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 10190b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 10200b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 10210b57cec5SDimitry Andric (__mmask8)-1, (int)(C), \ 1022349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 10230b57cec5SDimitry Andric 10240b57cec5SDimitry Andric #define _mm_mask_reduce_sd(W, U, A, B, C) \ 1025349cc55cSDimitry Andric ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 10260b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 10270b57cec5SDimitry Andric (__v2df)(__m128d)(W), (__mmask8)(U), \ 1028349cc55cSDimitry Andric (int)(C), _MM_FROUND_CUR_DIRECTION)) 10290b57cec5SDimitry Andric 10300b57cec5SDimitry Andric #define _mm_maskz_reduce_sd(U, A, B, C) \ 1031349cc55cSDimitry Andric ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 10320b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 10330b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 10340b57cec5SDimitry Andric (__mmask8)(U), (int)(C), \ 1035349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 10360b57cec5SDimitry Andric 10370b57cec5SDimitry Andric #define _mm_reduce_round_sd(A, B, C, R) \ 1038349cc55cSDimitry Andric ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 10390b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 10400b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 1041349cc55cSDimitry Andric (__mmask8)-1, (int)(C), (int)(R))) 10420b57cec5SDimitry Andric 10430b57cec5SDimitry Andric #define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ 1044349cc55cSDimitry Andric ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 10450b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 10460b57cec5SDimitry Andric (__v2df)(__m128d)(W), (__mmask8)(U), \ 1047349cc55cSDimitry Andric (int)(C), (int)(R))) 10480b57cec5SDimitry Andric 10490b57cec5SDimitry Andric #define _mm_maskz_reduce_round_sd(U, A, B, C, R) \ 1050349cc55cSDimitry Andric ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 10510b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 10520b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 1053349cc55cSDimitry Andric (__mmask8)(U), (int)(C), (int)(R))) 10540b57cec5SDimitry Andric 10550b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 10560b57cec5SDimitry Andric _mm512_movepi32_mask (__m512i __A) 10570b57cec5SDimitry Andric { 10580b57cec5SDimitry Andric return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); 10590b57cec5SDimitry Andric } 10600b57cec5SDimitry Andric 10610b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10620b57cec5SDimitry Andric _mm512_movm_epi32 (__mmask16 __A) 10630b57cec5SDimitry Andric { 10640b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtmask2d512 (__A); 10650b57cec5SDimitry Andric } 10660b57cec5SDimitry Andric 10670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 10680b57cec5SDimitry Andric _mm512_movm_epi64 (__mmask8 __A) 10690b57cec5SDimitry Andric { 10700b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtmask2q512 (__A); 10710b57cec5SDimitry Andric } 10720b57cec5SDimitry Andric 10730b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 10740b57cec5SDimitry Andric _mm512_movepi64_mask (__m512i __A) 10750b57cec5SDimitry Andric { 10760b57cec5SDimitry Andric return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); 10770b57cec5SDimitry Andric } 10780b57cec5SDimitry Andric 10790b57cec5SDimitry Andric 10800b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 10810b57cec5SDimitry Andric _mm512_broadcast_f32x2 (__m128 __A) 10820b57cec5SDimitry Andric { 10830b57cec5SDimitry Andric return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 10840b57cec5SDimitry Andric 0, 1, 0, 1, 0, 1, 0, 1, 10850b57cec5SDimitry Andric 0, 1, 0, 1, 0, 1, 0, 1); 10860b57cec5SDimitry Andric } 10870b57cec5SDimitry Andric 10880b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 10890b57cec5SDimitry Andric _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) 10900b57cec5SDimitry Andric { 10910b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 10920b57cec5SDimitry Andric (__v16sf)_mm512_broadcast_f32x2(__A), 10930b57cec5SDimitry Andric (__v16sf)__O); 10940b57cec5SDimitry Andric } 10950b57cec5SDimitry Andric 10960b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 10970b57cec5SDimitry Andric _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) 10980b57cec5SDimitry Andric { 10990b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 11000b57cec5SDimitry Andric (__v16sf)_mm512_broadcast_f32x2(__A), 11010b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 11020b57cec5SDimitry Andric } 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 11050b57cec5SDimitry Andric _mm512_broadcast_f32x8(__m256 __A) 11060b57cec5SDimitry Andric { 11070b57cec5SDimitry Andric return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A, 11080b57cec5SDimitry Andric 0, 1, 2, 3, 4, 5, 6, 7, 11090b57cec5SDimitry Andric 0, 1, 2, 3, 4, 5, 6, 7); 11100b57cec5SDimitry Andric } 11110b57cec5SDimitry Andric 11120b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 11130b57cec5SDimitry Andric _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) 11140b57cec5SDimitry Andric { 11150b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 11160b57cec5SDimitry Andric (__v16sf)_mm512_broadcast_f32x8(__A), 11170b57cec5SDimitry Andric (__v16sf)__O); 11180b57cec5SDimitry Andric } 11190b57cec5SDimitry Andric 11200b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 11210b57cec5SDimitry Andric _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) 11220b57cec5SDimitry Andric { 11230b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 11240b57cec5SDimitry Andric (__v16sf)_mm512_broadcast_f32x8(__A), 11250b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 11260b57cec5SDimitry Andric } 11270b57cec5SDimitry Andric 11280b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 11290b57cec5SDimitry Andric _mm512_broadcast_f64x2(__m128d __A) 11300b57cec5SDimitry Andric { 11310b57cec5SDimitry Andric return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 11320b57cec5SDimitry Andric 0, 1, 0, 1, 0, 1, 0, 1); 11330b57cec5SDimitry Andric } 11340b57cec5SDimitry Andric 11350b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 11360b57cec5SDimitry Andric _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) 11370b57cec5SDimitry Andric { 11380b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 11390b57cec5SDimitry Andric (__v8df)_mm512_broadcast_f64x2(__A), 11400b57cec5SDimitry Andric (__v8df)__O); 11410b57cec5SDimitry Andric } 11420b57cec5SDimitry Andric 11430b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 11440b57cec5SDimitry Andric _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) 11450b57cec5SDimitry Andric { 11460b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 11470b57cec5SDimitry Andric (__v8df)_mm512_broadcast_f64x2(__A), 11480b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 11490b57cec5SDimitry Andric } 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11520b57cec5SDimitry Andric _mm512_broadcast_i32x2 (__m128i __A) 11530b57cec5SDimitry Andric { 11540b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 11550b57cec5SDimitry Andric 0, 1, 0, 1, 0, 1, 0, 1, 11560b57cec5SDimitry Andric 0, 1, 0, 1, 0, 1, 0, 1); 11570b57cec5SDimitry Andric } 11580b57cec5SDimitry Andric 11590b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11600b57cec5SDimitry Andric _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) 11610b57cec5SDimitry Andric { 11620b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 11630b57cec5SDimitry Andric (__v16si)_mm512_broadcast_i32x2(__A), 11640b57cec5SDimitry Andric (__v16si)__O); 11650b57cec5SDimitry Andric } 11660b57cec5SDimitry Andric 11670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11680b57cec5SDimitry Andric _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) 11690b57cec5SDimitry Andric { 11700b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 11710b57cec5SDimitry Andric (__v16si)_mm512_broadcast_i32x2(__A), 11720b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 11730b57cec5SDimitry Andric } 11740b57cec5SDimitry Andric 11750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11760b57cec5SDimitry Andric _mm512_broadcast_i32x8(__m256i __A) 11770b57cec5SDimitry Andric { 11780b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A, 11790b57cec5SDimitry Andric 0, 1, 2, 3, 4, 5, 6, 7, 11800b57cec5SDimitry Andric 0, 1, 2, 3, 4, 5, 6, 7); 11810b57cec5SDimitry Andric } 11820b57cec5SDimitry Andric 11830b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11840b57cec5SDimitry Andric _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) 11850b57cec5SDimitry Andric { 11860b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 11870b57cec5SDimitry Andric (__v16si)_mm512_broadcast_i32x8(__A), 11880b57cec5SDimitry Andric (__v16si)__O); 11890b57cec5SDimitry Andric } 11900b57cec5SDimitry Andric 11910b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11920b57cec5SDimitry Andric _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) 11930b57cec5SDimitry Andric { 11940b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 11950b57cec5SDimitry Andric (__v16si)_mm512_broadcast_i32x8(__A), 11960b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 11970b57cec5SDimitry Andric } 11980b57cec5SDimitry Andric 11990b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 12000b57cec5SDimitry Andric _mm512_broadcast_i64x2(__m128i __A) 12010b57cec5SDimitry Andric { 12020b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 12030b57cec5SDimitry Andric 0, 1, 0, 1, 0, 1, 0, 1); 12040b57cec5SDimitry Andric } 12050b57cec5SDimitry Andric 12060b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 12070b57cec5SDimitry Andric _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) 12080b57cec5SDimitry Andric { 12090b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 12100b57cec5SDimitry Andric (__v8di)_mm512_broadcast_i64x2(__A), 12110b57cec5SDimitry Andric (__v8di)__O); 12120b57cec5SDimitry Andric } 12130b57cec5SDimitry Andric 12140b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 12150b57cec5SDimitry Andric _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) 12160b57cec5SDimitry Andric { 12170b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 12180b57cec5SDimitry Andric (__v8di)_mm512_broadcast_i64x2(__A), 12190b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 12200b57cec5SDimitry Andric } 12210b57cec5SDimitry Andric 12220b57cec5SDimitry Andric #define _mm512_extractf32x8_ps(A, imm) \ 1223349cc55cSDimitry Andric ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ 12240b57cec5SDimitry Andric (__v8sf)_mm256_undefined_ps(), \ 1225349cc55cSDimitry Andric (__mmask8)-1)) 12260b57cec5SDimitry Andric 12270b57cec5SDimitry Andric #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ 1228349cc55cSDimitry Andric ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ 12290b57cec5SDimitry Andric (__v8sf)(__m256)(W), \ 1230349cc55cSDimitry Andric (__mmask8)(U))) 12310b57cec5SDimitry Andric 12320b57cec5SDimitry Andric #define _mm512_maskz_extractf32x8_ps(U, A, imm) \ 1233349cc55cSDimitry Andric ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ 12340b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 1235349cc55cSDimitry Andric (__mmask8)(U))) 12360b57cec5SDimitry Andric 12370b57cec5SDimitry Andric #define _mm512_extractf64x2_pd(A, imm) \ 1238349cc55cSDimitry Andric ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ 12390b57cec5SDimitry Andric (int)(imm), \ 12400b57cec5SDimitry Andric (__v2df)_mm_undefined_pd(), \ 1241349cc55cSDimitry Andric (__mmask8)-1)) 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ 1244349cc55cSDimitry Andric ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ 12450b57cec5SDimitry Andric (int)(imm), \ 12460b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 1247349cc55cSDimitry Andric (__mmask8)(U))) 12480b57cec5SDimitry Andric 12490b57cec5SDimitry Andric #define _mm512_maskz_extractf64x2_pd(U, A, imm) \ 1250349cc55cSDimitry Andric ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ 12510b57cec5SDimitry Andric (int)(imm), \ 12520b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 1253349cc55cSDimitry Andric (__mmask8)(U))) 12540b57cec5SDimitry Andric 12550b57cec5SDimitry Andric #define _mm512_extracti32x8_epi32(A, imm) \ 1256349cc55cSDimitry Andric ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ 12570b57cec5SDimitry Andric (__v8si)_mm256_undefined_si256(), \ 1258349cc55cSDimitry Andric (__mmask8)-1)) 12590b57cec5SDimitry Andric 12600b57cec5SDimitry Andric #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \ 1261349cc55cSDimitry Andric ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ 12620b57cec5SDimitry Andric (__v8si)(__m256i)(W), \ 1263349cc55cSDimitry Andric (__mmask8)(U))) 12640b57cec5SDimitry Andric 12650b57cec5SDimitry Andric #define _mm512_maskz_extracti32x8_epi32(U, A, imm) \ 1266349cc55cSDimitry Andric ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ 12670b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256(), \ 1268349cc55cSDimitry Andric (__mmask8)(U))) 12690b57cec5SDimitry Andric 12700b57cec5SDimitry Andric #define _mm512_extracti64x2_epi64(A, imm) \ 1271349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ 12720b57cec5SDimitry Andric (int)(imm), \ 12730b57cec5SDimitry Andric (__v2di)_mm_undefined_si128(), \ 1274349cc55cSDimitry Andric (__mmask8)-1)) 12750b57cec5SDimitry Andric 12760b57cec5SDimitry Andric #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ 1277349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ 12780b57cec5SDimitry Andric (int)(imm), \ 12790b57cec5SDimitry Andric (__v2di)(__m128i)(W), \ 1280349cc55cSDimitry Andric (__mmask8)(U))) 12810b57cec5SDimitry Andric 12820b57cec5SDimitry Andric #define _mm512_maskz_extracti64x2_epi64(U, A, imm) \ 1283349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ 12840b57cec5SDimitry Andric (int)(imm), \ 12850b57cec5SDimitry Andric (__v2di)_mm_setzero_si128(), \ 1286349cc55cSDimitry Andric (__mmask8)(U))) 12870b57cec5SDimitry Andric 12880b57cec5SDimitry Andric #define _mm512_insertf32x8(A, B, imm) \ 1289349cc55cSDimitry Andric ((__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \ 1290349cc55cSDimitry Andric (__v8sf)(__m256)(B), (int)(imm))) 12910b57cec5SDimitry Andric 12920b57cec5SDimitry Andric #define _mm512_mask_insertf32x8(W, U, A, B, imm) \ 1293349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 12940b57cec5SDimitry Andric (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ 1295349cc55cSDimitry Andric (__v16sf)(__m512)(W))) 12960b57cec5SDimitry Andric 12970b57cec5SDimitry Andric #define _mm512_maskz_insertf32x8(U, A, B, imm) \ 1298349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 12990b57cec5SDimitry Andric (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ 1300349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 13010b57cec5SDimitry Andric 13020b57cec5SDimitry Andric #define _mm512_insertf64x2(A, B, imm) \ 1303349cc55cSDimitry Andric ((__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \ 1304349cc55cSDimitry Andric (__v2df)(__m128d)(B), (int)(imm))) 13050b57cec5SDimitry Andric 13060b57cec5SDimitry Andric #define _mm512_mask_insertf64x2(W, U, A, B, imm) \ 1307349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 13080b57cec5SDimitry Andric (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ 1309349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 13100b57cec5SDimitry Andric 13110b57cec5SDimitry Andric #define _mm512_maskz_insertf64x2(U, A, B, imm) \ 1312349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 13130b57cec5SDimitry Andric (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ 1314349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 13150b57cec5SDimitry Andric 13160b57cec5SDimitry Andric #define _mm512_inserti32x8(A, B, imm) \ 1317349cc55cSDimitry Andric ((__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \ 1318349cc55cSDimitry Andric (__v8si)(__m256i)(B), (int)(imm))) 13190b57cec5SDimitry Andric 13200b57cec5SDimitry Andric #define _mm512_mask_inserti32x8(W, U, A, B, imm) \ 1321349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 13220b57cec5SDimitry Andric (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ 1323349cc55cSDimitry Andric (__v16si)(__m512i)(W))) 13240b57cec5SDimitry Andric 13250b57cec5SDimitry Andric #define _mm512_maskz_inserti32x8(U, A, B, imm) \ 1326349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 13270b57cec5SDimitry Andric (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ 1328349cc55cSDimitry Andric (__v16si)_mm512_setzero_si512())) 13290b57cec5SDimitry Andric 13300b57cec5SDimitry Andric #define _mm512_inserti64x2(A, B, imm) \ 1331349cc55cSDimitry Andric ((__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \ 1332349cc55cSDimitry Andric (__v2di)(__m128i)(B), (int)(imm))) 13330b57cec5SDimitry Andric 13340b57cec5SDimitry Andric #define _mm512_mask_inserti64x2(W, U, A, B, imm) \ 1335349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 13360b57cec5SDimitry Andric (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ 1337349cc55cSDimitry Andric (__v8di)(__m512i)(W))) 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric #define _mm512_maskz_inserti64x2(U, A, B, imm) \ 1340349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 13410b57cec5SDimitry Andric (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ 1342349cc55cSDimitry Andric (__v8di)_mm512_setzero_si512())) 13430b57cec5SDimitry Andric 13440b57cec5SDimitry Andric #define _mm512_mask_fpclass_ps_mask(U, A, imm) \ 1345349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 1346349cc55cSDimitry Andric (int)(imm), (__mmask16)(U))) 13470b57cec5SDimitry Andric 13480b57cec5SDimitry Andric #define _mm512_fpclass_ps_mask(A, imm) \ 1349349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 1350349cc55cSDimitry Andric (int)(imm), (__mmask16)-1)) 13510b57cec5SDimitry Andric 13520b57cec5SDimitry Andric #define _mm512_mask_fpclass_pd_mask(U, A, imm) \ 1353349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 1354349cc55cSDimitry Andric (__mmask8)(U))) 13550b57cec5SDimitry Andric 13560b57cec5SDimitry Andric #define _mm512_fpclass_pd_mask(A, imm) \ 1357349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 1358349cc55cSDimitry Andric (__mmask8)-1)) 13590b57cec5SDimitry Andric 13600b57cec5SDimitry Andric #define _mm_fpclass_sd_mask(A, imm) \ 1361349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 1362349cc55cSDimitry Andric (__mmask8)-1)) 13630b57cec5SDimitry Andric 13640b57cec5SDimitry Andric #define _mm_mask_fpclass_sd_mask(U, A, imm) \ 1365349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 1366349cc55cSDimitry Andric (__mmask8)(U))) 13670b57cec5SDimitry Andric 13680b57cec5SDimitry Andric #define _mm_fpclass_ss_mask(A, imm) \ 1369349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 1370349cc55cSDimitry Andric (__mmask8)-1)) 13710b57cec5SDimitry Andric 13720b57cec5SDimitry Andric #define _mm_mask_fpclass_ss_mask(U, A, imm) \ 1373349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 1374349cc55cSDimitry Andric (__mmask8)(U))) 13750b57cec5SDimitry Andric 13760b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS512 13770b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 13780b57cec5SDimitry Andric 13790b57cec5SDimitry Andric #endif 1380