10b57cec5SDimitry Andric /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric #ifndef __IMMINTRIN_H 100b57cec5SDimitry Andric #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 110b57cec5SDimitry Andric #endif 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #ifndef __AVX512FINTRIN_H 140b57cec5SDimitry Andric #define __AVX512FINTRIN_H 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric typedef char __v64qi __attribute__((__vector_size__(64))); 170b57cec5SDimitry Andric typedef short __v32hi __attribute__((__vector_size__(64))); 180b57cec5SDimitry Andric typedef double __v8df __attribute__((__vector_size__(64))); 190b57cec5SDimitry Andric typedef float __v16sf __attribute__((__vector_size__(64))); 200b57cec5SDimitry Andric typedef long long __v8di __attribute__((__vector_size__(64))); 210b57cec5SDimitry Andric typedef int __v16si __attribute__((__vector_size__(64))); 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric /* Unsigned types */ 240b57cec5SDimitry Andric typedef unsigned char __v64qu __attribute__((__vector_size__(64))); 250b57cec5SDimitry Andric typedef unsigned short __v32hu __attribute__((__vector_size__(64))); 260b57cec5SDimitry Andric typedef unsigned long long __v8du __attribute__((__vector_size__(64))); 270b57cec5SDimitry Andric typedef unsigned int __v16su __attribute__((__vector_size__(64))); 280b57cec5SDimitry Andric 2904eeddc0SDimitry Andric /* We need an explicitly signed variant for char. 
Note that this shouldn't 3004eeddc0SDimitry Andric * appear in the interface though. */ 3104eeddc0SDimitry Andric typedef signed char __v64qs __attribute__((__vector_size__(64))); 3204eeddc0SDimitry Andric 330b57cec5SDimitry Andric typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); 340b57cec5SDimitry Andric typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64))); 350b57cec5SDimitry Andric typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64))); 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1))); 380b57cec5SDimitry Andric typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1))); 390b57cec5SDimitry Andric typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1))); 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric typedef unsigned char __mmask8; 420b57cec5SDimitry Andric typedef unsigned short __mmask16; 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric /* Rounding mode macros. 
*/ 450b57cec5SDimitry Andric #define _MM_FROUND_TO_NEAREST_INT 0x00 460b57cec5SDimitry Andric #define _MM_FROUND_TO_NEG_INF 0x01 470b57cec5SDimitry Andric #define _MM_FROUND_TO_POS_INF 0x02 480b57cec5SDimitry Andric #define _MM_FROUND_TO_ZERO 0x03 490b57cec5SDimitry Andric #define _MM_FROUND_CUR_DIRECTION 0x04 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric /* Constants for integer comparison predicates */ 520b57cec5SDimitry Andric typedef enum { 530b57cec5SDimitry Andric _MM_CMPINT_EQ, /* Equal */ 540b57cec5SDimitry Andric _MM_CMPINT_LT, /* Less than */ 550b57cec5SDimitry Andric _MM_CMPINT_LE, /* Less than or Equal */ 560b57cec5SDimitry Andric _MM_CMPINT_UNUSED, 570b57cec5SDimitry Andric _MM_CMPINT_NE, /* Not Equal */ 580b57cec5SDimitry Andric _MM_CMPINT_NLT, /* Not Less than */ 590b57cec5SDimitry Andric #define _MM_CMPINT_GE _MM_CMPINT_NLT /* Greater than or Equal */ 600b57cec5SDimitry Andric _MM_CMPINT_NLE /* Not Less than or Equal */ 610b57cec5SDimitry Andric #define _MM_CMPINT_GT _MM_CMPINT_NLE /* Greater than */ 620b57cec5SDimitry Andric } _MM_CMPINT_ENUM; 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric typedef enum 650b57cec5SDimitry Andric { 660b57cec5SDimitry Andric _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, 670b57cec5SDimitry Andric _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, 680b57cec5SDimitry Andric _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08, 690b57cec5SDimitry Andric _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B, 700b57cec5SDimitry Andric _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, 710b57cec5SDimitry Andric _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, 720b57cec5SDimitry Andric _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14, 730b57cec5SDimitry Andric _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17, 740b57cec5SDimitry Andric _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 
0x1A, 750b57cec5SDimitry Andric _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, 760b57cec5SDimitry Andric _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20, 770b57cec5SDimitry Andric _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23, 780b57cec5SDimitry Andric _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, 790b57cec5SDimitry Andric _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, 800b57cec5SDimitry Andric _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C, 810b57cec5SDimitry Andric _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F, 820b57cec5SDimitry Andric _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, 830b57cec5SDimitry Andric _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, 840b57cec5SDimitry Andric _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38, 850b57cec5SDimitry Andric _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B, 860b57cec5SDimitry Andric _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, 870b57cec5SDimitry Andric _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, 880b57cec5SDimitry Andric _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44, 890b57cec5SDimitry Andric _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47, 900b57cec5SDimitry Andric _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, 910b57cec5SDimitry Andric _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, 920b57cec5SDimitry Andric _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50, 930b57cec5SDimitry Andric _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53, 940b57cec5SDimitry Andric _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, 950b57cec5SDimitry Andric _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, 960b57cec5SDimitry Andric _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD 
= 0x5B, _MM_PERM_BBDA = 0x5C, 970b57cec5SDimitry Andric _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F, 980b57cec5SDimitry Andric _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, 990b57cec5SDimitry Andric _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, 1000b57cec5SDimitry Andric _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68, 1010b57cec5SDimitry Andric _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B, 1020b57cec5SDimitry Andric _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, 1030b57cec5SDimitry Andric _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, 1040b57cec5SDimitry Andric _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74, 1050b57cec5SDimitry Andric _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77, 1060b57cec5SDimitry Andric _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, 1070b57cec5SDimitry Andric _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, 1080b57cec5SDimitry Andric _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80, 1090b57cec5SDimitry Andric _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83, 1100b57cec5SDimitry Andric _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, 1110b57cec5SDimitry Andric _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, 1120b57cec5SDimitry Andric _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C, 1130b57cec5SDimitry Andric _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F, 1140b57cec5SDimitry Andric _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, 1150b57cec5SDimitry Andric _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, 1160b57cec5SDimitry Andric _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98, 1170b57cec5SDimitry Andric _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B, 1180b57cec5SDimitry 
Andric _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, 1190b57cec5SDimitry Andric _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, 1200b57cec5SDimitry Andric _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4, 1210b57cec5SDimitry Andric _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7, 1220b57cec5SDimitry Andric _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, 1230b57cec5SDimitry Andric _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, 1240b57cec5SDimitry Andric _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0, 1250b57cec5SDimitry Andric _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3, 1260b57cec5SDimitry Andric _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, 1270b57cec5SDimitry Andric _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, 1280b57cec5SDimitry Andric _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC, 1290b57cec5SDimitry Andric _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF, 1300b57cec5SDimitry Andric _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, 1310b57cec5SDimitry Andric _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, 1320b57cec5SDimitry Andric _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8, 1330b57cec5SDimitry Andric _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB, 1340b57cec5SDimitry Andric _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, 1350b57cec5SDimitry Andric _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, 1360b57cec5SDimitry Andric _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4, 1370b57cec5SDimitry Andric _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7, 1380b57cec5SDimitry Andric _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, 1390b57cec5SDimitry Andric _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 
0xDC, _MM_PERM_DBDB = 0xDD, 1400b57cec5SDimitry Andric _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0, 1410b57cec5SDimitry Andric _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3, 1420b57cec5SDimitry Andric _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, 1430b57cec5SDimitry Andric _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, 1440b57cec5SDimitry Andric _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC, 1450b57cec5SDimitry Andric _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF, 1460b57cec5SDimitry Andric _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, 1470b57cec5SDimitry Andric _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, 1480b57cec5SDimitry Andric _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8, 1490b57cec5SDimitry Andric _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB, 1500b57cec5SDimitry Andric _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, 1510b57cec5SDimitry Andric _MM_PERM_DDDD = 0xFF 1520b57cec5SDimitry Andric } _MM_PERM_ENUM; 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric typedef enum 1550b57cec5SDimitry Andric { 1560b57cec5SDimitry Andric _MM_MANT_NORM_1_2, /* interval [1, 2) */ 1570b57cec5SDimitry Andric _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ 1580b57cec5SDimitry Andric _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ 1590b57cec5SDimitry Andric _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */ 1600b57cec5SDimitry Andric } _MM_MANTISSA_NORM_ENUM; 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric typedef enum 1630b57cec5SDimitry Andric { 1640b57cec5SDimitry Andric _MM_MANT_SIGN_src, /* sign = sign(SRC) */ 1650b57cec5SDimitry Andric _MM_MANT_SIGN_zero, /* sign = 0 */ 1660b57cec5SDimitry Andric _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ 1670b57cec5SDimitry Andric } _MM_MANTISSA_SIGN_ENUM; 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric /* 
Define the default attributes for the functions in this file. */ 170*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f,evex512"), __min_vector_width__(512))) 171*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS128 \ 172*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 173*5f757f3fSDimitry Andric __target__("avx512f,no-evex512"), __min_vector_width__(128))) 174*5f757f3fSDimitry Andric #define __DEFAULT_FN_ATTRS \ 175*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 176*5f757f3fSDimitry Andric __target__("avx512f,no-evex512"))) 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric /* Create vectors with repeated elements */ 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 1810b57cec5SDimitry Andric _mm512_setzero_si512(void) 1820b57cec5SDimitry Andric { 1830b57cec5SDimitry Andric return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 1840b57cec5SDimitry Andric } 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric #define _mm512_setzero_epi32 _mm512_setzero_si512 1870b57cec5SDimitry Andric 1880b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 1890b57cec5SDimitry Andric _mm512_undefined_pd(void) 1900b57cec5SDimitry Andric { 1910b57cec5SDimitry Andric return (__m512d)__builtin_ia32_undef512(); 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 1950b57cec5SDimitry Andric _mm512_undefined(void) 1960b57cec5SDimitry Andric { 1970b57cec5SDimitry Andric return (__m512)__builtin_ia32_undef512(); 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric 2000b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 2010b57cec5SDimitry Andric _mm512_undefined_ps(void) 2020b57cec5SDimitry Andric { 2030b57cec5SDimitry Andric return (__m512)__builtin_ia32_undef512(); 2040b57cec5SDimitry Andric 
} 2050b57cec5SDimitry Andric 2060b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 2070b57cec5SDimitry Andric _mm512_undefined_epi32(void) 2080b57cec5SDimitry Andric { 2090b57cec5SDimitry Andric return (__m512i)__builtin_ia32_undef512(); 2100b57cec5SDimitry Andric } 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 2130b57cec5SDimitry Andric _mm512_broadcastd_epi32 (__m128i __A) 2140b57cec5SDimitry Andric { 2150b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A, 2160b57cec5SDimitry Andric 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 2200b57cec5SDimitry Andric _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) 2210b57cec5SDimitry Andric { 2220b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__M, 2230b57cec5SDimitry Andric (__v16si) _mm512_broadcastd_epi32(__A), 2240b57cec5SDimitry Andric (__v16si) __O); 2250b57cec5SDimitry Andric } 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 2280b57cec5SDimitry Andric _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) 2290b57cec5SDimitry Andric { 2300b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__M, 2310b57cec5SDimitry Andric (__v16si) _mm512_broadcastd_epi32(__A), 2320b57cec5SDimitry Andric (__v16si) _mm512_setzero_si512()); 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 2360b57cec5SDimitry Andric _mm512_broadcastq_epi64 (__m128i __A) 2370b57cec5SDimitry Andric { 2380b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A, 2390b57cec5SDimitry Andric 0, 0, 0, 0, 0, 0, 0, 0); 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2420b57cec5SDimitry 
Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 2430b57cec5SDimitry Andric _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) 2440b57cec5SDimitry Andric { 2450b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512(__M, 2460b57cec5SDimitry Andric (__v8di) _mm512_broadcastq_epi64(__A), 2470b57cec5SDimitry Andric (__v8di) __O); 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric } 2500b57cec5SDimitry Andric 2510b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 2520b57cec5SDimitry Andric _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 2530b57cec5SDimitry Andric { 2540b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512(__M, 2550b57cec5SDimitry Andric (__v8di) _mm512_broadcastq_epi64(__A), 2560b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512()); 2570b57cec5SDimitry Andric } 2580b57cec5SDimitry Andric 2590b57cec5SDimitry Andric 2600b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 2610b57cec5SDimitry Andric _mm512_setzero_ps(void) 2620b57cec5SDimitry Andric { 263bdd1243dSDimitry Andric return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 264bdd1243dSDimitry Andric 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; 2650b57cec5SDimitry Andric } 2660b57cec5SDimitry Andric 2670b57cec5SDimitry Andric #define _mm512_setzero _mm512_setzero_ps 2680b57cec5SDimitry Andric 2690b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 2700b57cec5SDimitry Andric _mm512_setzero_pd(void) 2710b57cec5SDimitry Andric { 2720b57cec5SDimitry Andric return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 2760b57cec5SDimitry Andric _mm512_set1_ps(float __w) 2770b57cec5SDimitry Andric { 2780b57cec5SDimitry Andric return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, 2790b57cec5SDimitry Andric 
__w, __w, __w, __w, __w, __w, __w, __w }; 2800b57cec5SDimitry Andric } 2810b57cec5SDimitry Andric 2820b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 2830b57cec5SDimitry Andric _mm512_set1_pd(double __w) 2840b57cec5SDimitry Andric { 2850b57cec5SDimitry Andric return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; 2860b57cec5SDimitry Andric } 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 2890b57cec5SDimitry Andric _mm512_set1_epi8(char __w) 2900b57cec5SDimitry Andric { 2910b57cec5SDimitry Andric return __extension__ (__m512i)(__v64qi){ 2920b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w, 2930b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w, 2940b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w, 2950b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w, 2960b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w, 2970b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w, 2980b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w, 2990b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w }; 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric 3020b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 3030b57cec5SDimitry Andric _mm512_set1_epi16(short __w) 3040b57cec5SDimitry Andric { 3050b57cec5SDimitry Andric return __extension__ (__m512i)(__v32hi){ 3060b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w, 3070b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w, 3080b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w, 3090b57cec5SDimitry Andric __w, __w, __w, __w, __w, __w, __w, __w }; 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 3130b57cec5SDimitry Andric _mm512_set1_epi32(int __s) 3140b57cec5SDimitry Andric { 3150b57cec5SDimitry Andric return __extension__ 
(__m512i)(__v16si){ 3160b57cec5SDimitry Andric __s, __s, __s, __s, __s, __s, __s, __s, 3170b57cec5SDimitry Andric __s, __s, __s, __s, __s, __s, __s, __s }; 3180b57cec5SDimitry Andric } 3190b57cec5SDimitry Andric 3200b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 3210b57cec5SDimitry Andric _mm512_maskz_set1_epi32(__mmask16 __M, int __A) 3220b57cec5SDimitry Andric { 3230b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__M, 3240b57cec5SDimitry Andric (__v16si)_mm512_set1_epi32(__A), 3250b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 3260b57cec5SDimitry Andric } 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 3290b57cec5SDimitry Andric _mm512_set1_epi64(long long __d) 3300b57cec5SDimitry Andric { 3310b57cec5SDimitry Andric return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 3350b57cec5SDimitry Andric _mm512_maskz_set1_epi64(__mmask8 __M, long long __A) 3360b57cec5SDimitry Andric { 3370b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512(__M, 3380b57cec5SDimitry Andric (__v8di)_mm512_set1_epi64(__A), 3390b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 3400b57cec5SDimitry Andric } 3410b57cec5SDimitry Andric 3420b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 3430b57cec5SDimitry Andric _mm512_broadcastss_ps(__m128 __A) 3440b57cec5SDimitry Andric { 3450b57cec5SDimitry Andric return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A, 3460b57cec5SDimitry Andric 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric 3490b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 3500b57cec5SDimitry Andric _mm512_set4_epi32 (int __A, int __B, int __C, int __D) 3510b57cec5SDimitry Andric { 3520b57cec5SDimitry Andric 
return __extension__ (__m512i)(__v16si) 3530b57cec5SDimitry Andric { __D, __C, __B, __A, __D, __C, __B, __A, 3540b57cec5SDimitry Andric __D, __C, __B, __A, __D, __C, __B, __A }; 3550b57cec5SDimitry Andric } 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 3580b57cec5SDimitry Andric _mm512_set4_epi64 (long long __A, long long __B, long long __C, 3590b57cec5SDimitry Andric long long __D) 3600b57cec5SDimitry Andric { 3610b57cec5SDimitry Andric return __extension__ (__m512i) (__v8di) 3620b57cec5SDimitry Andric { __D, __C, __B, __A, __D, __C, __B, __A }; 3630b57cec5SDimitry Andric } 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 3660b57cec5SDimitry Andric _mm512_set4_pd (double __A, double __B, double __C, double __D) 3670b57cec5SDimitry Andric { 3680b57cec5SDimitry Andric return __extension__ (__m512d) 3690b57cec5SDimitry Andric { __D, __C, __B, __A, __D, __C, __B, __A }; 3700b57cec5SDimitry Andric } 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 3730b57cec5SDimitry Andric _mm512_set4_ps (float __A, float __B, float __C, float __D) 3740b57cec5SDimitry Andric { 3750b57cec5SDimitry Andric return __extension__ (__m512) 3760b57cec5SDimitry Andric { __D, __C, __B, __A, __D, __C, __B, __A, 3770b57cec5SDimitry Andric __D, __C, __B, __A, __D, __C, __B, __A }; 3780b57cec5SDimitry Andric } 3790b57cec5SDimitry Andric 3800b57cec5SDimitry Andric #define _mm512_setr4_epi32(e0,e1,e2,e3) \ 3810b57cec5SDimitry Andric _mm512_set4_epi32((e3),(e2),(e1),(e0)) 3820b57cec5SDimitry Andric 3830b57cec5SDimitry Andric #define _mm512_setr4_epi64(e0,e1,e2,e3) \ 3840b57cec5SDimitry Andric _mm512_set4_epi64((e3),(e2),(e1),(e0)) 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric #define _mm512_setr4_pd(e0,e1,e2,e3) \ 3870b57cec5SDimitry Andric _mm512_set4_pd((e3),(e2),(e1),(e0)) 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric #define 
_mm512_setr4_ps(e0,e1,e2,e3) \ 3900b57cec5SDimitry Andric _mm512_set4_ps((e3),(e2),(e1),(e0)) 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 3930b57cec5SDimitry Andric _mm512_broadcastsd_pd(__m128d __A) 3940b57cec5SDimitry Andric { 3950b57cec5SDimitry Andric return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A, 3960b57cec5SDimitry Andric 0, 0, 0, 0, 0, 0, 0, 0); 3970b57cec5SDimitry Andric } 3980b57cec5SDimitry Andric 3990b57cec5SDimitry Andric /* Cast between vector types */ 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 4020b57cec5SDimitry Andric _mm512_castpd256_pd512(__m256d __a) 4030b57cec5SDimitry Andric { 40406c3fb27SDimitry Andric return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, 40506c3fb27SDimitry Andric 1, 2, 3, 4, 5, 6, 7); 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 4090b57cec5SDimitry Andric _mm512_castps256_ps512(__m256 __a) 4100b57cec5SDimitry Andric { 41106c3fb27SDimitry Andric return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0, 41206c3fb27SDimitry Andric 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric 4150b57cec5SDimitry Andric static __inline __m128d __DEFAULT_FN_ATTRS512 4160b57cec5SDimitry Andric _mm512_castpd512_pd128(__m512d __a) 4170b57cec5SDimitry Andric { 4180b57cec5SDimitry Andric return __builtin_shufflevector(__a, __a, 0, 1); 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric static __inline __m256d __DEFAULT_FN_ATTRS512 4220b57cec5SDimitry Andric _mm512_castpd512_pd256 (__m512d __A) 4230b57cec5SDimitry Andric { 4240b57cec5SDimitry Andric return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); 4250b57cec5SDimitry Andric } 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric static __inline 
__m128 __DEFAULT_FN_ATTRS512 4280b57cec5SDimitry Andric _mm512_castps512_ps128(__m512 __a) 4290b57cec5SDimitry Andric { 4300b57cec5SDimitry Andric return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); 4310b57cec5SDimitry Andric } 4320b57cec5SDimitry Andric 4330b57cec5SDimitry Andric static __inline __m256 __DEFAULT_FN_ATTRS512 4340b57cec5SDimitry Andric _mm512_castps512_ps256 (__m512 __A) 4350b57cec5SDimitry Andric { 4360b57cec5SDimitry Andric return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); 4370b57cec5SDimitry Andric } 4380b57cec5SDimitry Andric 4390b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 4400b57cec5SDimitry Andric _mm512_castpd_ps (__m512d __A) 4410b57cec5SDimitry Andric { 4420b57cec5SDimitry Andric return (__m512) (__A); 4430b57cec5SDimitry Andric } 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 4460b57cec5SDimitry Andric _mm512_castpd_si512 (__m512d __A) 4470b57cec5SDimitry Andric { 4480b57cec5SDimitry Andric return (__m512i) (__A); 4490b57cec5SDimitry Andric } 4500b57cec5SDimitry Andric 4510b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 4520b57cec5SDimitry Andric _mm512_castpd128_pd512 (__m128d __A) 4530b57cec5SDimitry Andric { 45406c3fb27SDimitry Andric __m256d __B = __builtin_nondeterministic_value(__B); 45506c3fb27SDimitry Andric return __builtin_shufflevector( 45606c3fb27SDimitry Andric __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3), 45706c3fb27SDimitry Andric __B, 0, 1, 2, 3, 4, 5, 6, 7); 4580b57cec5SDimitry Andric } 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 4610b57cec5SDimitry Andric _mm512_castps_pd (__m512 __A) 4620b57cec5SDimitry Andric { 4630b57cec5SDimitry Andric return (__m512d) (__A); 4640b57cec5SDimitry Andric } 4650b57cec5SDimitry Andric 4660b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 4670b57cec5SDimitry Andric 
_mm512_castps_si512 (__m512 __A) 4680b57cec5SDimitry Andric { 4690b57cec5SDimitry Andric return (__m512i) (__A); 4700b57cec5SDimitry Andric } 4710b57cec5SDimitry Andric 4720b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 4730b57cec5SDimitry Andric _mm512_castps128_ps512 (__m128 __A) 4740b57cec5SDimitry Andric { 47506c3fb27SDimitry Andric __m256 __B = __builtin_nondeterministic_value(__B); 47606c3fb27SDimitry Andric return __builtin_shufflevector( 47706c3fb27SDimitry Andric __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7), 47806c3fb27SDimitry Andric __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 4790b57cec5SDimitry Andric } 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4820b57cec5SDimitry Andric _mm512_castsi128_si512 (__m128i __A) 4830b57cec5SDimitry Andric { 48406c3fb27SDimitry Andric __m256i __B = __builtin_nondeterministic_value(__B); 48506c3fb27SDimitry Andric return __builtin_shufflevector( 48606c3fb27SDimitry Andric __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3), 48706c3fb27SDimitry Andric __B, 0, 1, 2, 3, 4, 5, 6, 7); 4880b57cec5SDimitry Andric } 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 4910b57cec5SDimitry Andric _mm512_castsi256_si512 (__m256i __A) 4920b57cec5SDimitry Andric { 49306c3fb27SDimitry Andric return __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7); 4940b57cec5SDimitry Andric } 4950b57cec5SDimitry Andric 4960b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 4970b57cec5SDimitry Andric _mm512_castsi512_ps (__m512i __A) 4980b57cec5SDimitry Andric { 4990b57cec5SDimitry Andric return (__m512) (__A); 5000b57cec5SDimitry Andric } 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 5030b57cec5SDimitry Andric 
_mm512_castsi512_pd (__m512i __A) 5040b57cec5SDimitry Andric { 5050b57cec5SDimitry Andric return (__m512d) (__A); 5060b57cec5SDimitry Andric } 5070b57cec5SDimitry Andric 5080b57cec5SDimitry Andric static __inline __m128i __DEFAULT_FN_ATTRS512 5090b57cec5SDimitry Andric _mm512_castsi512_si128 (__m512i __A) 5100b57cec5SDimitry Andric { 5110b57cec5SDimitry Andric return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); 5120b57cec5SDimitry Andric } 5130b57cec5SDimitry Andric 5140b57cec5SDimitry Andric static __inline __m256i __DEFAULT_FN_ATTRS512 5150b57cec5SDimitry Andric _mm512_castsi512_si256 (__m512i __A) 5160b57cec5SDimitry Andric { 5170b57cec5SDimitry Andric return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); 5180b57cec5SDimitry Andric } 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 5210b57cec5SDimitry Andric _mm512_int2mask(int __a) 5220b57cec5SDimitry Andric { 5230b57cec5SDimitry Andric return (__mmask16)__a; 5240b57cec5SDimitry Andric } 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 5270b57cec5SDimitry Andric _mm512_mask2int(__mmask16 __a) 5280b57cec5SDimitry Andric { 5290b57cec5SDimitry Andric return (int)__a; 5300b57cec5SDimitry Andric } 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric /// Constructs a 512-bit floating-point vector of [8 x double] from a 5330b57cec5SDimitry Andric /// 128-bit floating-point vector of [2 x double]. The lower 128 bits 5340b57cec5SDimitry Andric /// contain the value of the source vector. The upper 384 bits are set 5350b57cec5SDimitry Andric /// to zero. 5360b57cec5SDimitry Andric /// 5370b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5380b57cec5SDimitry Andric /// 5390b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 5400b57cec5SDimitry Andric /// 5410b57cec5SDimitry Andric /// \param __a 5420b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. 
5430b57cec5SDimitry Andric /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits 5440b57cec5SDimitry Andric /// contain the value of the parameter. The upper 384 bits are set to zero. 5450b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 5460b57cec5SDimitry Andric _mm512_zextpd128_pd512(__m128d __a) 5470b57cec5SDimitry Andric { 5480b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3); 5490b57cec5SDimitry Andric } 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric /// Constructs a 512-bit floating-point vector of [8 x double] from a 5520b57cec5SDimitry Andric /// 256-bit floating-point vector of [4 x double]. The lower 256 bits 5530b57cec5SDimitry Andric /// contain the value of the source vector. The upper 256 bits are set 5540b57cec5SDimitry Andric /// to zero. 5550b57cec5SDimitry Andric /// 5560b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5570b57cec5SDimitry Andric /// 5580b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 5590b57cec5SDimitry Andric /// 5600b57cec5SDimitry Andric /// \param __a 5610b57cec5SDimitry Andric /// A 256-bit vector of [4 x double]. 5620b57cec5SDimitry Andric /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits 5630b57cec5SDimitry Andric /// contain the value of the parameter. The upper 256 bits are set to zero. 5640b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 5650b57cec5SDimitry Andric _mm512_zextpd256_pd512(__m256d __a) 5660b57cec5SDimitry Andric { 5670b57cec5SDimitry Andric return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7); 5680b57cec5SDimitry Andric } 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric /// Constructs a 512-bit floating-point vector of [16 x float] from a 5710b57cec5SDimitry Andric /// 128-bit floating-point vector of [4 x float]. 
The lower 128 bits contain 5720b57cec5SDimitry Andric /// the value of the source vector. The upper 384 bits are set to zero. 5730b57cec5SDimitry Andric /// 5740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5750b57cec5SDimitry Andric /// 5760b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 5770b57cec5SDimitry Andric /// 5780b57cec5SDimitry Andric /// \param __a 5790b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. 5800b57cec5SDimitry Andric /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits 5810b57cec5SDimitry Andric /// contain the value of the parameter. The upper 384 bits are set to zero. 5820b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 5830b57cec5SDimitry Andric _mm512_zextps128_ps512(__m128 __a) 5840b57cec5SDimitry Andric { 5850b57cec5SDimitry Andric return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7); 5860b57cec5SDimitry Andric } 5870b57cec5SDimitry Andric 5880b57cec5SDimitry Andric /// Constructs a 512-bit floating-point vector of [16 x float] from a 5890b57cec5SDimitry Andric /// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain 5900b57cec5SDimitry Andric /// the value of the source vector. The upper 256 bits are set to zero. 5910b57cec5SDimitry Andric /// 5920b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 5930b57cec5SDimitry Andric /// 5940b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 5950b57cec5SDimitry Andric /// 5960b57cec5SDimitry Andric /// \param __a 5970b57cec5SDimitry Andric /// A 256-bit vector of [8 x float]. 5980b57cec5SDimitry Andric /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits 5990b57cec5SDimitry Andric /// contain the value of the parameter. The upper 256 bits are set to zero. 
6000b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 6010b57cec5SDimitry Andric _mm512_zextps256_ps512(__m256 __a) 6020b57cec5SDimitry Andric { 6030b57cec5SDimitry Andric return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 6040b57cec5SDimitry Andric } 6050b57cec5SDimitry Andric 6060b57cec5SDimitry Andric /// Constructs a 512-bit integer vector from a 128-bit integer vector. 6070b57cec5SDimitry Andric /// The lower 128 bits contain the value of the source vector. The upper 6080b57cec5SDimitry Andric /// 384 bits are set to zero. 6090b57cec5SDimitry Andric /// 6100b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6110b57cec5SDimitry Andric /// 6120b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 6130b57cec5SDimitry Andric /// 6140b57cec5SDimitry Andric /// \param __a 6150b57cec5SDimitry Andric /// A 128-bit integer vector. 6160b57cec5SDimitry Andric /// \returns A 512-bit integer vector. The lower 128 bits contain the value of 6170b57cec5SDimitry Andric /// the parameter. The upper 384 bits are set to zero. 6180b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 6190b57cec5SDimitry Andric _mm512_zextsi128_si512(__m128i __a) 6200b57cec5SDimitry Andric { 6210b57cec5SDimitry Andric return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3); 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric /// Constructs a 512-bit integer vector from a 256-bit integer vector. 6250b57cec5SDimitry Andric /// The lower 256 bits contain the value of the source vector. The upper 6260b57cec5SDimitry Andric /// 256 bits are set to zero. 6270b57cec5SDimitry Andric /// 6280b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 6290b57cec5SDimitry Andric /// 6300b57cec5SDimitry Andric /// This intrinsic has no corresponding instruction. 
6310b57cec5SDimitry Andric /// 6320b57cec5SDimitry Andric /// \param __a 6330b57cec5SDimitry Andric /// A 256-bit integer vector. 6340b57cec5SDimitry Andric /// \returns A 512-bit integer vector. The lower 256 bits contain the value of 6350b57cec5SDimitry Andric /// the parameter. The upper 256 bits are set to zero. 6360b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 6370b57cec5SDimitry Andric _mm512_zextsi256_si512(__m256i __a) 6380b57cec5SDimitry Andric { 6390b57cec5SDimitry Andric return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7); 6400b57cec5SDimitry Andric } 6410b57cec5SDimitry Andric 6420b57cec5SDimitry Andric /* Bitwise operators */ 6430b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6440b57cec5SDimitry Andric _mm512_and_epi32(__m512i __a, __m512i __b) 6450b57cec5SDimitry Andric { 6460b57cec5SDimitry Andric return (__m512i)((__v16su)__a & (__v16su)__b); 6470b57cec5SDimitry Andric } 6480b57cec5SDimitry Andric 6490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6500b57cec5SDimitry Andric _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 6510b57cec5SDimitry Andric { 6520b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 6530b57cec5SDimitry Andric (__v16si) _mm512_and_epi32(__a, __b), 6540b57cec5SDimitry Andric (__v16si) __src); 6550b57cec5SDimitry Andric } 6560b57cec5SDimitry Andric 6570b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6580b57cec5SDimitry Andric _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) 6590b57cec5SDimitry Andric { 6600b57cec5SDimitry Andric return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (), 6610b57cec5SDimitry Andric __k, __a, __b); 6620b57cec5SDimitry Andric } 6630b57cec5SDimitry Andric 6640b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6650b57cec5SDimitry Andric _mm512_and_epi64(__m512i __a, 
__m512i __b) 6660b57cec5SDimitry Andric { 6670b57cec5SDimitry Andric return (__m512i)((__v8du)__a & (__v8du)__b); 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6710b57cec5SDimitry Andric _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 6720b57cec5SDimitry Andric { 6730b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k, 6740b57cec5SDimitry Andric (__v8di) _mm512_and_epi64(__a, __b), 6750b57cec5SDimitry Andric (__v8di) __src); 6760b57cec5SDimitry Andric } 6770b57cec5SDimitry Andric 6780b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6790b57cec5SDimitry Andric _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) 6800b57cec5SDimitry Andric { 6810b57cec5SDimitry Andric return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (), 6820b57cec5SDimitry Andric __k, __a, __b); 6830b57cec5SDimitry Andric } 6840b57cec5SDimitry Andric 6850b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6860b57cec5SDimitry Andric _mm512_andnot_si512 (__m512i __A, __m512i __B) 6870b57cec5SDimitry Andric { 6880b57cec5SDimitry Andric return (__m512i)(~(__v8du)__A & (__v8du)__B); 6890b57cec5SDimitry Andric } 6900b57cec5SDimitry Andric 6910b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6920b57cec5SDimitry Andric _mm512_andnot_epi32 (__m512i __A, __m512i __B) 6930b57cec5SDimitry Andric { 6940b57cec5SDimitry Andric return (__m512i)(~(__v16su)__A & (__v16su)__B); 6950b57cec5SDimitry Andric } 6960b57cec5SDimitry Andric 6970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 6980b57cec5SDimitry Andric _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 6990b57cec5SDimitry Andric { 7000b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 7010b57cec5SDimitry Andric (__v16si)_mm512_andnot_epi32(__A, __B), 
7020b57cec5SDimitry Andric (__v16si)__W); 7030b57cec5SDimitry Andric } 7040b57cec5SDimitry Andric 7050b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7060b57cec5SDimitry Andric _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) 7070b57cec5SDimitry Andric { 7080b57cec5SDimitry Andric return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(), 7090b57cec5SDimitry Andric __U, __A, __B); 7100b57cec5SDimitry Andric } 7110b57cec5SDimitry Andric 7120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7130b57cec5SDimitry Andric _mm512_andnot_epi64(__m512i __A, __m512i __B) 7140b57cec5SDimitry Andric { 7150b57cec5SDimitry Andric return (__m512i)(~(__v8du)__A & (__v8du)__B); 7160b57cec5SDimitry Andric } 7170b57cec5SDimitry Andric 7180b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7190b57cec5SDimitry Andric _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7200b57cec5SDimitry Andric { 7210b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 7220b57cec5SDimitry Andric (__v8di)_mm512_andnot_epi64(__A, __B), 7230b57cec5SDimitry Andric (__v8di)__W); 7240b57cec5SDimitry Andric } 7250b57cec5SDimitry Andric 7260b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7270b57cec5SDimitry Andric _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B) 7280b57cec5SDimitry Andric { 7290b57cec5SDimitry Andric return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(), 7300b57cec5SDimitry Andric __U, __A, __B); 7310b57cec5SDimitry Andric } 7320b57cec5SDimitry Andric 7330b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7340b57cec5SDimitry Andric _mm512_or_epi32(__m512i __a, __m512i __b) 7350b57cec5SDimitry Andric { 7360b57cec5SDimitry Andric return (__m512i)((__v16su)__a | (__v16su)__b); 7370b57cec5SDimitry Andric } 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 7400b57cec5SDimitry Andric _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 7410b57cec5SDimitry Andric { 7420b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 7430b57cec5SDimitry Andric (__v16si)_mm512_or_epi32(__a, __b), 7440b57cec5SDimitry Andric (__v16si)__src); 7450b57cec5SDimitry Andric } 7460b57cec5SDimitry Andric 7470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7480b57cec5SDimitry Andric _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) 7490b57cec5SDimitry Andric { 7500b57cec5SDimitry Andric return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b); 7510b57cec5SDimitry Andric } 7520b57cec5SDimitry Andric 7530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7540b57cec5SDimitry Andric _mm512_or_epi64(__m512i __a, __m512i __b) 7550b57cec5SDimitry Andric { 7560b57cec5SDimitry Andric return (__m512i)((__v8du)__a | (__v8du)__b); 7570b57cec5SDimitry Andric } 7580b57cec5SDimitry Andric 7590b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7600b57cec5SDimitry Andric _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 7610b57cec5SDimitry Andric { 7620b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, 7630b57cec5SDimitry Andric (__v8di)_mm512_or_epi64(__a, __b), 7640b57cec5SDimitry Andric (__v8di)__src); 7650b57cec5SDimitry Andric } 7660b57cec5SDimitry Andric 7670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7680b57cec5SDimitry Andric _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) 7690b57cec5SDimitry Andric { 7700b57cec5SDimitry Andric return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b); 7710b57cec5SDimitry Andric } 7720b57cec5SDimitry Andric 7730b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7740b57cec5SDimitry Andric _mm512_xor_epi32(__m512i __a, 
__m512i __b) 7750b57cec5SDimitry Andric { 7760b57cec5SDimitry Andric return (__m512i)((__v16su)__a ^ (__v16su)__b); 7770b57cec5SDimitry Andric } 7780b57cec5SDimitry Andric 7790b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7800b57cec5SDimitry Andric _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 7810b57cec5SDimitry Andric { 7820b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, 7830b57cec5SDimitry Andric (__v16si)_mm512_xor_epi32(__a, __b), 7840b57cec5SDimitry Andric (__v16si)__src); 7850b57cec5SDimitry Andric } 7860b57cec5SDimitry Andric 7870b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7880b57cec5SDimitry Andric _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) 7890b57cec5SDimitry Andric { 7900b57cec5SDimitry Andric return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b); 7910b57cec5SDimitry Andric } 7920b57cec5SDimitry Andric 7930b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 7940b57cec5SDimitry Andric _mm512_xor_epi64(__m512i __a, __m512i __b) 7950b57cec5SDimitry Andric { 7960b57cec5SDimitry Andric return (__m512i)((__v8du)__a ^ (__v8du)__b); 7970b57cec5SDimitry Andric } 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8000b57cec5SDimitry Andric _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 8010b57cec5SDimitry Andric { 8020b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, 8030b57cec5SDimitry Andric (__v8di)_mm512_xor_epi64(__a, __b), 8040b57cec5SDimitry Andric (__v8di)__src); 8050b57cec5SDimitry Andric } 8060b57cec5SDimitry Andric 8070b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8080b57cec5SDimitry Andric _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) 8090b57cec5SDimitry Andric { 8100b57cec5SDimitry Andric return 
(__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b); 8110b57cec5SDimitry Andric } 8120b57cec5SDimitry Andric 8130b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8140b57cec5SDimitry Andric _mm512_and_si512(__m512i __a, __m512i __b) 8150b57cec5SDimitry Andric { 8160b57cec5SDimitry Andric return (__m512i)((__v8du)__a & (__v8du)__b); 8170b57cec5SDimitry Andric } 8180b57cec5SDimitry Andric 8190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8200b57cec5SDimitry Andric _mm512_or_si512(__m512i __a, __m512i __b) 8210b57cec5SDimitry Andric { 8220b57cec5SDimitry Andric return (__m512i)((__v8du)__a | (__v8du)__b); 8230b57cec5SDimitry Andric } 8240b57cec5SDimitry Andric 8250b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8260b57cec5SDimitry Andric _mm512_xor_si512(__m512i __a, __m512i __b) 8270b57cec5SDimitry Andric { 8280b57cec5SDimitry Andric return (__m512i)((__v8du)__a ^ (__v8du)__b); 8290b57cec5SDimitry Andric } 8300b57cec5SDimitry Andric 8310b57cec5SDimitry Andric /* Arithmetic */ 8320b57cec5SDimitry Andric 8330b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 8340b57cec5SDimitry Andric _mm512_add_pd(__m512d __a, __m512d __b) 8350b57cec5SDimitry Andric { 8360b57cec5SDimitry Andric return (__m512d)((__v8df)__a + (__v8df)__b); 8370b57cec5SDimitry Andric } 8380b57cec5SDimitry Andric 8390b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 8400b57cec5SDimitry Andric _mm512_add_ps(__m512 __a, __m512 __b) 8410b57cec5SDimitry Andric { 8420b57cec5SDimitry Andric return (__m512)((__v16sf)__a + (__v16sf)__b); 8430b57cec5SDimitry Andric } 8440b57cec5SDimitry Andric 8450b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 8460b57cec5SDimitry Andric _mm512_mul_pd(__m512d __a, __m512d __b) 8470b57cec5SDimitry Andric { 8480b57cec5SDimitry Andric return (__m512d)((__v8df)__a * (__v8df)__b); 8490b57cec5SDimitry Andric } 8500b57cec5SDimitry Andric 
8510b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 8520b57cec5SDimitry Andric _mm512_mul_ps(__m512 __a, __m512 __b) 8530b57cec5SDimitry Andric { 8540b57cec5SDimitry Andric return (__m512)((__v16sf)__a * (__v16sf)__b); 8550b57cec5SDimitry Andric } 8560b57cec5SDimitry Andric 8570b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 8580b57cec5SDimitry Andric _mm512_sub_pd(__m512d __a, __m512d __b) 8590b57cec5SDimitry Andric { 8600b57cec5SDimitry Andric return (__m512d)((__v8df)__a - (__v8df)__b); 8610b57cec5SDimitry Andric } 8620b57cec5SDimitry Andric 8630b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 8640b57cec5SDimitry Andric _mm512_sub_ps(__m512 __a, __m512 __b) 8650b57cec5SDimitry Andric { 8660b57cec5SDimitry Andric return (__m512)((__v16sf)__a - (__v16sf)__b); 8670b57cec5SDimitry Andric } 8680b57cec5SDimitry Andric 8690b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8700b57cec5SDimitry Andric _mm512_add_epi64 (__m512i __A, __m512i __B) 8710b57cec5SDimitry Andric { 8720b57cec5SDimitry Andric return (__m512i) ((__v8du) __A + (__v8du) __B); 8730b57cec5SDimitry Andric } 8740b57cec5SDimitry Andric 8750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8760b57cec5SDimitry Andric _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 8770b57cec5SDimitry Andric { 8780b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 8790b57cec5SDimitry Andric (__v8di)_mm512_add_epi64(__A, __B), 8800b57cec5SDimitry Andric (__v8di)__W); 8810b57cec5SDimitry Andric } 8820b57cec5SDimitry Andric 8830b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8840b57cec5SDimitry Andric _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) 8850b57cec5SDimitry Andric { 8860b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 8870b57cec5SDimitry Andric (__v8di)_mm512_add_epi64(__A, __B), 
8880b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 8890b57cec5SDimitry Andric } 8900b57cec5SDimitry Andric 8910b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8920b57cec5SDimitry Andric _mm512_sub_epi64 (__m512i __A, __m512i __B) 8930b57cec5SDimitry Andric { 8940b57cec5SDimitry Andric return (__m512i) ((__v8du) __A - (__v8du) __B); 8950b57cec5SDimitry Andric } 8960b57cec5SDimitry Andric 8970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 8980b57cec5SDimitry Andric _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 8990b57cec5SDimitry Andric { 9000b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 9010b57cec5SDimitry Andric (__v8di)_mm512_sub_epi64(__A, __B), 9020b57cec5SDimitry Andric (__v8di)__W); 9030b57cec5SDimitry Andric } 9040b57cec5SDimitry Andric 9050b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9060b57cec5SDimitry Andric _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) 9070b57cec5SDimitry Andric { 9080b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 9090b57cec5SDimitry Andric (__v8di)_mm512_sub_epi64(__A, __B), 9100b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 9110b57cec5SDimitry Andric } 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9140b57cec5SDimitry Andric _mm512_add_epi32 (__m512i __A, __m512i __B) 9150b57cec5SDimitry Andric { 9160b57cec5SDimitry Andric return (__m512i) ((__v16su) __A + (__v16su) __B); 9170b57cec5SDimitry Andric } 9180b57cec5SDimitry Andric 9190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9200b57cec5SDimitry Andric _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 9210b57cec5SDimitry Andric { 9220b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 9230b57cec5SDimitry Andric (__v16si)_mm512_add_epi32(__A, __B), 
9240b57cec5SDimitry Andric (__v16si)__W); 9250b57cec5SDimitry Andric } 9260b57cec5SDimitry Andric 9270b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9280b57cec5SDimitry Andric _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 9290b57cec5SDimitry Andric { 9300b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 9310b57cec5SDimitry Andric (__v16si)_mm512_add_epi32(__A, __B), 9320b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 9330b57cec5SDimitry Andric } 9340b57cec5SDimitry Andric 9350b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9360b57cec5SDimitry Andric _mm512_sub_epi32 (__m512i __A, __m512i __B) 9370b57cec5SDimitry Andric { 9380b57cec5SDimitry Andric return (__m512i) ((__v16su) __A - (__v16su) __B); 9390b57cec5SDimitry Andric } 9400b57cec5SDimitry Andric 9410b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9420b57cec5SDimitry Andric _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 9430b57cec5SDimitry Andric { 9440b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 9450b57cec5SDimitry Andric (__v16si)_mm512_sub_epi32(__A, __B), 9460b57cec5SDimitry Andric (__v16si)__W); 9470b57cec5SDimitry Andric } 9480b57cec5SDimitry Andric 9490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 9500b57cec5SDimitry Andric _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) 9510b57cec5SDimitry Andric { 9520b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 9530b57cec5SDimitry Andric (__v16si)_mm512_sub_epi32(__A, __B), 9540b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 9550b57cec5SDimitry Andric } 9560b57cec5SDimitry Andric 9570b57cec5SDimitry Andric #define _mm512_max_round_pd(A, B, R) \ 958349cc55cSDimitry Andric ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \ 959349cc55cSDimitry Andric (__v8df)(__m512d)(B), (int)(R))) 
9600b57cec5SDimitry Andric 9610b57cec5SDimitry Andric #define _mm512_mask_max_round_pd(W, U, A, B, R) \ 962349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 9630b57cec5SDimitry Andric (__v8df)_mm512_max_round_pd((A), (B), (R)), \ 964349cc55cSDimitry Andric (__v8df)(W))) 9650b57cec5SDimitry Andric 9660b57cec5SDimitry Andric #define _mm512_maskz_max_round_pd(U, A, B, R) \ 967349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 9680b57cec5SDimitry Andric (__v8df)_mm512_max_round_pd((A), (B), (R)), \ 969349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 9700b57cec5SDimitry Andric 9710b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 9720b57cec5SDimitry Andric _mm512_max_pd(__m512d __A, __m512d __B) 9730b57cec5SDimitry Andric { 9740b57cec5SDimitry Andric return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B, 9750b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 9760b57cec5SDimitry Andric } 9770b57cec5SDimitry Andric 9780b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 9790b57cec5SDimitry Andric _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 9800b57cec5SDimitry Andric { 9810b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__U, 9820b57cec5SDimitry Andric (__v8df)_mm512_max_pd(__A, __B), 9830b57cec5SDimitry Andric (__v8df)__W); 9840b57cec5SDimitry Andric } 9850b57cec5SDimitry Andric 9860b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 9870b57cec5SDimitry Andric _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) 9880b57cec5SDimitry Andric { 9890b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__U, 9900b57cec5SDimitry Andric (__v8df)_mm512_max_pd(__A, __B), 9910b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 9920b57cec5SDimitry Andric } 9930b57cec5SDimitry Andric 9940b57cec5SDimitry Andric #define _mm512_max_round_ps(A, B, R) \ 995349cc55cSDimitry Andric 
((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \ 996349cc55cSDimitry Andric (__v16sf)(__m512)(B), (int)(R))) 9970b57cec5SDimitry Andric 9980b57cec5SDimitry Andric #define _mm512_mask_max_round_ps(W, U, A, B, R) \ 999349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 10000b57cec5SDimitry Andric (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ 1001349cc55cSDimitry Andric (__v16sf)(W))) 10020b57cec5SDimitry Andric 10030b57cec5SDimitry Andric #define _mm512_maskz_max_round_ps(U, A, B, R) \ 1004349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 10050b57cec5SDimitry Andric (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ 1006349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 10070b57cec5SDimitry Andric 10080b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 10090b57cec5SDimitry Andric _mm512_max_ps(__m512 __A, __m512 __B) 10100b57cec5SDimitry Andric { 10110b57cec5SDimitry Andric return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B, 10120b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 10130b57cec5SDimitry Andric } 10140b57cec5SDimitry Andric 10150b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 10160b57cec5SDimitry Andric _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10170b57cec5SDimitry Andric { 10180b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__U, 10190b57cec5SDimitry Andric (__v16sf)_mm512_max_ps(__A, __B), 10200b57cec5SDimitry Andric (__v16sf)__W); 10210b57cec5SDimitry Andric } 10220b57cec5SDimitry Andric 10230b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 10240b57cec5SDimitry Andric _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) 10250b57cec5SDimitry Andric { 10260b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__U, 10270b57cec5SDimitry Andric (__v16sf)_mm512_max_ps(__A, __B), 10280b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 
10290b57cec5SDimitry Andric } 10300b57cec5SDimitry Andric 10310b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 10320b57cec5SDimitry Andric _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 10330b57cec5SDimitry Andric return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, 10340b57cec5SDimitry Andric (__v4sf) __B, 10350b57cec5SDimitry Andric (__v4sf) __W, 10360b57cec5SDimitry Andric (__mmask8) __U, 10370b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 10380b57cec5SDimitry Andric } 10390b57cec5SDimitry Andric 10400b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 10410b57cec5SDimitry Andric _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { 10420b57cec5SDimitry Andric return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, 10430b57cec5SDimitry Andric (__v4sf) __B, 10440b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps (), 10450b57cec5SDimitry Andric (__mmask8) __U, 10460b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 10470b57cec5SDimitry Andric } 10480b57cec5SDimitry Andric 10490b57cec5SDimitry Andric #define _mm_max_round_ss(A, B, R) \ 1050349cc55cSDimitry Andric ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 10510b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 10520b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 1053349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 10540b57cec5SDimitry Andric 10550b57cec5SDimitry Andric #define _mm_mask_max_round_ss(W, U, A, B, R) \ 1056349cc55cSDimitry Andric ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 10570b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 10580b57cec5SDimitry Andric (__v4sf)(__m128)(W), (__mmask8)(U), \ 1059349cc55cSDimitry Andric (int)(R))) 10600b57cec5SDimitry Andric 10610b57cec5SDimitry Andric #define _mm_maskz_max_round_ss(U, A, B, R) \ 1062349cc55cSDimitry Andric ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 10630b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 10640b57cec5SDimitry 
Andric (__v4sf)_mm_setzero_ps(), \ 1065349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 10660b57cec5SDimitry Andric 10670b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 10680b57cec5SDimitry Andric _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 10690b57cec5SDimitry Andric return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 10700b57cec5SDimitry Andric (__v2df) __B, 10710b57cec5SDimitry Andric (__v2df) __W, 10720b57cec5SDimitry Andric (__mmask8) __U, 10730b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 10740b57cec5SDimitry Andric } 10750b57cec5SDimitry Andric 10760b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 10770b57cec5SDimitry Andric _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { 10780b57cec5SDimitry Andric return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 10790b57cec5SDimitry Andric (__v2df) __B, 10800b57cec5SDimitry Andric (__v2df) _mm_setzero_pd (), 10810b57cec5SDimitry Andric (__mmask8) __U, 10820b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 10830b57cec5SDimitry Andric } 10840b57cec5SDimitry Andric 10850b57cec5SDimitry Andric #define _mm_max_round_sd(A, B, R) \ 1086349cc55cSDimitry Andric ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 10870b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 10880b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 1089349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 10900b57cec5SDimitry Andric 10910b57cec5SDimitry Andric #define _mm_mask_max_round_sd(W, U, A, B, R) \ 1092349cc55cSDimitry Andric ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 10930b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 10940b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 1095349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 10960b57cec5SDimitry Andric 10970b57cec5SDimitry Andric #define _mm_maskz_max_round_sd(U, A, B, R) \ 1098349cc55cSDimitry Andric ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), 
\ 10990b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 11000b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 1101349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 11020b57cec5SDimitry Andric 11030b57cec5SDimitry Andric static __inline __m512i 11040b57cec5SDimitry Andric __DEFAULT_FN_ATTRS512 11050b57cec5SDimitry Andric _mm512_max_epi32(__m512i __A, __m512i __B) 11060b57cec5SDimitry Andric { 110704eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B); 11080b57cec5SDimitry Andric } 11090b57cec5SDimitry Andric 11100b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11110b57cec5SDimitry Andric _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 11120b57cec5SDimitry Andric { 11130b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 11140b57cec5SDimitry Andric (__v16si)_mm512_max_epi32(__A, __B), 11150b57cec5SDimitry Andric (__v16si)__W); 11160b57cec5SDimitry Andric } 11170b57cec5SDimitry Andric 11180b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11190b57cec5SDimitry Andric _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 11200b57cec5SDimitry Andric { 11210b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 11220b57cec5SDimitry Andric (__v16si)_mm512_max_epi32(__A, __B), 11230b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 11240b57cec5SDimitry Andric } 11250b57cec5SDimitry Andric 11260b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 11270b57cec5SDimitry Andric _mm512_max_epu32(__m512i __A, __m512i __B) 11280b57cec5SDimitry Andric { 112904eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B); 11300b57cec5SDimitry Andric } 11310b57cec5SDimitry Andric 11320b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11330b57cec5SDimitry Andric _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 
11340b57cec5SDimitry Andric { 11350b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 11360b57cec5SDimitry Andric (__v16si)_mm512_max_epu32(__A, __B), 11370b57cec5SDimitry Andric (__v16si)__W); 11380b57cec5SDimitry Andric } 11390b57cec5SDimitry Andric 11400b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11410b57cec5SDimitry Andric _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 11420b57cec5SDimitry Andric { 11430b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 11440b57cec5SDimitry Andric (__v16si)_mm512_max_epu32(__A, __B), 11450b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 11460b57cec5SDimitry Andric } 11470b57cec5SDimitry Andric 11480b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 11490b57cec5SDimitry Andric _mm512_max_epi64(__m512i __A, __m512i __B) 11500b57cec5SDimitry Andric { 115104eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B); 11520b57cec5SDimitry Andric } 11530b57cec5SDimitry Andric 11540b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11550b57cec5SDimitry Andric _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 11560b57cec5SDimitry Andric { 11570b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 11580b57cec5SDimitry Andric (__v8di)_mm512_max_epi64(__A, __B), 11590b57cec5SDimitry Andric (__v8di)__W); 11600b57cec5SDimitry Andric } 11610b57cec5SDimitry Andric 11620b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11630b57cec5SDimitry Andric _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 11640b57cec5SDimitry Andric { 11650b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 11660b57cec5SDimitry Andric (__v8di)_mm512_max_epi64(__A, __B), 11670b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 11680b57cec5SDimitry Andric } 
11690b57cec5SDimitry Andric 11700b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 11710b57cec5SDimitry Andric _mm512_max_epu64(__m512i __A, __m512i __B) 11720b57cec5SDimitry Andric { 117304eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B); 11740b57cec5SDimitry Andric } 11750b57cec5SDimitry Andric 11760b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11770b57cec5SDimitry Andric _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 11780b57cec5SDimitry Andric { 11790b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 11800b57cec5SDimitry Andric (__v8di)_mm512_max_epu64(__A, __B), 11810b57cec5SDimitry Andric (__v8di)__W); 11820b57cec5SDimitry Andric } 11830b57cec5SDimitry Andric 11840b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 11850b57cec5SDimitry Andric _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 11860b57cec5SDimitry Andric { 11870b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 11880b57cec5SDimitry Andric (__v8di)_mm512_max_epu64(__A, __B), 11890b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 11900b57cec5SDimitry Andric } 11910b57cec5SDimitry Andric 11920b57cec5SDimitry Andric #define _mm512_min_round_pd(A, B, R) \ 1193349cc55cSDimitry Andric ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \ 1194349cc55cSDimitry Andric (__v8df)(__m512d)(B), (int)(R))) 11950b57cec5SDimitry Andric 11960b57cec5SDimitry Andric #define _mm512_mask_min_round_pd(W, U, A, B, R) \ 1197349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 11980b57cec5SDimitry Andric (__v8df)_mm512_min_round_pd((A), (B), (R)), \ 1199349cc55cSDimitry Andric (__v8df)(W))) 12000b57cec5SDimitry Andric 12010b57cec5SDimitry Andric #define _mm512_maskz_min_round_pd(U, A, B, R) \ 1202349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 
12030b57cec5SDimitry Andric (__v8df)_mm512_min_round_pd((A), (B), (R)), \ 1204349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 12050b57cec5SDimitry Andric 12060b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 12070b57cec5SDimitry Andric _mm512_min_pd(__m512d __A, __m512d __B) 12080b57cec5SDimitry Andric { 12090b57cec5SDimitry Andric return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B, 12100b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 12110b57cec5SDimitry Andric } 12120b57cec5SDimitry Andric 12130b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 12140b57cec5SDimitry Andric _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 12150b57cec5SDimitry Andric { 12160b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__U, 12170b57cec5SDimitry Andric (__v8df)_mm512_min_pd(__A, __B), 12180b57cec5SDimitry Andric (__v8df)__W); 12190b57cec5SDimitry Andric } 12200b57cec5SDimitry Andric 12210b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 12220b57cec5SDimitry Andric _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) 12230b57cec5SDimitry Andric { 12240b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__U, 12250b57cec5SDimitry Andric (__v8df)_mm512_min_pd(__A, __B), 12260b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 12270b57cec5SDimitry Andric } 12280b57cec5SDimitry Andric 12290b57cec5SDimitry Andric #define _mm512_min_round_ps(A, B, R) \ 1230349cc55cSDimitry Andric ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \ 1231349cc55cSDimitry Andric (__v16sf)(__m512)(B), (int)(R))) 12320b57cec5SDimitry Andric 12330b57cec5SDimitry Andric #define _mm512_mask_min_round_ps(W, U, A, B, R) \ 1234349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 12350b57cec5SDimitry Andric (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ 1236349cc55cSDimitry Andric (__v16sf)(W))) 12370b57cec5SDimitry Andric 
12380b57cec5SDimitry Andric #define _mm512_maskz_min_round_ps(U, A, B, R) \ 1239349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 12400b57cec5SDimitry Andric (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ 1241349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 12440b57cec5SDimitry Andric _mm512_min_ps(__m512 __A, __m512 __B) 12450b57cec5SDimitry Andric { 12460b57cec5SDimitry Andric return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B, 12470b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 12480b57cec5SDimitry Andric } 12490b57cec5SDimitry Andric 12500b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 12510b57cec5SDimitry Andric _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 12520b57cec5SDimitry Andric { 12530b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__U, 12540b57cec5SDimitry Andric (__v16sf)_mm512_min_ps(__A, __B), 12550b57cec5SDimitry Andric (__v16sf)__W); 12560b57cec5SDimitry Andric } 12570b57cec5SDimitry Andric 12580b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 12590b57cec5SDimitry Andric _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) 12600b57cec5SDimitry Andric { 12610b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__U, 12620b57cec5SDimitry Andric (__v16sf)_mm512_min_ps(__A, __B), 12630b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 12640b57cec5SDimitry Andric } 12650b57cec5SDimitry Andric 12660b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 12670b57cec5SDimitry Andric _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 12680b57cec5SDimitry Andric return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, 12690b57cec5SDimitry Andric (__v4sf) __B, 12700b57cec5SDimitry Andric (__v4sf) __W, 12710b57cec5SDimitry Andric (__mmask8) __U, 
12720b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 12730b57cec5SDimitry Andric } 12740b57cec5SDimitry Andric 12750b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 12760b57cec5SDimitry Andric _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { 12770b57cec5SDimitry Andric return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, 12780b57cec5SDimitry Andric (__v4sf) __B, 12790b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps (), 12800b57cec5SDimitry Andric (__mmask8) __U, 12810b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 12820b57cec5SDimitry Andric } 12830b57cec5SDimitry Andric 12840b57cec5SDimitry Andric #define _mm_min_round_ss(A, B, R) \ 1285349cc55cSDimitry Andric ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 12860b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 12870b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 1288349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 12890b57cec5SDimitry Andric 12900b57cec5SDimitry Andric #define _mm_mask_min_round_ss(W, U, A, B, R) \ 1291349cc55cSDimitry Andric ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 12920b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 12930b57cec5SDimitry Andric (__v4sf)(__m128)(W), (__mmask8)(U), \ 1294349cc55cSDimitry Andric (int)(R))) 12950b57cec5SDimitry Andric 12960b57cec5SDimitry Andric #define _mm_maskz_min_round_ss(U, A, B, R) \ 1297349cc55cSDimitry Andric ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 12980b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 12990b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 1300349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 13010b57cec5SDimitry Andric 13020b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 13030b57cec5SDimitry Andric _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 13040b57cec5SDimitry Andric return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, 13050b57cec5SDimitry Andric (__v2df) __B, 13060b57cec5SDimitry 
Andric (__v2df) __W, 13070b57cec5SDimitry Andric (__mmask8) __U, 13080b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 13090b57cec5SDimitry Andric } 13100b57cec5SDimitry Andric 13110b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 13120b57cec5SDimitry Andric _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { 13130b57cec5SDimitry Andric return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, 13140b57cec5SDimitry Andric (__v2df) __B, 13150b57cec5SDimitry Andric (__v2df) _mm_setzero_pd (), 13160b57cec5SDimitry Andric (__mmask8) __U, 13170b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 13180b57cec5SDimitry Andric } 13190b57cec5SDimitry Andric 13200b57cec5SDimitry Andric #define _mm_min_round_sd(A, B, R) \ 1321349cc55cSDimitry Andric ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 13220b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 13230b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 1324349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 13250b57cec5SDimitry Andric 13260b57cec5SDimitry Andric #define _mm_mask_min_round_sd(W, U, A, B, R) \ 1327349cc55cSDimitry Andric ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 13280b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 13290b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 1330349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 13310b57cec5SDimitry Andric 13320b57cec5SDimitry Andric #define _mm_maskz_min_round_sd(U, A, B, R) \ 1333349cc55cSDimitry Andric ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 13340b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 13350b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 1336349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 13370b57cec5SDimitry Andric 13380b57cec5SDimitry Andric static __inline __m512i 13390b57cec5SDimitry Andric __DEFAULT_FN_ATTRS512 13400b57cec5SDimitry Andric _mm512_min_epi32(__m512i __A, __m512i __B) 13410b57cec5SDimitry Andric { 134204eeddc0SDimitry Andric return 
(__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B); 13430b57cec5SDimitry Andric } 13440b57cec5SDimitry Andric 13450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13460b57cec5SDimitry Andric _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 13470b57cec5SDimitry Andric { 13480b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 13490b57cec5SDimitry Andric (__v16si)_mm512_min_epi32(__A, __B), 13500b57cec5SDimitry Andric (__v16si)__W); 13510b57cec5SDimitry Andric } 13520b57cec5SDimitry Andric 13530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13540b57cec5SDimitry Andric _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 13550b57cec5SDimitry Andric { 13560b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 13570b57cec5SDimitry Andric (__v16si)_mm512_min_epi32(__A, __B), 13580b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 13590b57cec5SDimitry Andric } 13600b57cec5SDimitry Andric 13610b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 13620b57cec5SDimitry Andric _mm512_min_epu32(__m512i __A, __m512i __B) 13630b57cec5SDimitry Andric { 136404eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B); 13650b57cec5SDimitry Andric } 13660b57cec5SDimitry Andric 13670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13680b57cec5SDimitry Andric _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 13690b57cec5SDimitry Andric { 13700b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 13710b57cec5SDimitry Andric (__v16si)_mm512_min_epu32(__A, __B), 13720b57cec5SDimitry Andric (__v16si)__W); 13730b57cec5SDimitry Andric } 13740b57cec5SDimitry Andric 13750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13760b57cec5SDimitry Andric _mm512_maskz_min_epu32 (__mmask16 __M, 
__m512i __A, __m512i __B) 13770b57cec5SDimitry Andric { 13780b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 13790b57cec5SDimitry Andric (__v16si)_mm512_min_epu32(__A, __B), 13800b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 13810b57cec5SDimitry Andric } 13820b57cec5SDimitry Andric 13830b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 13840b57cec5SDimitry Andric _mm512_min_epi64(__m512i __A, __m512i __B) 13850b57cec5SDimitry Andric { 138604eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B); 13870b57cec5SDimitry Andric } 13880b57cec5SDimitry Andric 13890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13900b57cec5SDimitry Andric _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 13910b57cec5SDimitry Andric { 13920b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 13930b57cec5SDimitry Andric (__v8di)_mm512_min_epi64(__A, __B), 13940b57cec5SDimitry Andric (__v8di)__W); 13950b57cec5SDimitry Andric } 13960b57cec5SDimitry Andric 13970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 13980b57cec5SDimitry Andric _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 13990b57cec5SDimitry Andric { 14000b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 14010b57cec5SDimitry Andric (__v8di)_mm512_min_epi64(__A, __B), 14020b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 14030b57cec5SDimitry Andric } 14040b57cec5SDimitry Andric 14050b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 14060b57cec5SDimitry Andric _mm512_min_epu64(__m512i __A, __m512i __B) 14070b57cec5SDimitry Andric { 140804eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B); 14090b57cec5SDimitry Andric } 14100b57cec5SDimitry Andric 14110b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 
14120b57cec5SDimitry Andric _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 14130b57cec5SDimitry Andric { 14140b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 14150b57cec5SDimitry Andric (__v8di)_mm512_min_epu64(__A, __B), 14160b57cec5SDimitry Andric (__v8di)__W); 14170b57cec5SDimitry Andric } 14180b57cec5SDimitry Andric 14190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14200b57cec5SDimitry Andric _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 14210b57cec5SDimitry Andric { 14220b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 14230b57cec5SDimitry Andric (__v8di)_mm512_min_epu64(__A, __B), 14240b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 14250b57cec5SDimitry Andric } 14260b57cec5SDimitry Andric 14270b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 14280b57cec5SDimitry Andric _mm512_mul_epi32(__m512i __X, __m512i __Y) 14290b57cec5SDimitry Andric { 14300b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y); 14310b57cec5SDimitry Andric } 14320b57cec5SDimitry Andric 14330b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 14340b57cec5SDimitry Andric _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 14350b57cec5SDimitry Andric { 14360b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 14370b57cec5SDimitry Andric (__v8di)_mm512_mul_epi32(__X, __Y), 14380b57cec5SDimitry Andric (__v8di)__W); 14390b57cec5SDimitry Andric } 14400b57cec5SDimitry Andric 14410b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 14420b57cec5SDimitry Andric _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) 14430b57cec5SDimitry Andric { 14440b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 14450b57cec5SDimitry Andric (__v8di)_mm512_mul_epi32(__X, __Y), 
14460b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512 ()); 14470b57cec5SDimitry Andric } 14480b57cec5SDimitry Andric 14490b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 14500b57cec5SDimitry Andric _mm512_mul_epu32(__m512i __X, __m512i __Y) 14510b57cec5SDimitry Andric { 14520b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y); 14530b57cec5SDimitry Andric } 14540b57cec5SDimitry Andric 14550b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 14560b57cec5SDimitry Andric _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 14570b57cec5SDimitry Andric { 14580b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 14590b57cec5SDimitry Andric (__v8di)_mm512_mul_epu32(__X, __Y), 14600b57cec5SDimitry Andric (__v8di)__W); 14610b57cec5SDimitry Andric } 14620b57cec5SDimitry Andric 14630b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 14640b57cec5SDimitry Andric _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) 14650b57cec5SDimitry Andric { 14660b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 14670b57cec5SDimitry Andric (__v8di)_mm512_mul_epu32(__X, __Y), 14680b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512 ()); 14690b57cec5SDimitry Andric } 14700b57cec5SDimitry Andric 14710b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 14720b57cec5SDimitry Andric _mm512_mullo_epi32 (__m512i __A, __m512i __B) 14730b57cec5SDimitry Andric { 14740b57cec5SDimitry Andric return (__m512i) ((__v16su) __A * (__v16su) __B); 14750b57cec5SDimitry Andric } 14760b57cec5SDimitry Andric 14770b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 14780b57cec5SDimitry Andric _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) 14790b57cec5SDimitry Andric { 14800b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 14810b57cec5SDimitry 
Andric (__v16si)_mm512_mullo_epi32(__A, __B), 14820b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 14830b57cec5SDimitry Andric } 14840b57cec5SDimitry Andric 14850b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 14860b57cec5SDimitry Andric _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 14870b57cec5SDimitry Andric { 14880b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 14890b57cec5SDimitry Andric (__v16si)_mm512_mullo_epi32(__A, __B), 14900b57cec5SDimitry Andric (__v16si)__W); 14910b57cec5SDimitry Andric } 14920b57cec5SDimitry Andric 14930b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14940b57cec5SDimitry Andric _mm512_mullox_epi64 (__m512i __A, __m512i __B) { 14950b57cec5SDimitry Andric return (__m512i) ((__v8du) __A * (__v8du) __B); 14960b57cec5SDimitry Andric } 14970b57cec5SDimitry Andric 14980b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 14990b57cec5SDimitry Andric _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { 15000b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 15010b57cec5SDimitry Andric (__v8di)_mm512_mullox_epi64(__A, __B), 15020b57cec5SDimitry Andric (__v8di)__W); 15030b57cec5SDimitry Andric } 15040b57cec5SDimitry Andric 15050b57cec5SDimitry Andric #define _mm512_sqrt_round_pd(A, R) \ 1506349cc55cSDimitry Andric ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))) 15070b57cec5SDimitry Andric 15080b57cec5SDimitry Andric #define _mm512_mask_sqrt_round_pd(W, U, A, R) \ 1509349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 15100b57cec5SDimitry Andric (__v8df)_mm512_sqrt_round_pd((A), (R)), \ 1511349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 15120b57cec5SDimitry Andric 15130b57cec5SDimitry Andric #define _mm512_maskz_sqrt_round_pd(U, A, R) \ 1514349cc55cSDimitry Andric 
((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 15150b57cec5SDimitry Andric (__v8df)_mm512_sqrt_round_pd((A), (R)), \ 1516349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 15170b57cec5SDimitry Andric 15180b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 15190b57cec5SDimitry Andric _mm512_sqrt_pd(__m512d __A) 15200b57cec5SDimitry Andric { 15210b57cec5SDimitry Andric return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, 15220b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 15230b57cec5SDimitry Andric } 15240b57cec5SDimitry Andric 15250b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 15260b57cec5SDimitry Andric _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) 15270b57cec5SDimitry Andric { 15280b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__U, 15290b57cec5SDimitry Andric (__v8df)_mm512_sqrt_pd(__A), 15300b57cec5SDimitry Andric (__v8df)__W); 15310b57cec5SDimitry Andric } 15320b57cec5SDimitry Andric 15330b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 15340b57cec5SDimitry Andric _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) 15350b57cec5SDimitry Andric { 15360b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__U, 15370b57cec5SDimitry Andric (__v8df)_mm512_sqrt_pd(__A), 15380b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 15390b57cec5SDimitry Andric } 15400b57cec5SDimitry Andric 15410b57cec5SDimitry Andric #define _mm512_sqrt_round_ps(A, R) \ 1542349cc55cSDimitry Andric ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))) 15430b57cec5SDimitry Andric 15440b57cec5SDimitry Andric #define _mm512_mask_sqrt_round_ps(W, U, A, R) \ 1545349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 15460b57cec5SDimitry Andric (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ 1547349cc55cSDimitry Andric (__v16sf)(__m512)(W))) 15480b57cec5SDimitry Andric 15490b57cec5SDimitry Andric #define _mm512_maskz_sqrt_round_ps(U, A, R) 
\ 1550349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 15510b57cec5SDimitry Andric (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ 1552349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 15530b57cec5SDimitry Andric 15540b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 15550b57cec5SDimitry Andric _mm512_sqrt_ps(__m512 __A) 15560b57cec5SDimitry Andric { 15570b57cec5SDimitry Andric return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, 15580b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 15590b57cec5SDimitry Andric } 15600b57cec5SDimitry Andric 15610b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 15620b57cec5SDimitry Andric _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) 15630b57cec5SDimitry Andric { 15640b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__U, 15650b57cec5SDimitry Andric (__v16sf)_mm512_sqrt_ps(__A), 15660b57cec5SDimitry Andric (__v16sf)__W); 15670b57cec5SDimitry Andric } 15680b57cec5SDimitry Andric 15690b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 15700b57cec5SDimitry Andric _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) 15710b57cec5SDimitry Andric { 15720b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__U, 15730b57cec5SDimitry Andric (__v16sf)_mm512_sqrt_ps(__A), 15740b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 15750b57cec5SDimitry Andric } 15760b57cec5SDimitry Andric 15770b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 15780b57cec5SDimitry Andric _mm512_rsqrt14_pd(__m512d __A) 15790b57cec5SDimitry Andric { 15800b57cec5SDimitry Andric return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 15810b57cec5SDimitry Andric (__v8df) 15820b57cec5SDimitry Andric _mm512_setzero_pd (), 15830b57cec5SDimitry Andric (__mmask8) -1);} 15840b57cec5SDimitry Andric 15850b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 15860b57cec5SDimitry Andric 
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) 15870b57cec5SDimitry Andric { 15880b57cec5SDimitry Andric return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 15890b57cec5SDimitry Andric (__v8df) __W, 15900b57cec5SDimitry Andric (__mmask8) __U); 15910b57cec5SDimitry Andric } 15920b57cec5SDimitry Andric 15930b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 15940b57cec5SDimitry Andric _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) 15950b57cec5SDimitry Andric { 15960b57cec5SDimitry Andric return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 15970b57cec5SDimitry Andric (__v8df) 15980b57cec5SDimitry Andric _mm512_setzero_pd (), 15990b57cec5SDimitry Andric (__mmask8) __U); 16000b57cec5SDimitry Andric } 16010b57cec5SDimitry Andric 16020b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 16030b57cec5SDimitry Andric _mm512_rsqrt14_ps(__m512 __A) 16040b57cec5SDimitry Andric { 16050b57cec5SDimitry Andric return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 16060b57cec5SDimitry Andric (__v16sf) 16070b57cec5SDimitry Andric _mm512_setzero_ps (), 16080b57cec5SDimitry Andric (__mmask16) -1); 16090b57cec5SDimitry Andric } 16100b57cec5SDimitry Andric 16110b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 16120b57cec5SDimitry Andric _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) 16130b57cec5SDimitry Andric { 16140b57cec5SDimitry Andric return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 16150b57cec5SDimitry Andric (__v16sf) __W, 16160b57cec5SDimitry Andric (__mmask16) __U); 16170b57cec5SDimitry Andric } 16180b57cec5SDimitry Andric 16190b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 16200b57cec5SDimitry Andric _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) 16210b57cec5SDimitry Andric { 16220b57cec5SDimitry Andric return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 16230b57cec5SDimitry Andric (__v16sf) 
16240b57cec5SDimitry Andric _mm512_setzero_ps (), 16250b57cec5SDimitry Andric (__mmask16) __U); 16260b57cec5SDimitry Andric } 16270b57cec5SDimitry Andric 16280b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 16290b57cec5SDimitry Andric _mm_rsqrt14_ss(__m128 __A, __m128 __B) 16300b57cec5SDimitry Andric { 16310b57cec5SDimitry Andric return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 16320b57cec5SDimitry Andric (__v4sf) __B, 16330b57cec5SDimitry Andric (__v4sf) 16340b57cec5SDimitry Andric _mm_setzero_ps (), 16350b57cec5SDimitry Andric (__mmask8) -1); 16360b57cec5SDimitry Andric } 16370b57cec5SDimitry Andric 16380b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 16390b57cec5SDimitry Andric _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 16400b57cec5SDimitry Andric { 16410b57cec5SDimitry Andric return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 16420b57cec5SDimitry Andric (__v4sf) __B, 16430b57cec5SDimitry Andric (__v4sf) __W, 16440b57cec5SDimitry Andric (__mmask8) __U); 16450b57cec5SDimitry Andric } 16460b57cec5SDimitry Andric 16470b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 16480b57cec5SDimitry Andric _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) 16490b57cec5SDimitry Andric { 16500b57cec5SDimitry Andric return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 16510b57cec5SDimitry Andric (__v4sf) __B, 16520b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps (), 16530b57cec5SDimitry Andric (__mmask8) __U); 16540b57cec5SDimitry Andric } 16550b57cec5SDimitry Andric 16560b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 16570b57cec5SDimitry Andric _mm_rsqrt14_sd(__m128d __A, __m128d __B) 16580b57cec5SDimitry Andric { 16590b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, 16600b57cec5SDimitry Andric (__v2df) __B, 16610b57cec5SDimitry Andric (__v2df) 16620b57cec5SDimitry Andric _mm_setzero_pd (), 
16630b57cec5SDimitry Andric (__mmask8) -1); 16640b57cec5SDimitry Andric } 16650b57cec5SDimitry Andric 16660b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 16670b57cec5SDimitry Andric _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 16680b57cec5SDimitry Andric { 16690b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, 16700b57cec5SDimitry Andric (__v2df) __B, 16710b57cec5SDimitry Andric (__v2df) __W, 16720b57cec5SDimitry Andric (__mmask8) __U); 16730b57cec5SDimitry Andric } 16740b57cec5SDimitry Andric 16750b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 16760b57cec5SDimitry Andric _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) 16770b57cec5SDimitry Andric { 16780b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, 16790b57cec5SDimitry Andric (__v2df) __B, 16800b57cec5SDimitry Andric (__v2df) _mm_setzero_pd (), 16810b57cec5SDimitry Andric (__mmask8) __U); 16820b57cec5SDimitry Andric } 16830b57cec5SDimitry Andric 16840b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 16850b57cec5SDimitry Andric _mm512_rcp14_pd(__m512d __A) 16860b57cec5SDimitry Andric { 16870b57cec5SDimitry Andric return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 16880b57cec5SDimitry Andric (__v8df) 16890b57cec5SDimitry Andric _mm512_setzero_pd (), 16900b57cec5SDimitry Andric (__mmask8) -1); 16910b57cec5SDimitry Andric } 16920b57cec5SDimitry Andric 16930b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 16940b57cec5SDimitry Andric _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) 16950b57cec5SDimitry Andric { 16960b57cec5SDimitry Andric return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 16970b57cec5SDimitry Andric (__v8df) __W, 16980b57cec5SDimitry Andric (__mmask8) __U); 16990b57cec5SDimitry Andric } 17000b57cec5SDimitry Andric 17010b57cec5SDimitry Andric static __inline__ 
__m512d __DEFAULT_FN_ATTRS512 17020b57cec5SDimitry Andric _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) 17030b57cec5SDimitry Andric { 17040b57cec5SDimitry Andric return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 17050b57cec5SDimitry Andric (__v8df) 17060b57cec5SDimitry Andric _mm512_setzero_pd (), 17070b57cec5SDimitry Andric (__mmask8) __U); 17080b57cec5SDimitry Andric } 17090b57cec5SDimitry Andric 17100b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 17110b57cec5SDimitry Andric _mm512_rcp14_ps(__m512 __A) 17120b57cec5SDimitry Andric { 17130b57cec5SDimitry Andric return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 17140b57cec5SDimitry Andric (__v16sf) 17150b57cec5SDimitry Andric _mm512_setzero_ps (), 17160b57cec5SDimitry Andric (__mmask16) -1); 17170b57cec5SDimitry Andric } 17180b57cec5SDimitry Andric 17190b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 17200b57cec5SDimitry Andric _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) 17210b57cec5SDimitry Andric { 17220b57cec5SDimitry Andric return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 17230b57cec5SDimitry Andric (__v16sf) __W, 17240b57cec5SDimitry Andric (__mmask16) __U); 17250b57cec5SDimitry Andric } 17260b57cec5SDimitry Andric 17270b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 17280b57cec5SDimitry Andric _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) 17290b57cec5SDimitry Andric { 17300b57cec5SDimitry Andric return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 17310b57cec5SDimitry Andric (__v16sf) 17320b57cec5SDimitry Andric _mm512_setzero_ps (), 17330b57cec5SDimitry Andric (__mmask16) __U); 17340b57cec5SDimitry Andric } 17350b57cec5SDimitry Andric 17360b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 17370b57cec5SDimitry Andric _mm_rcp14_ss(__m128 __A, __m128 __B) 17380b57cec5SDimitry Andric { 17390b57cec5SDimitry Andric return (__m128) 
__builtin_ia32_rcp14ss_mask ((__v4sf) __A, 17400b57cec5SDimitry Andric (__v4sf) __B, 17410b57cec5SDimitry Andric (__v4sf) 17420b57cec5SDimitry Andric _mm_setzero_ps (), 17430b57cec5SDimitry Andric (__mmask8) -1); 17440b57cec5SDimitry Andric } 17450b57cec5SDimitry Andric 17460b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 17470b57cec5SDimitry Andric _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 17480b57cec5SDimitry Andric { 17490b57cec5SDimitry Andric return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 17500b57cec5SDimitry Andric (__v4sf) __B, 17510b57cec5SDimitry Andric (__v4sf) __W, 17520b57cec5SDimitry Andric (__mmask8) __U); 17530b57cec5SDimitry Andric } 17540b57cec5SDimitry Andric 17550b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 17560b57cec5SDimitry Andric _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) 17570b57cec5SDimitry Andric { 17580b57cec5SDimitry Andric return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 17590b57cec5SDimitry Andric (__v4sf) __B, 17600b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps (), 17610b57cec5SDimitry Andric (__mmask8) __U); 17620b57cec5SDimitry Andric } 17630b57cec5SDimitry Andric 17640b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 17650b57cec5SDimitry Andric _mm_rcp14_sd(__m128d __A, __m128d __B) 17660b57cec5SDimitry Andric { 17670b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, 17680b57cec5SDimitry Andric (__v2df) __B, 17690b57cec5SDimitry Andric (__v2df) 17700b57cec5SDimitry Andric _mm_setzero_pd (), 17710b57cec5SDimitry Andric (__mmask8) -1); 17720b57cec5SDimitry Andric } 17730b57cec5SDimitry Andric 17740b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 17750b57cec5SDimitry Andric _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 17760b57cec5SDimitry Andric { 17770b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rcp14sd_mask ( 
(__v2df) __A, 17780b57cec5SDimitry Andric (__v2df) __B, 17790b57cec5SDimitry Andric (__v2df) __W, 17800b57cec5SDimitry Andric (__mmask8) __U); 17810b57cec5SDimitry Andric } 17820b57cec5SDimitry Andric 17830b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 17840b57cec5SDimitry Andric _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) 17850b57cec5SDimitry Andric { 17860b57cec5SDimitry Andric return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, 17870b57cec5SDimitry Andric (__v2df) __B, 17880b57cec5SDimitry Andric (__v2df) _mm_setzero_pd (), 17890b57cec5SDimitry Andric (__mmask8) __U); 17900b57cec5SDimitry Andric } 17910b57cec5SDimitry Andric 17920b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 17930b57cec5SDimitry Andric _mm512_floor_ps(__m512 __A) 17940b57cec5SDimitry Andric { 17950b57cec5SDimitry Andric return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 17960b57cec5SDimitry Andric _MM_FROUND_FLOOR, 179781ad6265SDimitry Andric (__v16sf) __A, (unsigned short)-1, 17980b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 17990b57cec5SDimitry Andric } 18000b57cec5SDimitry Andric 18010b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 18020b57cec5SDimitry Andric _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) 18030b57cec5SDimitry Andric { 18040b57cec5SDimitry Andric return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 18050b57cec5SDimitry Andric _MM_FROUND_FLOOR, 18060b57cec5SDimitry Andric (__v16sf) __W, __U, 18070b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 18080b57cec5SDimitry Andric } 18090b57cec5SDimitry Andric 18100b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 18110b57cec5SDimitry Andric _mm512_floor_pd(__m512d __A) 18120b57cec5SDimitry Andric { 18130b57cec5SDimitry Andric return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 18140b57cec5SDimitry Andric _MM_FROUND_FLOOR, 181581ad6265SDimitry Andric (__v8df) __A, (unsigned 
char)-1, 18160b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 18170b57cec5SDimitry Andric } 18180b57cec5SDimitry Andric 18190b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 18200b57cec5SDimitry Andric _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) 18210b57cec5SDimitry Andric { 18220b57cec5SDimitry Andric return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 18230b57cec5SDimitry Andric _MM_FROUND_FLOOR, 18240b57cec5SDimitry Andric (__v8df) __W, __U, 18250b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 18260b57cec5SDimitry Andric } 18270b57cec5SDimitry Andric 18280b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 18290b57cec5SDimitry Andric _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) 18300b57cec5SDimitry Andric { 18310b57cec5SDimitry Andric return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 18320b57cec5SDimitry Andric _MM_FROUND_CEIL, 18330b57cec5SDimitry Andric (__v16sf) __W, __U, 18340b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 18350b57cec5SDimitry Andric } 18360b57cec5SDimitry Andric 18370b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 18380b57cec5SDimitry Andric _mm512_ceil_ps(__m512 __A) 18390b57cec5SDimitry Andric { 18400b57cec5SDimitry Andric return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 18410b57cec5SDimitry Andric _MM_FROUND_CEIL, 184281ad6265SDimitry Andric (__v16sf) __A, (unsigned short)-1, 18430b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 18440b57cec5SDimitry Andric } 18450b57cec5SDimitry Andric 18460b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 18470b57cec5SDimitry Andric _mm512_ceil_pd(__m512d __A) 18480b57cec5SDimitry Andric { 18490b57cec5SDimitry Andric return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 18500b57cec5SDimitry Andric _MM_FROUND_CEIL, 185181ad6265SDimitry Andric (__v8df) __A, (unsigned char)-1, 18520b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 
18530b57cec5SDimitry Andric } 18540b57cec5SDimitry Andric 18550b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 18560b57cec5SDimitry Andric _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) 18570b57cec5SDimitry Andric { 18580b57cec5SDimitry Andric return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 18590b57cec5SDimitry Andric _MM_FROUND_CEIL, 18600b57cec5SDimitry Andric (__v8df) __W, __U, 18610b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 18620b57cec5SDimitry Andric } 18630b57cec5SDimitry Andric 18640b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 18650b57cec5SDimitry Andric _mm512_abs_epi64(__m512i __A) 18660b57cec5SDimitry Andric { 186704eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_abs((__v8di)__A); 18680b57cec5SDimitry Andric } 18690b57cec5SDimitry Andric 18700b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 18710b57cec5SDimitry Andric _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 18720b57cec5SDimitry Andric { 18730b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 18740b57cec5SDimitry Andric (__v8di)_mm512_abs_epi64(__A), 18750b57cec5SDimitry Andric (__v8di)__W); 18760b57cec5SDimitry Andric } 18770b57cec5SDimitry Andric 18780b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 18790b57cec5SDimitry Andric _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) 18800b57cec5SDimitry Andric { 18810b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 18820b57cec5SDimitry Andric (__v8di)_mm512_abs_epi64(__A), 18830b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 18840b57cec5SDimitry Andric } 18850b57cec5SDimitry Andric 18860b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 18870b57cec5SDimitry Andric _mm512_abs_epi32(__m512i __A) 18880b57cec5SDimitry Andric { 188904eeddc0SDimitry Andric return (__m512i)__builtin_elementwise_abs((__v16si) __A); 
18900b57cec5SDimitry Andric } 18910b57cec5SDimitry Andric 18920b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 18930b57cec5SDimitry Andric _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 18940b57cec5SDimitry Andric { 18950b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 18960b57cec5SDimitry Andric (__v16si)_mm512_abs_epi32(__A), 18970b57cec5SDimitry Andric (__v16si)__W); 18980b57cec5SDimitry Andric } 18990b57cec5SDimitry Andric 19000b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 19010b57cec5SDimitry Andric _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) 19020b57cec5SDimitry Andric { 19030b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 19040b57cec5SDimitry Andric (__v16si)_mm512_abs_epi32(__A), 19050b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 19060b57cec5SDimitry Andric } 19070b57cec5SDimitry Andric 19080b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 19090b57cec5SDimitry Andric _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 19100b57cec5SDimitry Andric __A = _mm_add_ss(__A, __B); 19110b57cec5SDimitry Andric return __builtin_ia32_selectss_128(__U, __A, __W); 19120b57cec5SDimitry Andric } 19130b57cec5SDimitry Andric 19140b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 19150b57cec5SDimitry Andric _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { 19160b57cec5SDimitry Andric __A = _mm_add_ss(__A, __B); 19170b57cec5SDimitry Andric return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); 19180b57cec5SDimitry Andric } 19190b57cec5SDimitry Andric 19200b57cec5SDimitry Andric #define _mm_add_round_ss(A, B, R) \ 1921349cc55cSDimitry Andric ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 19220b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 19230b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 1924349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 19250b57cec5SDimitry 
Andric 19260b57cec5SDimitry Andric #define _mm_mask_add_round_ss(W, U, A, B, R) \ 1927349cc55cSDimitry Andric ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 19280b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 19290b57cec5SDimitry Andric (__v4sf)(__m128)(W), (__mmask8)(U), \ 1930349cc55cSDimitry Andric (int)(R))) 19310b57cec5SDimitry Andric 19320b57cec5SDimitry Andric #define _mm_maskz_add_round_ss(U, A, B, R) \ 1933349cc55cSDimitry Andric ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 19340b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 19350b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 1936349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 19370b57cec5SDimitry Andric 19380b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 19390b57cec5SDimitry Andric _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 19400b57cec5SDimitry Andric __A = _mm_add_sd(__A, __B); 19410b57cec5SDimitry Andric return __builtin_ia32_selectsd_128(__U, __A, __W); 19420b57cec5SDimitry Andric } 19430b57cec5SDimitry Andric 19440b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 19450b57cec5SDimitry Andric _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { 19460b57cec5SDimitry Andric __A = _mm_add_sd(__A, __B); 19470b57cec5SDimitry Andric return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); 19480b57cec5SDimitry Andric } 19490b57cec5SDimitry Andric #define _mm_add_round_sd(A, B, R) \ 1950349cc55cSDimitry Andric ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 19510b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 19520b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 1953349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 19540b57cec5SDimitry Andric 19550b57cec5SDimitry Andric #define _mm_mask_add_round_sd(W, U, A, B, R) \ 1956349cc55cSDimitry Andric ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 19570b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 
19580b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 1959349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 19600b57cec5SDimitry Andric 19610b57cec5SDimitry Andric #define _mm_maskz_add_round_sd(U, A, B, R) \ 1962349cc55cSDimitry Andric ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 19630b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 19640b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 1965349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 19660b57cec5SDimitry Andric 19670b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 19680b57cec5SDimitry Andric _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 19690b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 19700b57cec5SDimitry Andric (__v8df)_mm512_add_pd(__A, __B), 19710b57cec5SDimitry Andric (__v8df)__W); 19720b57cec5SDimitry Andric } 19730b57cec5SDimitry Andric 19740b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 19750b57cec5SDimitry Andric _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { 19760b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 19770b57cec5SDimitry Andric (__v8df)_mm512_add_pd(__A, __B), 19780b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 19790b57cec5SDimitry Andric } 19800b57cec5SDimitry Andric 19810b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 19820b57cec5SDimitry Andric _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 19830b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 19840b57cec5SDimitry Andric (__v16sf)_mm512_add_ps(__A, __B), 19850b57cec5SDimitry Andric (__v16sf)__W); 19860b57cec5SDimitry Andric } 19870b57cec5SDimitry Andric 19880b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 19890b57cec5SDimitry Andric _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { 19900b57cec5SDimitry Andric return 
(__m512)__builtin_ia32_selectps_512((__mmask16)__U, 19910b57cec5SDimitry Andric (__v16sf)_mm512_add_ps(__A, __B), 19920b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 19930b57cec5SDimitry Andric } 19940b57cec5SDimitry Andric 19950b57cec5SDimitry Andric #define _mm512_add_round_pd(A, B, R) \ 1996349cc55cSDimitry Andric ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \ 1997349cc55cSDimitry Andric (__v8df)(__m512d)(B), (int)(R))) 19980b57cec5SDimitry Andric 19990b57cec5SDimitry Andric #define _mm512_mask_add_round_pd(W, U, A, B, R) \ 2000349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 20010b57cec5SDimitry Andric (__v8df)_mm512_add_round_pd((A), (B), (R)), \ 2002349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 20030b57cec5SDimitry Andric 20040b57cec5SDimitry Andric #define _mm512_maskz_add_round_pd(U, A, B, R) \ 2005349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 20060b57cec5SDimitry Andric (__v8df)_mm512_add_round_pd((A), (B), (R)), \ 2007349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 20080b57cec5SDimitry Andric 20090b57cec5SDimitry Andric #define _mm512_add_round_ps(A, B, R) \ 2010349cc55cSDimitry Andric ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \ 2011349cc55cSDimitry Andric (__v16sf)(__m512)(B), (int)(R))) 20120b57cec5SDimitry Andric 20130b57cec5SDimitry Andric #define _mm512_mask_add_round_ps(W, U, A, B, R) \ 2014349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 20150b57cec5SDimitry Andric (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ 2016349cc55cSDimitry Andric (__v16sf)(__m512)(W))) 20170b57cec5SDimitry Andric 20180b57cec5SDimitry Andric #define _mm512_maskz_add_round_ps(U, A, B, R) \ 2019349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 20200b57cec5SDimitry Andric (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ 2021349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 20220b57cec5SDimitry Andric 
20230b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 20240b57cec5SDimitry Andric _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 20250b57cec5SDimitry Andric __A = _mm_sub_ss(__A, __B); 20260b57cec5SDimitry Andric return __builtin_ia32_selectss_128(__U, __A, __W); 20270b57cec5SDimitry Andric } 20280b57cec5SDimitry Andric 20290b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 20300b57cec5SDimitry Andric _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { 20310b57cec5SDimitry Andric __A = _mm_sub_ss(__A, __B); 20320b57cec5SDimitry Andric return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); 20330b57cec5SDimitry Andric } 20340b57cec5SDimitry Andric #define _mm_sub_round_ss(A, B, R) \ 2035349cc55cSDimitry Andric ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 20360b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 20370b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 2038349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 20390b57cec5SDimitry Andric 20400b57cec5SDimitry Andric #define _mm_mask_sub_round_ss(W, U, A, B, R) \ 2041349cc55cSDimitry Andric ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 20420b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 20430b57cec5SDimitry Andric (__v4sf)(__m128)(W), (__mmask8)(U), \ 2044349cc55cSDimitry Andric (int)(R))) 20450b57cec5SDimitry Andric 20460b57cec5SDimitry Andric #define _mm_maskz_sub_round_ss(U, A, B, R) \ 2047349cc55cSDimitry Andric ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 20480b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 20490b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 2050349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 20510b57cec5SDimitry Andric 20520b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 20530b57cec5SDimitry Andric _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 20540b57cec5SDimitry Andric __A = _mm_sub_sd(__A, __B); 20550b57cec5SDimitry 
Andric return __builtin_ia32_selectsd_128(__U, __A, __W); 20560b57cec5SDimitry Andric } 20570b57cec5SDimitry Andric 20580b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 20590b57cec5SDimitry Andric _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { 20600b57cec5SDimitry Andric __A = _mm_sub_sd(__A, __B); 20610b57cec5SDimitry Andric return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); 20620b57cec5SDimitry Andric } 20630b57cec5SDimitry Andric 20640b57cec5SDimitry Andric #define _mm_sub_round_sd(A, B, R) \ 2065349cc55cSDimitry Andric ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 20660b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 20670b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 2068349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 20690b57cec5SDimitry Andric 20700b57cec5SDimitry Andric #define _mm_mask_sub_round_sd(W, U, A, B, R) \ 2071349cc55cSDimitry Andric ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 20720b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 20730b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 2074349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 20750b57cec5SDimitry Andric 20760b57cec5SDimitry Andric #define _mm_maskz_sub_round_sd(U, A, B, R) \ 2077349cc55cSDimitry Andric ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 20780b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 20790b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 2080349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 20810b57cec5SDimitry Andric 20820b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 20830b57cec5SDimitry Andric _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 20840b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 20850b57cec5SDimitry Andric (__v8df)_mm512_sub_pd(__A, __B), 20860b57cec5SDimitry Andric (__v8df)__W); 20870b57cec5SDimitry Andric } 20880b57cec5SDimitry Andric 20890b57cec5SDimitry Andric 
static __inline__ __m512d __DEFAULT_FN_ATTRS512 20900b57cec5SDimitry Andric _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { 20910b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 20920b57cec5SDimitry Andric (__v8df)_mm512_sub_pd(__A, __B), 20930b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 20940b57cec5SDimitry Andric } 20950b57cec5SDimitry Andric 20960b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 20970b57cec5SDimitry Andric _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 20980b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 20990b57cec5SDimitry Andric (__v16sf)_mm512_sub_ps(__A, __B), 21000b57cec5SDimitry Andric (__v16sf)__W); 21010b57cec5SDimitry Andric } 21020b57cec5SDimitry Andric 21030b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 21040b57cec5SDimitry Andric _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { 21050b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 21060b57cec5SDimitry Andric (__v16sf)_mm512_sub_ps(__A, __B), 21070b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 21080b57cec5SDimitry Andric } 21090b57cec5SDimitry Andric 21100b57cec5SDimitry Andric #define _mm512_sub_round_pd(A, B, R) \ 2111349cc55cSDimitry Andric ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \ 2112349cc55cSDimitry Andric (__v8df)(__m512d)(B), (int)(R))) 21130b57cec5SDimitry Andric 21140b57cec5SDimitry Andric #define _mm512_mask_sub_round_pd(W, U, A, B, R) \ 2115349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 21160b57cec5SDimitry Andric (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ 2117349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 21180b57cec5SDimitry Andric 21190b57cec5SDimitry Andric #define _mm512_maskz_sub_round_pd(U, A, B, R) \ 2120349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 21210b57cec5SDimitry Andric 
(__v8df)_mm512_sub_round_pd((A), (B), (R)), \ 2122349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 21230b57cec5SDimitry Andric 21240b57cec5SDimitry Andric #define _mm512_sub_round_ps(A, B, R) \ 2125349cc55cSDimitry Andric ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \ 2126349cc55cSDimitry Andric (__v16sf)(__m512)(B), (int)(R))) 21270b57cec5SDimitry Andric 21280b57cec5SDimitry Andric #define _mm512_mask_sub_round_ps(W, U, A, B, R) \ 2129349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 21300b57cec5SDimitry Andric (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ 2131349cc55cSDimitry Andric (__v16sf)(__m512)(W))) 21320b57cec5SDimitry Andric 21330b57cec5SDimitry Andric #define _mm512_maskz_sub_round_ps(U, A, B, R) \ 2134349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 21350b57cec5SDimitry Andric (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ 2136349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 21370b57cec5SDimitry Andric 21380b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 21390b57cec5SDimitry Andric _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 21400b57cec5SDimitry Andric __A = _mm_mul_ss(__A, __B); 21410b57cec5SDimitry Andric return __builtin_ia32_selectss_128(__U, __A, __W); 21420b57cec5SDimitry Andric } 21430b57cec5SDimitry Andric 21440b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 21450b57cec5SDimitry Andric _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { 21460b57cec5SDimitry Andric __A = _mm_mul_ss(__A, __B); 21470b57cec5SDimitry Andric return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); 21480b57cec5SDimitry Andric } 21490b57cec5SDimitry Andric #define _mm_mul_round_ss(A, B, R) \ 2150349cc55cSDimitry Andric ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 21510b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 21520b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 2153349cc55cSDimitry Andric 
(__mmask8)-1, (int)(R))) 21540b57cec5SDimitry Andric 21550b57cec5SDimitry Andric #define _mm_mask_mul_round_ss(W, U, A, B, R) \ 2156349cc55cSDimitry Andric ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 21570b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 21580b57cec5SDimitry Andric (__v4sf)(__m128)(W), (__mmask8)(U), \ 2159349cc55cSDimitry Andric (int)(R))) 21600b57cec5SDimitry Andric 21610b57cec5SDimitry Andric #define _mm_maskz_mul_round_ss(U, A, B, R) \ 2162349cc55cSDimitry Andric ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 21630b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 21640b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 2165349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 21660b57cec5SDimitry Andric 21670b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 21680b57cec5SDimitry Andric _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 21690b57cec5SDimitry Andric __A = _mm_mul_sd(__A, __B); 21700b57cec5SDimitry Andric return __builtin_ia32_selectsd_128(__U, __A, __W); 21710b57cec5SDimitry Andric } 21720b57cec5SDimitry Andric 21730b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 21740b57cec5SDimitry Andric _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { 21750b57cec5SDimitry Andric __A = _mm_mul_sd(__A, __B); 21760b57cec5SDimitry Andric return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); 21770b57cec5SDimitry Andric } 21780b57cec5SDimitry Andric 21790b57cec5SDimitry Andric #define _mm_mul_round_sd(A, B, R) \ 2180349cc55cSDimitry Andric ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 21810b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 21820b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 2183349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 21840b57cec5SDimitry Andric 21850b57cec5SDimitry Andric #define _mm_mask_mul_round_sd(W, U, A, B, R) \ 2186349cc55cSDimitry Andric 
((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 21870b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 21880b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 2189349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 21900b57cec5SDimitry Andric 21910b57cec5SDimitry Andric #define _mm_maskz_mul_round_sd(U, A, B, R) \ 2192349cc55cSDimitry Andric ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 21930b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 21940b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 2195349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 21960b57cec5SDimitry Andric 21970b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 21980b57cec5SDimitry Andric _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 21990b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 22000b57cec5SDimitry Andric (__v8df)_mm512_mul_pd(__A, __B), 22010b57cec5SDimitry Andric (__v8df)__W); 22020b57cec5SDimitry Andric } 22030b57cec5SDimitry Andric 22040b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 22050b57cec5SDimitry Andric _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { 22060b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 22070b57cec5SDimitry Andric (__v8df)_mm512_mul_pd(__A, __B), 22080b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 22090b57cec5SDimitry Andric } 22100b57cec5SDimitry Andric 22110b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 22120b57cec5SDimitry Andric _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 22130b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 22140b57cec5SDimitry Andric (__v16sf)_mm512_mul_ps(__A, __B), 22150b57cec5SDimitry Andric (__v16sf)__W); 22160b57cec5SDimitry Andric } 22170b57cec5SDimitry Andric 22180b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 22190b57cec5SDimitry Andric 
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { 22200b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 22210b57cec5SDimitry Andric (__v16sf)_mm512_mul_ps(__A, __B), 22220b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 22230b57cec5SDimitry Andric } 22240b57cec5SDimitry Andric 22250b57cec5SDimitry Andric #define _mm512_mul_round_pd(A, B, R) \ 2226349cc55cSDimitry Andric ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \ 2227349cc55cSDimitry Andric (__v8df)(__m512d)(B), (int)(R))) 22280b57cec5SDimitry Andric 22290b57cec5SDimitry Andric #define _mm512_mask_mul_round_pd(W, U, A, B, R) \ 2230349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 22310b57cec5SDimitry Andric (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ 2232349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 22330b57cec5SDimitry Andric 22340b57cec5SDimitry Andric #define _mm512_maskz_mul_round_pd(U, A, B, R) \ 2235349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 22360b57cec5SDimitry Andric (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ 2237349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 22380b57cec5SDimitry Andric 22390b57cec5SDimitry Andric #define _mm512_mul_round_ps(A, B, R) \ 2240349cc55cSDimitry Andric ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \ 2241349cc55cSDimitry Andric (__v16sf)(__m512)(B), (int)(R))) 22420b57cec5SDimitry Andric 22430b57cec5SDimitry Andric #define _mm512_mask_mul_round_ps(W, U, A, B, R) \ 2244349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 22450b57cec5SDimitry Andric (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ 2246349cc55cSDimitry Andric (__v16sf)(__m512)(W))) 22470b57cec5SDimitry Andric 22480b57cec5SDimitry Andric #define _mm512_maskz_mul_round_ps(U, A, B, R) \ 2249349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 22500b57cec5SDimitry Andric (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ 
2251349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 22520b57cec5SDimitry Andric 22530b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 22540b57cec5SDimitry Andric _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 22550b57cec5SDimitry Andric __A = _mm_div_ss(__A, __B); 22560b57cec5SDimitry Andric return __builtin_ia32_selectss_128(__U, __A, __W); 22570b57cec5SDimitry Andric } 22580b57cec5SDimitry Andric 22590b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 22600b57cec5SDimitry Andric _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { 22610b57cec5SDimitry Andric __A = _mm_div_ss(__A, __B); 22620b57cec5SDimitry Andric return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps()); 22630b57cec5SDimitry Andric } 22640b57cec5SDimitry Andric 22650b57cec5SDimitry Andric #define _mm_div_round_ss(A, B, R) \ 2266349cc55cSDimitry Andric ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 22670b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 22680b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 2269349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 22700b57cec5SDimitry Andric 22710b57cec5SDimitry Andric #define _mm_mask_div_round_ss(W, U, A, B, R) \ 2272349cc55cSDimitry Andric ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 22730b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 22740b57cec5SDimitry Andric (__v4sf)(__m128)(W), (__mmask8)(U), \ 2275349cc55cSDimitry Andric (int)(R))) 22760b57cec5SDimitry Andric 22770b57cec5SDimitry Andric #define _mm_maskz_div_round_ss(U, A, B, R) \ 2278349cc55cSDimitry Andric ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 22790b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 22800b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 2281349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 22820b57cec5SDimitry Andric 22830b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 22840b57cec5SDimitry Andric _mm_mask_div_sd(__m128d __W, 
__mmask8 __U,__m128d __A, __m128d __B) { 22850b57cec5SDimitry Andric __A = _mm_div_sd(__A, __B); 22860b57cec5SDimitry Andric return __builtin_ia32_selectsd_128(__U, __A, __W); 22870b57cec5SDimitry Andric } 22880b57cec5SDimitry Andric 22890b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 22900b57cec5SDimitry Andric _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { 22910b57cec5SDimitry Andric __A = _mm_div_sd(__A, __B); 22920b57cec5SDimitry Andric return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd()); 22930b57cec5SDimitry Andric } 22940b57cec5SDimitry Andric 22950b57cec5SDimitry Andric #define _mm_div_round_sd(A, B, R) \ 2296349cc55cSDimitry Andric ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 22970b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 22980b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 2299349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 23000b57cec5SDimitry Andric 23010b57cec5SDimitry Andric #define _mm_mask_div_round_sd(W, U, A, B, R) \ 2302349cc55cSDimitry Andric ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 23030b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 23040b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 2305349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 23060b57cec5SDimitry Andric 23070b57cec5SDimitry Andric #define _mm_maskz_div_round_sd(U, A, B, R) \ 2308349cc55cSDimitry Andric ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 23090b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 23100b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 2311349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 23120b57cec5SDimitry Andric 23130b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 23140b57cec5SDimitry Andric _mm512_div_pd(__m512d __a, __m512d __b) 23150b57cec5SDimitry Andric { 23160b57cec5SDimitry Andric return (__m512d)((__v8df)__a/(__v8df)__b); 23170b57cec5SDimitry Andric } 23180b57cec5SDimitry Andric 23190b57cec5SDimitry Andric static 
__inline__ __m512d __DEFAULT_FN_ATTRS512 23200b57cec5SDimitry Andric _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 23210b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 23220b57cec5SDimitry Andric (__v8df)_mm512_div_pd(__A, __B), 23230b57cec5SDimitry Andric (__v8df)__W); 23240b57cec5SDimitry Andric } 23250b57cec5SDimitry Andric 23260b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 23270b57cec5SDimitry Andric _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { 23280b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 23290b57cec5SDimitry Andric (__v8df)_mm512_div_pd(__A, __B), 23300b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 23310b57cec5SDimitry Andric } 23320b57cec5SDimitry Andric 23330b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 23340b57cec5SDimitry Andric _mm512_div_ps(__m512 __a, __m512 __b) 23350b57cec5SDimitry Andric { 23360b57cec5SDimitry Andric return (__m512)((__v16sf)__a/(__v16sf)__b); 23370b57cec5SDimitry Andric } 23380b57cec5SDimitry Andric 23390b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 23400b57cec5SDimitry Andric _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 23410b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 23420b57cec5SDimitry Andric (__v16sf)_mm512_div_ps(__A, __B), 23430b57cec5SDimitry Andric (__v16sf)__W); 23440b57cec5SDimitry Andric } 23450b57cec5SDimitry Andric 23460b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 23470b57cec5SDimitry Andric _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { 23480b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 23490b57cec5SDimitry Andric (__v16sf)_mm512_div_ps(__A, __B), 23500b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 23510b57cec5SDimitry Andric } 23520b57cec5SDimitry Andric 23530b57cec5SDimitry 
Andric #define _mm512_div_round_pd(A, B, R) \ 2354349cc55cSDimitry Andric ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \ 2355349cc55cSDimitry Andric (__v8df)(__m512d)(B), (int)(R))) 23560b57cec5SDimitry Andric 23570b57cec5SDimitry Andric #define _mm512_mask_div_round_pd(W, U, A, B, R) \ 2358349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 23590b57cec5SDimitry Andric (__v8df)_mm512_div_round_pd((A), (B), (R)), \ 2360349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 23610b57cec5SDimitry Andric 23620b57cec5SDimitry Andric #define _mm512_maskz_div_round_pd(U, A, B, R) \ 2363349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 23640b57cec5SDimitry Andric (__v8df)_mm512_div_round_pd((A), (B), (R)), \ 2365349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 23660b57cec5SDimitry Andric 23670b57cec5SDimitry Andric #define _mm512_div_round_ps(A, B, R) \ 2368349cc55cSDimitry Andric ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \ 2369349cc55cSDimitry Andric (__v16sf)(__m512)(B), (int)(R))) 23700b57cec5SDimitry Andric 23710b57cec5SDimitry Andric #define _mm512_mask_div_round_ps(W, U, A, B, R) \ 2372349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 23730b57cec5SDimitry Andric (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ 2374349cc55cSDimitry Andric (__v16sf)(__m512)(W))) 23750b57cec5SDimitry Andric 23760b57cec5SDimitry Andric #define _mm512_maskz_div_round_ps(U, A, B, R) \ 2377349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 23780b57cec5SDimitry Andric (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ 2379349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 23800b57cec5SDimitry Andric 23810b57cec5SDimitry Andric #define _mm512_roundscale_ps(A, B) \ 2382349cc55cSDimitry Andric ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ 23830b57cec5SDimitry Andric (__v16sf)_mm512_undefined_ps(), \ 23840b57cec5SDimitry Andric 
(__mmask16)-1, \ 2385349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 23860b57cec5SDimitry Andric 23870b57cec5SDimitry Andric #define _mm512_mask_roundscale_ps(A, B, C, imm) \ 2388349cc55cSDimitry Andric ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 23890b57cec5SDimitry Andric (__v16sf)(__m512)(A), (__mmask16)(B), \ 2390349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 23910b57cec5SDimitry Andric 23920b57cec5SDimitry Andric #define _mm512_maskz_roundscale_ps(A, B, imm) \ 2393349cc55cSDimitry Andric ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 23940b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 23950b57cec5SDimitry Andric (__mmask16)(A), \ 2396349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 23970b57cec5SDimitry Andric 23980b57cec5SDimitry Andric #define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \ 2399349cc55cSDimitry Andric ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 24000b57cec5SDimitry Andric (__v16sf)(__m512)(A), (__mmask16)(B), \ 2401349cc55cSDimitry Andric (int)(R))) 24020b57cec5SDimitry Andric 24030b57cec5SDimitry Andric #define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \ 2404349cc55cSDimitry Andric ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 24050b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 2406349cc55cSDimitry Andric (__mmask16)(A), (int)(R))) 24070b57cec5SDimitry Andric 24080b57cec5SDimitry Andric #define _mm512_roundscale_round_ps(A, imm, R) \ 2409349cc55cSDimitry Andric ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ 24100b57cec5SDimitry Andric (__v16sf)_mm512_undefined_ps(), \ 2411349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 24120b57cec5SDimitry Andric 24130b57cec5SDimitry Andric #define _mm512_roundscale_pd(A, B) \ 2414349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ 24150b57cec5SDimitry Andric 
(__v8df)_mm512_undefined_pd(), \ 24160b57cec5SDimitry Andric (__mmask8)-1, \ 2417349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 24180b57cec5SDimitry Andric 24190b57cec5SDimitry Andric #define _mm512_mask_roundscale_pd(A, B, C, imm) \ 2420349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 24210b57cec5SDimitry Andric (__v8df)(__m512d)(A), (__mmask8)(B), \ 2422349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 24230b57cec5SDimitry Andric 24240b57cec5SDimitry Andric #define _mm512_maskz_roundscale_pd(A, B, imm) \ 2425349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 24260b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 24270b57cec5SDimitry Andric (__mmask8)(A), \ 2428349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 24290b57cec5SDimitry Andric 24300b57cec5SDimitry Andric #define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \ 2431349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 24320b57cec5SDimitry Andric (__v8df)(__m512d)(A), (__mmask8)(B), \ 2433349cc55cSDimitry Andric (int)(R))) 24340b57cec5SDimitry Andric 24350b57cec5SDimitry Andric #define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \ 2436349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 24370b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 2438349cc55cSDimitry Andric (__mmask8)(A), (int)(R))) 24390b57cec5SDimitry Andric 24400b57cec5SDimitry Andric #define _mm512_roundscale_round_pd(A, imm, R) \ 2441349cc55cSDimitry Andric ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ 24420b57cec5SDimitry Andric (__v8df)_mm512_undefined_pd(), \ 2443349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 24440b57cec5SDimitry Andric 24450b57cec5SDimitry Andric #define _mm512_fmadd_round_pd(A, B, C, R) \ 2446349cc55cSDimitry Andric 
((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 24470b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 24480b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2449349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 24500b57cec5SDimitry Andric 24510b57cec5SDimitry Andric 24520b57cec5SDimitry Andric #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ 2453349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 24540b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 24550b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2456349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 24570b57cec5SDimitry Andric 24580b57cec5SDimitry Andric 24590b57cec5SDimitry Andric #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ 2460349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ 24610b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 24620b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2463349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 24640b57cec5SDimitry Andric 24650b57cec5SDimitry Andric 24660b57cec5SDimitry Andric #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ 2467349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 24680b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 24690b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2470349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 24710b57cec5SDimitry Andric 24720b57cec5SDimitry Andric 24730b57cec5SDimitry Andric #define _mm512_fmsub_round_pd(A, B, C, R) \ 2474349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 24750b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 24760b57cec5SDimitry Andric -(__v8df)(__m512d)(C), \ 2477349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 24780b57cec5SDimitry Andric 24790b57cec5SDimitry Andric 24800b57cec5SDimitry Andric #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ 2481349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), 
\ 24820b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 24830b57cec5SDimitry Andric -(__v8df)(__m512d)(C), \ 2484349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 24850b57cec5SDimitry Andric 24860b57cec5SDimitry Andric 24870b57cec5SDimitry Andric #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ 2488349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 24890b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 24900b57cec5SDimitry Andric -(__v8df)(__m512d)(C), \ 2491349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 24920b57cec5SDimitry Andric 24930b57cec5SDimitry Andric 24940b57cec5SDimitry Andric #define _mm512_fnmadd_round_pd(A, B, C, R) \ 2495349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 24960b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 24970b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2498349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 24990b57cec5SDimitry Andric 25000b57cec5SDimitry Andric 25010b57cec5SDimitry Andric #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ 2502349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ 25030b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 25040b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2505349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 25060b57cec5SDimitry Andric 25070b57cec5SDimitry Andric 25080b57cec5SDimitry Andric #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ 2509349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 25100b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 25110b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2512349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 25130b57cec5SDimitry Andric 25140b57cec5SDimitry Andric 25150b57cec5SDimitry Andric #define _mm512_fnmsub_round_pd(A, B, C, R) \ 2516349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 25170b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 
25180b57cec5SDimitry Andric -(__v8df)(__m512d)(C), \ 2519349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 25200b57cec5SDimitry Andric 25210b57cec5SDimitry Andric 25220b57cec5SDimitry Andric #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ 2523349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 25240b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 25250b57cec5SDimitry Andric -(__v8df)(__m512d)(C), \ 2526349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 25270b57cec5SDimitry Andric 25280b57cec5SDimitry Andric 25290b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 25300b57cec5SDimitry Andric _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) 25310b57cec5SDimitry Andric { 25320b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 25330b57cec5SDimitry Andric (__v8df) __B, 25340b57cec5SDimitry Andric (__v8df) __C, 25350b57cec5SDimitry Andric (__mmask8) -1, 25360b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 25370b57cec5SDimitry Andric } 25380b57cec5SDimitry Andric 25390b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 25400b57cec5SDimitry Andric _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 25410b57cec5SDimitry Andric { 25420b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 25430b57cec5SDimitry Andric (__v8df) __B, 25440b57cec5SDimitry Andric (__v8df) __C, 25450b57cec5SDimitry Andric (__mmask8) __U, 25460b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 25470b57cec5SDimitry Andric } 25480b57cec5SDimitry Andric 25490b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 25500b57cec5SDimitry Andric _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 25510b57cec5SDimitry Andric { 25520b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 25530b57cec5SDimitry Andric (__v8df) __B, 25540b57cec5SDimitry Andric (__v8df) 
__C, 25550b57cec5SDimitry Andric (__mmask8) __U, 25560b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 25570b57cec5SDimitry Andric } 25580b57cec5SDimitry Andric 25590b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 25600b57cec5SDimitry Andric _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 25610b57cec5SDimitry Andric { 25620b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 25630b57cec5SDimitry Andric (__v8df) __B, 25640b57cec5SDimitry Andric (__v8df) __C, 25650b57cec5SDimitry Andric (__mmask8) __U, 25660b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 25670b57cec5SDimitry Andric } 25680b57cec5SDimitry Andric 25690b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 25700b57cec5SDimitry Andric _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) 25710b57cec5SDimitry Andric { 25720b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 25730b57cec5SDimitry Andric (__v8df) __B, 25740b57cec5SDimitry Andric -(__v8df) __C, 25750b57cec5SDimitry Andric (__mmask8) -1, 25760b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 25770b57cec5SDimitry Andric } 25780b57cec5SDimitry Andric 25790b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 25800b57cec5SDimitry Andric _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 25810b57cec5SDimitry Andric { 25820b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 25830b57cec5SDimitry Andric (__v8df) __B, 25840b57cec5SDimitry Andric -(__v8df) __C, 25850b57cec5SDimitry Andric (__mmask8) __U, 25860b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 25870b57cec5SDimitry Andric } 25880b57cec5SDimitry Andric 25890b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 25900b57cec5SDimitry Andric _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 25910b57cec5SDimitry Andric { 25920b57cec5SDimitry 
Andric return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 25930b57cec5SDimitry Andric (__v8df) __B, 25940b57cec5SDimitry Andric -(__v8df) __C, 25950b57cec5SDimitry Andric (__mmask8) __U, 25960b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 25970b57cec5SDimitry Andric } 25980b57cec5SDimitry Andric 25990b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 26000b57cec5SDimitry Andric _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) 26010b57cec5SDimitry Andric { 26020b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 26030b57cec5SDimitry Andric -(__v8df) __B, 26040b57cec5SDimitry Andric (__v8df) __C, 26050b57cec5SDimitry Andric (__mmask8) -1, 26060b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 26070b57cec5SDimitry Andric } 26080b57cec5SDimitry Andric 26090b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 26100b57cec5SDimitry Andric _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 26110b57cec5SDimitry Andric { 26120b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 26130b57cec5SDimitry Andric (__v8df) __B, 26140b57cec5SDimitry Andric (__v8df) __C, 26150b57cec5SDimitry Andric (__mmask8) __U, 26160b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 26170b57cec5SDimitry Andric } 26180b57cec5SDimitry Andric 26190b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 26200b57cec5SDimitry Andric _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 26210b57cec5SDimitry Andric { 26220b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 26230b57cec5SDimitry Andric (__v8df) __B, 26240b57cec5SDimitry Andric (__v8df) __C, 26250b57cec5SDimitry Andric (__mmask8) __U, 26260b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 26270b57cec5SDimitry Andric } 26280b57cec5SDimitry Andric 26290b57cec5SDimitry Andric static __inline__ __m512d 
__DEFAULT_FN_ATTRS512 26300b57cec5SDimitry Andric _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) 26310b57cec5SDimitry Andric { 26320b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 26330b57cec5SDimitry Andric -(__v8df) __B, 26340b57cec5SDimitry Andric -(__v8df) __C, 26350b57cec5SDimitry Andric (__mmask8) -1, 26360b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 26370b57cec5SDimitry Andric } 26380b57cec5SDimitry Andric 26390b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 26400b57cec5SDimitry Andric _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 26410b57cec5SDimitry Andric { 26420b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 26430b57cec5SDimitry Andric (__v8df) __B, 26440b57cec5SDimitry Andric -(__v8df) __C, 26450b57cec5SDimitry Andric (__mmask8) __U, 26460b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 26470b57cec5SDimitry Andric } 26480b57cec5SDimitry Andric 26490b57cec5SDimitry Andric #define _mm512_fmadd_round_ps(A, B, C, R) \ 2650349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 26510b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 26520b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 2653349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 26540b57cec5SDimitry Andric 26550b57cec5SDimitry Andric 26560b57cec5SDimitry Andric #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ 2657349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 26580b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 26590b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 2660349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 26610b57cec5SDimitry Andric 26620b57cec5SDimitry Andric 26630b57cec5SDimitry Andric #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ 2664349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ 26650b57cec5SDimitry Andric 
(__v16sf)(__m512)(B), \ 26660b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 2667349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 26680b57cec5SDimitry Andric 26690b57cec5SDimitry Andric 26700b57cec5SDimitry Andric #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ 2671349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 26720b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 26730b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 2674349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 26750b57cec5SDimitry Andric 26760b57cec5SDimitry Andric 26770b57cec5SDimitry Andric #define _mm512_fmsub_round_ps(A, B, C, R) \ 2678349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 26790b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 26800b57cec5SDimitry Andric -(__v16sf)(__m512)(C), \ 2681349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 26820b57cec5SDimitry Andric 26830b57cec5SDimitry Andric 26840b57cec5SDimitry Andric #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ 2685349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 26860b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 26870b57cec5SDimitry Andric -(__v16sf)(__m512)(C), \ 2688349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 26890b57cec5SDimitry Andric 26900b57cec5SDimitry Andric 26910b57cec5SDimitry Andric #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ 2692349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 26930b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 26940b57cec5SDimitry Andric -(__v16sf)(__m512)(C), \ 2695349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 26960b57cec5SDimitry Andric 26970b57cec5SDimitry Andric 26980b57cec5SDimitry Andric #define _mm512_fnmadd_round_ps(A, B, C, R) \ 2699349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 27000b57cec5SDimitry Andric -(__v16sf)(__m512)(B), \ 27010b57cec5SDimitry Andric 
(__v16sf)(__m512)(C), \ 2702349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 27030b57cec5SDimitry Andric 27040b57cec5SDimitry Andric 27050b57cec5SDimitry Andric #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ 2706349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ 27070b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 27080b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 2709349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 27100b57cec5SDimitry Andric 27110b57cec5SDimitry Andric 27120b57cec5SDimitry Andric #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ 2713349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 27140b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 27150b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 2716349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 27170b57cec5SDimitry Andric 27180b57cec5SDimitry Andric 27190b57cec5SDimitry Andric #define _mm512_fnmsub_round_ps(A, B, C, R) \ 2720349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 27210b57cec5SDimitry Andric -(__v16sf)(__m512)(B), \ 27220b57cec5SDimitry Andric -(__v16sf)(__m512)(C), \ 2723349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 27240b57cec5SDimitry Andric 27250b57cec5SDimitry Andric 27260b57cec5SDimitry Andric #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ 2727349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 27280b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 27290b57cec5SDimitry Andric -(__v16sf)(__m512)(C), \ 2730349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 27310b57cec5SDimitry Andric 27320b57cec5SDimitry Andric 27330b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 27340b57cec5SDimitry Andric _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) 27350b57cec5SDimitry Andric { 27360b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 27370b57cec5SDimitry Andric 
(__v16sf) __B, 27380b57cec5SDimitry Andric (__v16sf) __C, 27390b57cec5SDimitry Andric (__mmask16) -1, 27400b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 27410b57cec5SDimitry Andric } 27420b57cec5SDimitry Andric 27430b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 27440b57cec5SDimitry Andric _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 27450b57cec5SDimitry Andric { 27460b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 27470b57cec5SDimitry Andric (__v16sf) __B, 27480b57cec5SDimitry Andric (__v16sf) __C, 27490b57cec5SDimitry Andric (__mmask16) __U, 27500b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 27510b57cec5SDimitry Andric } 27520b57cec5SDimitry Andric 27530b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 27540b57cec5SDimitry Andric _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 27550b57cec5SDimitry Andric { 27560b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 27570b57cec5SDimitry Andric (__v16sf) __B, 27580b57cec5SDimitry Andric (__v16sf) __C, 27590b57cec5SDimitry Andric (__mmask16) __U, 27600b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 27610b57cec5SDimitry Andric } 27620b57cec5SDimitry Andric 27630b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 27640b57cec5SDimitry Andric _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 27650b57cec5SDimitry Andric { 27660b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 27670b57cec5SDimitry Andric (__v16sf) __B, 27680b57cec5SDimitry Andric (__v16sf) __C, 27690b57cec5SDimitry Andric (__mmask16) __U, 27700b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 27710b57cec5SDimitry Andric } 27720b57cec5SDimitry Andric 27730b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 27740b57cec5SDimitry Andric _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) 
27750b57cec5SDimitry Andric { 27760b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 27770b57cec5SDimitry Andric (__v16sf) __B, 27780b57cec5SDimitry Andric -(__v16sf) __C, 27790b57cec5SDimitry Andric (__mmask16) -1, 27800b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 27810b57cec5SDimitry Andric } 27820b57cec5SDimitry Andric 27830b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 27840b57cec5SDimitry Andric _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 27850b57cec5SDimitry Andric { 27860b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 27870b57cec5SDimitry Andric (__v16sf) __B, 27880b57cec5SDimitry Andric -(__v16sf) __C, 27890b57cec5SDimitry Andric (__mmask16) __U, 27900b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 27910b57cec5SDimitry Andric } 27920b57cec5SDimitry Andric 27930b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 27940b57cec5SDimitry Andric _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 27950b57cec5SDimitry Andric { 27960b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 27970b57cec5SDimitry Andric (__v16sf) __B, 27980b57cec5SDimitry Andric -(__v16sf) __C, 27990b57cec5SDimitry Andric (__mmask16) __U, 28000b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 28010b57cec5SDimitry Andric } 28020b57cec5SDimitry Andric 28030b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 28040b57cec5SDimitry Andric _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) 28050b57cec5SDimitry Andric { 28060b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 28070b57cec5SDimitry Andric -(__v16sf) __B, 28080b57cec5SDimitry Andric (__v16sf) __C, 28090b57cec5SDimitry Andric (__mmask16) -1, 28100b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 28110b57cec5SDimitry Andric } 28120b57cec5SDimitry Andric 28130b57cec5SDimitry 
Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 28140b57cec5SDimitry Andric _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 28150b57cec5SDimitry Andric { 28160b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 28170b57cec5SDimitry Andric (__v16sf) __B, 28180b57cec5SDimitry Andric (__v16sf) __C, 28190b57cec5SDimitry Andric (__mmask16) __U, 28200b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 28210b57cec5SDimitry Andric } 28220b57cec5SDimitry Andric 28230b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 28240b57cec5SDimitry Andric _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 28250b57cec5SDimitry Andric { 28260b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 28270b57cec5SDimitry Andric (__v16sf) __B, 28280b57cec5SDimitry Andric (__v16sf) __C, 28290b57cec5SDimitry Andric (__mmask16) __U, 28300b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 28310b57cec5SDimitry Andric } 28320b57cec5SDimitry Andric 28330b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 28340b57cec5SDimitry Andric _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) 28350b57cec5SDimitry Andric { 28360b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 28370b57cec5SDimitry Andric -(__v16sf) __B, 28380b57cec5SDimitry Andric -(__v16sf) __C, 28390b57cec5SDimitry Andric (__mmask16) -1, 28400b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 28410b57cec5SDimitry Andric } 28420b57cec5SDimitry Andric 28430b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 28440b57cec5SDimitry Andric _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 28450b57cec5SDimitry Andric { 28460b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 28470b57cec5SDimitry Andric (__v16sf) __B, 28480b57cec5SDimitry Andric -(__v16sf) __C, 
28490b57cec5SDimitry Andric (__mmask16) __U, 28500b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 28510b57cec5SDimitry Andric } 28520b57cec5SDimitry Andric 28530b57cec5SDimitry Andric #define _mm512_fmaddsub_round_pd(A, B, C, R) \ 2854349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 28550b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 28560b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2857349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 28580b57cec5SDimitry Andric 28590b57cec5SDimitry Andric 28600b57cec5SDimitry Andric #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ 2861349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 28620b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 28630b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2864349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 28650b57cec5SDimitry Andric 28660b57cec5SDimitry Andric 28670b57cec5SDimitry Andric #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ 2868349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ 28690b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 28700b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2871349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 28720b57cec5SDimitry Andric 28730b57cec5SDimitry Andric 28740b57cec5SDimitry Andric #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ 2875349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 28760b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 28770b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 2878349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 28790b57cec5SDimitry Andric 28800b57cec5SDimitry Andric 28810b57cec5SDimitry Andric #define _mm512_fmsubadd_round_pd(A, B, C, R) \ 2882349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 28830b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 28840b57cec5SDimitry Andric 
-(__v8df)(__m512d)(C), \ 2885349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 28860b57cec5SDimitry Andric 28870b57cec5SDimitry Andric 28880b57cec5SDimitry Andric #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ 2889349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 28900b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 28910b57cec5SDimitry Andric -(__v8df)(__m512d)(C), \ 2892349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 28930b57cec5SDimitry Andric 28940b57cec5SDimitry Andric 28950b57cec5SDimitry Andric #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ 2896349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 28970b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 28980b57cec5SDimitry Andric -(__v8df)(__m512d)(C), \ 2899349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 29000b57cec5SDimitry Andric 29010b57cec5SDimitry Andric 29020b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 29030b57cec5SDimitry Andric _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) 29040b57cec5SDimitry Andric { 29050b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 29060b57cec5SDimitry Andric (__v8df) __B, 29070b57cec5SDimitry Andric (__v8df) __C, 29080b57cec5SDimitry Andric (__mmask8) -1, 29090b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 29100b57cec5SDimitry Andric } 29110b57cec5SDimitry Andric 29120b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 29130b57cec5SDimitry Andric _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 29140b57cec5SDimitry Andric { 29150b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 29160b57cec5SDimitry Andric (__v8df) __B, 29170b57cec5SDimitry Andric (__v8df) __C, 29180b57cec5SDimitry Andric (__mmask8) __U, 29190b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 29200b57cec5SDimitry Andric } 29210b57cec5SDimitry Andric 
29220b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 29230b57cec5SDimitry Andric _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 29240b57cec5SDimitry Andric { 29250b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 29260b57cec5SDimitry Andric (__v8df) __B, 29270b57cec5SDimitry Andric (__v8df) __C, 29280b57cec5SDimitry Andric (__mmask8) __U, 29290b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 29300b57cec5SDimitry Andric } 29310b57cec5SDimitry Andric 29320b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 29330b57cec5SDimitry Andric _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 29340b57cec5SDimitry Andric { 29350b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 29360b57cec5SDimitry Andric (__v8df) __B, 29370b57cec5SDimitry Andric (__v8df) __C, 29380b57cec5SDimitry Andric (__mmask8) __U, 29390b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 29400b57cec5SDimitry Andric } 29410b57cec5SDimitry Andric 29420b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 29430b57cec5SDimitry Andric _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) 29440b57cec5SDimitry Andric { 29450b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 29460b57cec5SDimitry Andric (__v8df) __B, 29470b57cec5SDimitry Andric -(__v8df) __C, 29480b57cec5SDimitry Andric (__mmask8) -1, 29490b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 29500b57cec5SDimitry Andric } 29510b57cec5SDimitry Andric 29520b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 29530b57cec5SDimitry Andric _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 29540b57cec5SDimitry Andric { 29550b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 29560b57cec5SDimitry Andric (__v8df) __B, 29570b57cec5SDimitry 
Andric -(__v8df) __C, 29580b57cec5SDimitry Andric (__mmask8) __U, 29590b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 29600b57cec5SDimitry Andric } 29610b57cec5SDimitry Andric 29620b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 29630b57cec5SDimitry Andric _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 29640b57cec5SDimitry Andric { 29650b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 29660b57cec5SDimitry Andric (__v8df) __B, 29670b57cec5SDimitry Andric -(__v8df) __C, 29680b57cec5SDimitry Andric (__mmask8) __U, 29690b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 29700b57cec5SDimitry Andric } 29710b57cec5SDimitry Andric 29720b57cec5SDimitry Andric #define _mm512_fmaddsub_round_ps(A, B, C, R) \ 2973349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 29740b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 29750b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 2976349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 29770b57cec5SDimitry Andric 29780b57cec5SDimitry Andric 29790b57cec5SDimitry Andric #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \ 2980349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 29810b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 29820b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 2983349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 29840b57cec5SDimitry Andric 29850b57cec5SDimitry Andric 29860b57cec5SDimitry Andric #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \ 2987349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ 29880b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 29890b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 2990349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 29910b57cec5SDimitry Andric 29920b57cec5SDimitry Andric 29930b57cec5SDimitry Andric #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ 
2994349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 29950b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 29960b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 2997349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 29980b57cec5SDimitry Andric 29990b57cec5SDimitry Andric 30000b57cec5SDimitry Andric #define _mm512_fmsubadd_round_ps(A, B, C, R) \ 3001349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 30020b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 30030b57cec5SDimitry Andric -(__v16sf)(__m512)(C), \ 3004349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 30050b57cec5SDimitry Andric 30060b57cec5SDimitry Andric 30070b57cec5SDimitry Andric #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ 3008349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 30090b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 30100b57cec5SDimitry Andric -(__v16sf)(__m512)(C), \ 3011349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 30120b57cec5SDimitry Andric 30130b57cec5SDimitry Andric 30140b57cec5SDimitry Andric #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ 3015349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 30160b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 30170b57cec5SDimitry Andric -(__v16sf)(__m512)(C), \ 3018349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 30190b57cec5SDimitry Andric 30200b57cec5SDimitry Andric 30210b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 30220b57cec5SDimitry Andric _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) 30230b57cec5SDimitry Andric { 30240b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 30250b57cec5SDimitry Andric (__v16sf) __B, 30260b57cec5SDimitry Andric (__v16sf) __C, 30270b57cec5SDimitry Andric (__mmask16) -1, 30280b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 30290b57cec5SDimitry Andric } 
30300b57cec5SDimitry Andric 30310b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 30320b57cec5SDimitry Andric _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 30330b57cec5SDimitry Andric { 30340b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 30350b57cec5SDimitry Andric (__v16sf) __B, 30360b57cec5SDimitry Andric (__v16sf) __C, 30370b57cec5SDimitry Andric (__mmask16) __U, 30380b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 30390b57cec5SDimitry Andric } 30400b57cec5SDimitry Andric 30410b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 30420b57cec5SDimitry Andric _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 30430b57cec5SDimitry Andric { 30440b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 30450b57cec5SDimitry Andric (__v16sf) __B, 30460b57cec5SDimitry Andric (__v16sf) __C, 30470b57cec5SDimitry Andric (__mmask16) __U, 30480b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 30490b57cec5SDimitry Andric } 30500b57cec5SDimitry Andric 30510b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 30520b57cec5SDimitry Andric _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 30530b57cec5SDimitry Andric { 30540b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 30550b57cec5SDimitry Andric (__v16sf) __B, 30560b57cec5SDimitry Andric (__v16sf) __C, 30570b57cec5SDimitry Andric (__mmask16) __U, 30580b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 30590b57cec5SDimitry Andric } 30600b57cec5SDimitry Andric 30610b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 30620b57cec5SDimitry Andric _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) 30630b57cec5SDimitry Andric { 30640b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 30650b57cec5SDimitry Andric (__v16sf) __B, 
30660b57cec5SDimitry Andric -(__v16sf) __C, 30670b57cec5SDimitry Andric (__mmask16) -1, 30680b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 30690b57cec5SDimitry Andric } 30700b57cec5SDimitry Andric 30710b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 30720b57cec5SDimitry Andric _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 30730b57cec5SDimitry Andric { 30740b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 30750b57cec5SDimitry Andric (__v16sf) __B, 30760b57cec5SDimitry Andric -(__v16sf) __C, 30770b57cec5SDimitry Andric (__mmask16) __U, 30780b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 30790b57cec5SDimitry Andric } 30800b57cec5SDimitry Andric 30810b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 30820b57cec5SDimitry Andric _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 30830b57cec5SDimitry Andric { 30840b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 30850b57cec5SDimitry Andric (__v16sf) __B, 30860b57cec5SDimitry Andric -(__v16sf) __C, 30870b57cec5SDimitry Andric (__mmask16) __U, 30880b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 30890b57cec5SDimitry Andric } 30900b57cec5SDimitry Andric 30910b57cec5SDimitry Andric #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ 3092349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ 30930b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 30940b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 3095349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 30960b57cec5SDimitry Andric 30970b57cec5SDimitry Andric 30980b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 30990b57cec5SDimitry Andric _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 31000b57cec5SDimitry Andric { 31010b57cec5SDimitry Andric return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 
31020b57cec5SDimitry Andric (__v8df) __B, 31030b57cec5SDimitry Andric (__v8df) __C, 31040b57cec5SDimitry Andric (__mmask8) __U, 31050b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 31060b57cec5SDimitry Andric } 31070b57cec5SDimitry Andric 31080b57cec5SDimitry Andric #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ 3109349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ 31100b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 31110b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 3112349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 31130b57cec5SDimitry Andric 31140b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 31150b57cec5SDimitry Andric _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 31160b57cec5SDimitry Andric { 31170b57cec5SDimitry Andric return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 31180b57cec5SDimitry Andric (__v16sf) __B, 31190b57cec5SDimitry Andric (__v16sf) __C, 31200b57cec5SDimitry Andric (__mmask16) __U, 31210b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 31220b57cec5SDimitry Andric } 31230b57cec5SDimitry Andric 31240b57cec5SDimitry Andric #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ 3125349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ 31260b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 31270b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 3128349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 31290b57cec5SDimitry Andric 31300b57cec5SDimitry Andric 31310b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 31320b57cec5SDimitry Andric _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 31330b57cec5SDimitry Andric { 31340b57cec5SDimitry Andric return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 31350b57cec5SDimitry Andric (__v8df) __B, 31360b57cec5SDimitry Andric (__v8df) __C, 31370b57cec5SDimitry Andric (__mmask8) __U, 31380b57cec5SDimitry 
Andric _MM_FROUND_CUR_DIRECTION); 31390b57cec5SDimitry Andric } 31400b57cec5SDimitry Andric 31410b57cec5SDimitry Andric #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ 3142349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ 31430b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 31440b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 3145349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 31460b57cec5SDimitry Andric 31470b57cec5SDimitry Andric 31480b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 31490b57cec5SDimitry Andric _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 31500b57cec5SDimitry Andric { 31510b57cec5SDimitry Andric return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 31520b57cec5SDimitry Andric (__v16sf) __B, 31530b57cec5SDimitry Andric (__v16sf) __C, 31540b57cec5SDimitry Andric (__mmask16) __U, 31550b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 31560b57cec5SDimitry Andric } 31570b57cec5SDimitry Andric 31580b57cec5SDimitry Andric #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ 3159349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 31600b57cec5SDimitry Andric -(__v8df)(__m512d)(B), \ 31610b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 3162349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 31630b57cec5SDimitry Andric 31640b57cec5SDimitry Andric 31650b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 31660b57cec5SDimitry Andric _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 31670b57cec5SDimitry Andric { 31680b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 31690b57cec5SDimitry Andric -(__v8df) __B, 31700b57cec5SDimitry Andric (__v8df) __C, 31710b57cec5SDimitry Andric (__mmask8) __U, 31720b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 31730b57cec5SDimitry Andric } 31740b57cec5SDimitry Andric 31750b57cec5SDimitry Andric 
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ 3176349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 31770b57cec5SDimitry Andric -(__v16sf)(__m512)(B), \ 31780b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 3179349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 31800b57cec5SDimitry Andric 31810b57cec5SDimitry Andric 31820b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 31830b57cec5SDimitry Andric _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 31840b57cec5SDimitry Andric { 31850b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 31860b57cec5SDimitry Andric -(__v16sf) __B, 31870b57cec5SDimitry Andric (__v16sf) __C, 31880b57cec5SDimitry Andric (__mmask16) __U, 31890b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 31900b57cec5SDimitry Andric } 31910b57cec5SDimitry Andric 31920b57cec5SDimitry Andric #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ 3193349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 31940b57cec5SDimitry Andric -(__v8df)(__m512d)(B), \ 31950b57cec5SDimitry Andric -(__v8df)(__m512d)(C), \ 3196349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 31970b57cec5SDimitry Andric 31980b57cec5SDimitry Andric 31990b57cec5SDimitry Andric #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ 3200349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \ 32010b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 32020b57cec5SDimitry Andric (__v8df)(__m512d)(C), \ 3203349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 32040b57cec5SDimitry Andric 32050b57cec5SDimitry Andric 32060b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 32070b57cec5SDimitry Andric _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 32080b57cec5SDimitry Andric { 32090b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 
32100b57cec5SDimitry Andric -(__v8df) __B, 32110b57cec5SDimitry Andric -(__v8df) __C, 32120b57cec5SDimitry Andric (__mmask8) __U, 32130b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 32140b57cec5SDimitry Andric } 32150b57cec5SDimitry Andric 32160b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 32170b57cec5SDimitry Andric _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 32180b57cec5SDimitry Andric { 32190b57cec5SDimitry Andric return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A, 32200b57cec5SDimitry Andric (__v8df) __B, 32210b57cec5SDimitry Andric (__v8df) __C, 32220b57cec5SDimitry Andric (__mmask8) __U, 32230b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 32240b57cec5SDimitry Andric } 32250b57cec5SDimitry Andric 32260b57cec5SDimitry Andric #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ 3227349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 32280b57cec5SDimitry Andric -(__v16sf)(__m512)(B), \ 32290b57cec5SDimitry Andric -(__v16sf)(__m512)(C), \ 3230349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 32310b57cec5SDimitry Andric 32320b57cec5SDimitry Andric 32330b57cec5SDimitry Andric #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ 3234349cc55cSDimitry Andric ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \ 32350b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 32360b57cec5SDimitry Andric (__v16sf)(__m512)(C), \ 3237349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 32380b57cec5SDimitry Andric 32390b57cec5SDimitry Andric 32400b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 32410b57cec5SDimitry Andric _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 32420b57cec5SDimitry Andric { 32430b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 32440b57cec5SDimitry Andric -(__v16sf) __B, 32450b57cec5SDimitry Andric -(__v16sf) __C, 32460b57cec5SDimitry Andric (__mmask16) 
__U, 32470b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 32480b57cec5SDimitry Andric } 32490b57cec5SDimitry Andric 32500b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 32510b57cec5SDimitry Andric _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 32520b57cec5SDimitry Andric { 32530b57cec5SDimitry Andric return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A, 32540b57cec5SDimitry Andric (__v16sf) __B, 32550b57cec5SDimitry Andric (__v16sf) __C, 32560b57cec5SDimitry Andric (__mmask16) __U, 32570b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 32580b57cec5SDimitry Andric } 32590b57cec5SDimitry Andric 32600b57cec5SDimitry Andric 32610b57cec5SDimitry Andric 32620b57cec5SDimitry Andric /* Vector permutations */ 32630b57cec5SDimitry Andric 32640b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 32650b57cec5SDimitry Andric _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) 32660b57cec5SDimitry Andric { 32670b57cec5SDimitry Andric return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I, 32680b57cec5SDimitry Andric (__v16si) __B); 32690b57cec5SDimitry Andric } 32700b57cec5SDimitry Andric 32710b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 32720b57cec5SDimitry Andric _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, 32730b57cec5SDimitry Andric __m512i __B) 32740b57cec5SDimitry Andric { 32750b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 32760b57cec5SDimitry Andric (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), 32770b57cec5SDimitry Andric (__v16si)__A); 32780b57cec5SDimitry Andric } 32790b57cec5SDimitry Andric 32800b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 32810b57cec5SDimitry Andric _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, 32820b57cec5SDimitry Andric __m512i __B) 32830b57cec5SDimitry Andric { 32840b57cec5SDimitry Andric return 
(__m512i)__builtin_ia32_selectd_512(__U, 32850b57cec5SDimitry Andric (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), 32860b57cec5SDimitry Andric (__v16si)__I); 32870b57cec5SDimitry Andric } 32880b57cec5SDimitry Andric 32890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 32900b57cec5SDimitry Andric _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, 32910b57cec5SDimitry Andric __m512i __B) 32920b57cec5SDimitry Andric { 32930b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 32940b57cec5SDimitry Andric (__v16si)_mm512_permutex2var_epi32(__A, __I, __B), 32950b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 32960b57cec5SDimitry Andric } 32970b57cec5SDimitry Andric 32980b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 32990b57cec5SDimitry Andric _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) 33000b57cec5SDimitry Andric { 33010b57cec5SDimitry Andric return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I, 33020b57cec5SDimitry Andric (__v8di) __B); 33030b57cec5SDimitry Andric } 33040b57cec5SDimitry Andric 33050b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 33060b57cec5SDimitry Andric _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, 33070b57cec5SDimitry Andric __m512i __B) 33080b57cec5SDimitry Andric { 33090b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512(__U, 33100b57cec5SDimitry Andric (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), 33110b57cec5SDimitry Andric (__v8di)__A); 33120b57cec5SDimitry Andric } 33130b57cec5SDimitry Andric 33140b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 33150b57cec5SDimitry Andric _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, 33160b57cec5SDimitry Andric __m512i __B) 33170b57cec5SDimitry Andric { 33180b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512(__U, 33190b57cec5SDimitry Andric 
(__v8di)_mm512_permutex2var_epi64(__A, __I, __B), 33200b57cec5SDimitry Andric (__v8di)__I); 33210b57cec5SDimitry Andric } 33220b57cec5SDimitry Andric 33230b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 33240b57cec5SDimitry Andric _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, 33250b57cec5SDimitry Andric __m512i __B) 33260b57cec5SDimitry Andric { 33270b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512(__U, 33280b57cec5SDimitry Andric (__v8di)_mm512_permutex2var_epi64(__A, __I, __B), 33290b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 33300b57cec5SDimitry Andric } 33310b57cec5SDimitry Andric 33320b57cec5SDimitry Andric #define _mm512_alignr_epi64(A, B, I) \ 3333349cc55cSDimitry Andric ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \ 3334349cc55cSDimitry Andric (__v8di)(__m512i)(B), (int)(I))) 33350b57cec5SDimitry Andric 33360b57cec5SDimitry Andric #define _mm512_mask_alignr_epi64(W, U, A, B, imm) \ 3337349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 33380b57cec5SDimitry Andric (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ 3339349cc55cSDimitry Andric (__v8di)(__m512i)(W))) 33400b57cec5SDimitry Andric 33410b57cec5SDimitry Andric #define _mm512_maskz_alignr_epi64(U, A, B, imm) \ 3342349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 33430b57cec5SDimitry Andric (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ 3344349cc55cSDimitry Andric (__v8di)_mm512_setzero_si512())) 33450b57cec5SDimitry Andric 33460b57cec5SDimitry Andric #define _mm512_alignr_epi32(A, B, I) \ 3347349cc55cSDimitry Andric ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \ 3348349cc55cSDimitry Andric (__v16si)(__m512i)(B), (int)(I))) 33490b57cec5SDimitry Andric 33500b57cec5SDimitry Andric #define _mm512_mask_alignr_epi32(W, U, A, B, imm) \ 3351349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 33520b57cec5SDimitry Andric 
(__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ 3353349cc55cSDimitry Andric (__v16si)(__m512i)(W))) 33540b57cec5SDimitry Andric 33550b57cec5SDimitry Andric #define _mm512_maskz_alignr_epi32(U, A, B, imm) \ 3356349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 33570b57cec5SDimitry Andric (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ 3358349cc55cSDimitry Andric (__v16si)_mm512_setzero_si512())) 33590b57cec5SDimitry Andric /* Vector Extract */ 33600b57cec5SDimitry Andric 33610b57cec5SDimitry Andric #define _mm512_extractf64x4_pd(A, I) \ 3362349cc55cSDimitry Andric ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ 33630b57cec5SDimitry Andric (__v4df)_mm256_undefined_pd(), \ 3364349cc55cSDimitry Andric (__mmask8)-1)) 33650b57cec5SDimitry Andric 33660b57cec5SDimitry Andric #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ 3367349cc55cSDimitry Andric ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ 33680b57cec5SDimitry Andric (__v4df)(__m256d)(W), \ 3369349cc55cSDimitry Andric (__mmask8)(U))) 33700b57cec5SDimitry Andric 33710b57cec5SDimitry Andric #define _mm512_maskz_extractf64x4_pd(U, A, imm) \ 3372349cc55cSDimitry Andric ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ 33730b57cec5SDimitry Andric (__v4df)_mm256_setzero_pd(), \ 3374349cc55cSDimitry Andric (__mmask8)(U))) 33750b57cec5SDimitry Andric 33760b57cec5SDimitry Andric #define _mm512_extractf32x4_ps(A, I) \ 3377349cc55cSDimitry Andric ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ 33780b57cec5SDimitry Andric (__v4sf)_mm_undefined_ps(), \ 3379349cc55cSDimitry Andric (__mmask8)-1)) 33800b57cec5SDimitry Andric 33810b57cec5SDimitry Andric #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ 3382349cc55cSDimitry Andric ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ 33830b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 3384349cc55cSDimitry 
Andric (__mmask8)(U))) 33850b57cec5SDimitry Andric 33860b57cec5SDimitry Andric #define _mm512_maskz_extractf32x4_ps(U, A, imm) \ 3387349cc55cSDimitry Andric ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ 33880b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 3389349cc55cSDimitry Andric (__mmask8)(U))) 33900b57cec5SDimitry Andric 33910b57cec5SDimitry Andric /* Vector Blend */ 33920b57cec5SDimitry Andric 33930b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 33940b57cec5SDimitry Andric _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) 33950b57cec5SDimitry Andric { 33960b57cec5SDimitry Andric return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 33970b57cec5SDimitry Andric (__v8df) __W, 33980b57cec5SDimitry Andric (__v8df) __A); 33990b57cec5SDimitry Andric } 34000b57cec5SDimitry Andric 34010b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 34020b57cec5SDimitry Andric _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) 34030b57cec5SDimitry Andric { 34040b57cec5SDimitry Andric return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 34050b57cec5SDimitry Andric (__v16sf) __W, 34060b57cec5SDimitry Andric (__v16sf) __A); 34070b57cec5SDimitry Andric } 34080b57cec5SDimitry Andric 34090b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 34100b57cec5SDimitry Andric _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) 34110b57cec5SDimitry Andric { 34120b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 34130b57cec5SDimitry Andric (__v8di) __W, 34140b57cec5SDimitry Andric (__v8di) __A); 34150b57cec5SDimitry Andric } 34160b57cec5SDimitry Andric 34170b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 34180b57cec5SDimitry Andric _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) 34190b57cec5SDimitry Andric { 34200b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) 
__U, 34210b57cec5SDimitry Andric (__v16si) __W, 34220b57cec5SDimitry Andric (__v16si) __A); 34230b57cec5SDimitry Andric } 34240b57cec5SDimitry Andric 34250b57cec5SDimitry Andric /* Compare */ 34260b57cec5SDimitry Andric 34270b57cec5SDimitry Andric #define _mm512_cmp_round_ps_mask(A, B, P, R) \ 3428349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 34290b57cec5SDimitry Andric (__v16sf)(__m512)(B), (int)(P), \ 3430349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 34310b57cec5SDimitry Andric 34320b57cec5SDimitry Andric #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \ 3433349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 34340b57cec5SDimitry Andric (__v16sf)(__m512)(B), (int)(P), \ 3435349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 34360b57cec5SDimitry Andric 34370b57cec5SDimitry Andric #define _mm512_cmp_ps_mask(A, B, P) \ 34380b57cec5SDimitry Andric _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 34390b57cec5SDimitry Andric #define _mm512_mask_cmp_ps_mask(U, A, B, P) \ 34400b57cec5SDimitry Andric _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 34410b57cec5SDimitry Andric 34420b57cec5SDimitry Andric #define _mm512_cmpeq_ps_mask(A, B) \ 34430b57cec5SDimitry Andric _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ) 34440b57cec5SDimitry Andric #define _mm512_mask_cmpeq_ps_mask(k, A, B) \ 34450b57cec5SDimitry Andric _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ) 34460b57cec5SDimitry Andric 34470b57cec5SDimitry Andric #define _mm512_cmplt_ps_mask(A, B) \ 34480b57cec5SDimitry Andric _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS) 34490b57cec5SDimitry Andric #define _mm512_mask_cmplt_ps_mask(k, A, B) \ 34500b57cec5SDimitry Andric _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS) 34510b57cec5SDimitry Andric 34520b57cec5SDimitry Andric #define _mm512_cmple_ps_mask(A, B) \ 34530b57cec5SDimitry Andric _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS) 
/* Packed-single fixed-predicate compares (continued). Each macro expands to
 * _mm512_cmp_ps_mask or _mm512_mask_cmp_ps_mask with the _CMP_* constant
 * shown; the mask_ forms forward k as the first argument. */
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

/* Packed-double compare producing an 8-bit mask. P (predicate) and R
 * (rounding argument) are cast to int and forwarded to cmppd512_mask; the
 * unmasked form passes an all-ones mask, the masked form passes U. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(P), \
      (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(P), \
      (__mmask8)(U), (int)(R)))

/* Same compares with the rounding argument fixed to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Packed-double fixed-predicate shorthands, mirroring the packed-single
 * set: same macro name stems and the same _CMP_* constants. */
#define _mm512_cmpeq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)

/* Conversion */

/* Float to unsigned 32-bit conversion through cvttps2udq512_mask, with an
 * explicit rounding argument R. The unmasked form passes an undefined vector
 * as the second operand and an all-ones mask; the masked form passes W and
 * U. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)_mm512_undefined_epi32(), \
      (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)))
35480b57cec5SDimitry Andric 35490b57cec5SDimitry Andric #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \ 3550349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 35510b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512(), \ 3552349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 35530b57cec5SDimitry Andric 35540b57cec5SDimitry Andric 35550b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 35560b57cec5SDimitry Andric _mm512_cvttps_epu32(__m512 __A) 35570b57cec5SDimitry Andric { 35580b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 35590b57cec5SDimitry Andric (__v16si) 35600b57cec5SDimitry Andric _mm512_setzero_si512 (), 35610b57cec5SDimitry Andric (__mmask16) -1, 35620b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 35630b57cec5SDimitry Andric } 35640b57cec5SDimitry Andric 35650b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 35660b57cec5SDimitry Andric _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 35670b57cec5SDimitry Andric { 35680b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 35690b57cec5SDimitry Andric (__v16si) __W, 35700b57cec5SDimitry Andric (__mmask16) __U, 35710b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 35720b57cec5SDimitry Andric } 35730b57cec5SDimitry Andric 35740b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 35750b57cec5SDimitry Andric _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) 35760b57cec5SDimitry Andric { 35770b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 35780b57cec5SDimitry Andric (__v16si) _mm512_setzero_si512 (), 35790b57cec5SDimitry Andric (__mmask16) __U, 35800b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 35810b57cec5SDimitry Andric } 35820b57cec5SDimitry Andric 35830b57cec5SDimitry Andric #define _mm512_cvt_roundepi32_ps(A, R) \ 3584349cc55cSDimitry Andric 
((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 35850b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 3586349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 35870b57cec5SDimitry Andric 35880b57cec5SDimitry Andric #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \ 3589349cc55cSDimitry Andric ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 35900b57cec5SDimitry Andric (__v16sf)(__m512)(W), \ 3591349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 35920b57cec5SDimitry Andric 35930b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \ 3594349cc55cSDimitry Andric ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 35950b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 3596349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 35970b57cec5SDimitry Andric 35980b57cec5SDimitry Andric #define _mm512_cvt_roundepu32_ps(A, R) \ 3599349cc55cSDimitry Andric ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 36000b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 3601349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 36020b57cec5SDimitry Andric 36030b57cec5SDimitry Andric #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \ 3604349cc55cSDimitry Andric ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 36050b57cec5SDimitry Andric (__v16sf)(__m512)(W), \ 3606349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 36070b57cec5SDimitry Andric 36080b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \ 3609349cc55cSDimitry Andric ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 36100b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 3611349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 36120b57cec5SDimitry Andric 36130b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 36140b57cec5SDimitry Andric _mm512_cvtepu32_ps (__m512i __A) 36150b57cec5SDimitry Andric { 36160b57cec5SDimitry Andric return 
(__m512)__builtin_convertvector((__v16su)__A, __v16sf); 36170b57cec5SDimitry Andric } 36180b57cec5SDimitry Andric 36190b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 36200b57cec5SDimitry Andric _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) 36210b57cec5SDimitry Andric { 36220b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 36230b57cec5SDimitry Andric (__v16sf)_mm512_cvtepu32_ps(__A), 36240b57cec5SDimitry Andric (__v16sf)__W); 36250b57cec5SDimitry Andric } 36260b57cec5SDimitry Andric 36270b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 36280b57cec5SDimitry Andric _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) 36290b57cec5SDimitry Andric { 36300b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 36310b57cec5SDimitry Andric (__v16sf)_mm512_cvtepu32_ps(__A), 36320b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 36330b57cec5SDimitry Andric } 36340b57cec5SDimitry Andric 36350b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 36360b57cec5SDimitry Andric _mm512_cvtepi32_pd(__m256i __A) 36370b57cec5SDimitry Andric { 36380b57cec5SDimitry Andric return (__m512d)__builtin_convertvector((__v8si)__A, __v8df); 36390b57cec5SDimitry Andric } 36400b57cec5SDimitry Andric 36410b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 36420b57cec5SDimitry Andric _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) 36430b57cec5SDimitry Andric { 36440b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 36450b57cec5SDimitry Andric (__v8df)_mm512_cvtepi32_pd(__A), 36460b57cec5SDimitry Andric (__v8df)__W); 36470b57cec5SDimitry Andric } 36480b57cec5SDimitry Andric 36490b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 36500b57cec5SDimitry Andric _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) 36510b57cec5SDimitry Andric { 36520b57cec5SDimitry Andric return 
(__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 36530b57cec5SDimitry Andric (__v8df)_mm512_cvtepi32_pd(__A), 36540b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 36550b57cec5SDimitry Andric } 36560b57cec5SDimitry Andric 36570b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 36580b57cec5SDimitry Andric _mm512_cvtepi32lo_pd(__m512i __A) 36590b57cec5SDimitry Andric { 36600b57cec5SDimitry Andric return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); 36610b57cec5SDimitry Andric } 36620b57cec5SDimitry Andric 36630b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 36640b57cec5SDimitry Andric _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) 36650b57cec5SDimitry Andric { 36660b57cec5SDimitry Andric return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A)); 36670b57cec5SDimitry Andric } 36680b57cec5SDimitry Andric 36690b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 36700b57cec5SDimitry Andric _mm512_cvtepi32_ps (__m512i __A) 36710b57cec5SDimitry Andric { 36720b57cec5SDimitry Andric return (__m512)__builtin_convertvector((__v16si)__A, __v16sf); 36730b57cec5SDimitry Andric } 36740b57cec5SDimitry Andric 36750b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 36760b57cec5SDimitry Andric _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) 36770b57cec5SDimitry Andric { 36780b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 36790b57cec5SDimitry Andric (__v16sf)_mm512_cvtepi32_ps(__A), 36800b57cec5SDimitry Andric (__v16sf)__W); 36810b57cec5SDimitry Andric } 36820b57cec5SDimitry Andric 36830b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 36840b57cec5SDimitry Andric _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) 36850b57cec5SDimitry Andric { 36860b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 36870b57cec5SDimitry Andric 
(__v16sf)_mm512_cvtepi32_ps(__A), 36880b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 36890b57cec5SDimitry Andric } 36900b57cec5SDimitry Andric 36910b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 36920b57cec5SDimitry Andric _mm512_cvtepu32_pd(__m256i __A) 36930b57cec5SDimitry Andric { 36940b57cec5SDimitry Andric return (__m512d)__builtin_convertvector((__v8su)__A, __v8df); 36950b57cec5SDimitry Andric } 36960b57cec5SDimitry Andric 36970b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 36980b57cec5SDimitry Andric _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) 36990b57cec5SDimitry Andric { 37000b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 37010b57cec5SDimitry Andric (__v8df)_mm512_cvtepu32_pd(__A), 37020b57cec5SDimitry Andric (__v8df)__W); 37030b57cec5SDimitry Andric } 37040b57cec5SDimitry Andric 37050b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 37060b57cec5SDimitry Andric _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) 37070b57cec5SDimitry Andric { 37080b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 37090b57cec5SDimitry Andric (__v8df)_mm512_cvtepu32_pd(__A), 37100b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 37110b57cec5SDimitry Andric } 37120b57cec5SDimitry Andric 37130b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 37140b57cec5SDimitry Andric _mm512_cvtepu32lo_pd(__m512i __A) 37150b57cec5SDimitry Andric { 37160b57cec5SDimitry Andric return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A)); 37170b57cec5SDimitry Andric } 37180b57cec5SDimitry Andric 37190b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 37200b57cec5SDimitry Andric _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) 37210b57cec5SDimitry Andric { 37220b57cec5SDimitry Andric return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A)); 
37230b57cec5SDimitry Andric } 37240b57cec5SDimitry Andric 37250b57cec5SDimitry Andric #define _mm512_cvt_roundpd_ps(A, R) \ 3726349cc55cSDimitry Andric ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 37270b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 3728349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 37290b57cec5SDimitry Andric 37300b57cec5SDimitry Andric #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \ 3731349cc55cSDimitry Andric ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 37320b57cec5SDimitry Andric (__v8sf)(__m256)(W), (__mmask8)(U), \ 3733349cc55cSDimitry Andric (int)(R))) 37340b57cec5SDimitry Andric 37350b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundpd_ps(U, A, R) \ 3736349cc55cSDimitry Andric ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 37370b57cec5SDimitry Andric (__v8sf)_mm256_setzero_ps(), \ 3738349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 37390b57cec5SDimitry Andric 37400b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512 37410b57cec5SDimitry Andric _mm512_cvtpd_ps (__m512d __A) 37420b57cec5SDimitry Andric { 37430b57cec5SDimitry Andric return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 37440b57cec5SDimitry Andric (__v8sf) _mm256_undefined_ps (), 37450b57cec5SDimitry Andric (__mmask8) -1, 37460b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 37470b57cec5SDimitry Andric } 37480b57cec5SDimitry Andric 37490b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512 37500b57cec5SDimitry Andric _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) 37510b57cec5SDimitry Andric { 37520b57cec5SDimitry Andric return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 37530b57cec5SDimitry Andric (__v8sf) __W, 37540b57cec5SDimitry Andric (__mmask8) __U, 37550b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 37560b57cec5SDimitry Andric } 37570b57cec5SDimitry Andric 37580b57cec5SDimitry Andric static __inline__ __m256 __DEFAULT_FN_ATTRS512 
37590b57cec5SDimitry Andric _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) 37600b57cec5SDimitry Andric { 37610b57cec5SDimitry Andric return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 37620b57cec5SDimitry Andric (__v8sf) _mm256_setzero_ps (), 37630b57cec5SDimitry Andric (__mmask8) __U, 37640b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 37650b57cec5SDimitry Andric } 37660b57cec5SDimitry Andric 37670b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 37680b57cec5SDimitry Andric _mm512_cvtpd_pslo (__m512d __A) 37690b57cec5SDimitry Andric { 37700b57cec5SDimitry Andric return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A), 37710b57cec5SDimitry Andric (__v8sf) _mm256_setzero_ps (), 37720b57cec5SDimitry Andric 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 37730b57cec5SDimitry Andric } 37740b57cec5SDimitry Andric 37750b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 37760b57cec5SDimitry Andric _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) 37770b57cec5SDimitry Andric { 37780b57cec5SDimitry Andric return (__m512) __builtin_shufflevector ( 37790b57cec5SDimitry Andric (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W), 37800b57cec5SDimitry Andric __U, __A), 37810b57cec5SDimitry Andric (__v8sf) _mm256_setzero_ps (), 37820b57cec5SDimitry Andric 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); 37830b57cec5SDimitry Andric } 37840b57cec5SDimitry Andric 37850b57cec5SDimitry Andric #define _mm512_cvt_roundps_ph(A, I) \ 3786349cc55cSDimitry Andric ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 37870b57cec5SDimitry Andric (__v16hi)_mm256_undefined_si256(), \ 3788349cc55cSDimitry Andric (__mmask16)-1)) 37890b57cec5SDimitry Andric 37900b57cec5SDimitry Andric #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ 3791349cc55cSDimitry Andric ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 37920b57cec5SDimitry Andric 
(__v16hi)(__m256i)(U), \ 3793349cc55cSDimitry Andric (__mmask16)(W))) 37940b57cec5SDimitry Andric 37950b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundps_ph(W, A, I) \ 3796349cc55cSDimitry Andric ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 37970b57cec5SDimitry Andric (__v16hi)_mm256_setzero_si256(), \ 3798349cc55cSDimitry Andric (__mmask16)(W))) 37990b57cec5SDimitry Andric 38000b57cec5SDimitry Andric #define _mm512_cvtps_ph _mm512_cvt_roundps_ph 38010b57cec5SDimitry Andric #define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph 38020b57cec5SDimitry Andric #define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph 38030b57cec5SDimitry Andric 38040b57cec5SDimitry Andric #define _mm512_cvt_roundph_ps(A, R) \ 3805349cc55cSDimitry Andric ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 38060b57cec5SDimitry Andric (__v16sf)_mm512_undefined_ps(), \ 3807349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 38080b57cec5SDimitry Andric 38090b57cec5SDimitry Andric #define _mm512_mask_cvt_roundph_ps(W, U, A, R) \ 3810349cc55cSDimitry Andric ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 38110b57cec5SDimitry Andric (__v16sf)(__m512)(W), \ 3812349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 38130b57cec5SDimitry Andric 38140b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundph_ps(U, A, R) \ 3815349cc55cSDimitry Andric ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 38160b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 3817349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 38180b57cec5SDimitry Andric 38190b57cec5SDimitry Andric 38200b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 38210b57cec5SDimitry Andric _mm512_cvtph_ps(__m256i __A) 38220b57cec5SDimitry Andric { 38230b57cec5SDimitry Andric return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 38240b57cec5SDimitry Andric (__v16sf) 38250b57cec5SDimitry Andric _mm512_setzero_ps (), 
38260b57cec5SDimitry Andric (__mmask16) -1, 38270b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 38280b57cec5SDimitry Andric } 38290b57cec5SDimitry Andric 38300b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 38310b57cec5SDimitry Andric _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) 38320b57cec5SDimitry Andric { 38330b57cec5SDimitry Andric return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 38340b57cec5SDimitry Andric (__v16sf) __W, 38350b57cec5SDimitry Andric (__mmask16) __U, 38360b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 38370b57cec5SDimitry Andric } 38380b57cec5SDimitry Andric 38390b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 38400b57cec5SDimitry Andric _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) 38410b57cec5SDimitry Andric { 38420b57cec5SDimitry Andric return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 38430b57cec5SDimitry Andric (__v16sf) _mm512_setzero_ps (), 38440b57cec5SDimitry Andric (__mmask16) __U, 38450b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 38460b57cec5SDimitry Andric } 38470b57cec5SDimitry Andric 38480b57cec5SDimitry Andric #define _mm512_cvtt_roundpd_epi32(A, R) \ 3849349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 38500b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256(), \ 3851349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 38520b57cec5SDimitry Andric 38530b57cec5SDimitry Andric #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \ 3854349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 38550b57cec5SDimitry Andric (__v8si)(__m256i)(W), \ 3856349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 38570b57cec5SDimitry Andric 38580b57cec5SDimitry Andric #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \ 3859349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 38600b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256(), \ 
3861349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 38620b57cec5SDimitry Andric 38630b57cec5SDimitry Andric static __inline __m256i __DEFAULT_FN_ATTRS512 38640b57cec5SDimitry Andric _mm512_cvttpd_epi32(__m512d __a) 38650b57cec5SDimitry Andric { 38660b57cec5SDimitry Andric return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, 38670b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256(), 38680b57cec5SDimitry Andric (__mmask8) -1, 38690b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 38700b57cec5SDimitry Andric } 38710b57cec5SDimitry Andric 38720b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 38730b57cec5SDimitry Andric _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 38740b57cec5SDimitry Andric { 38750b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 38760b57cec5SDimitry Andric (__v8si) __W, 38770b57cec5SDimitry Andric (__mmask8) __U, 38780b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 38790b57cec5SDimitry Andric } 38800b57cec5SDimitry Andric 38810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 38820b57cec5SDimitry Andric _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) 38830b57cec5SDimitry Andric { 38840b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 38850b57cec5SDimitry Andric (__v8si) _mm256_setzero_si256 (), 38860b57cec5SDimitry Andric (__mmask8) __U, 38870b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 38880b57cec5SDimitry Andric } 38890b57cec5SDimitry Andric 38900b57cec5SDimitry Andric #define _mm512_cvtt_roundps_epi32(A, R) \ 3891349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 38920b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512(), \ 3893349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 38940b57cec5SDimitry Andric 38950b57cec5SDimitry Andric #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \ 3896349cc55cSDimitry Andric 
((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 38970b57cec5SDimitry Andric (__v16si)(__m512i)(W), \ 3898349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 38990b57cec5SDimitry Andric 39000b57cec5SDimitry Andric #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \ 3901349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 39020b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512(), \ 3903349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 39040b57cec5SDimitry Andric 39050b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 39060b57cec5SDimitry Andric _mm512_cvttps_epi32(__m512 __a) 39070b57cec5SDimitry Andric { 39080b57cec5SDimitry Andric return (__m512i) 39090b57cec5SDimitry Andric __builtin_ia32_cvttps2dq512_mask((__v16sf) __a, 39100b57cec5SDimitry Andric (__v16si) _mm512_setzero_si512 (), 39110b57cec5SDimitry Andric (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); 39120b57cec5SDimitry Andric } 39130b57cec5SDimitry Andric 39140b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 39150b57cec5SDimitry Andric _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 39160b57cec5SDimitry Andric { 39170b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 39180b57cec5SDimitry Andric (__v16si) __W, 39190b57cec5SDimitry Andric (__mmask16) __U, 39200b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 39210b57cec5SDimitry Andric } 39220b57cec5SDimitry Andric 39230b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 39240b57cec5SDimitry Andric _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) 39250b57cec5SDimitry Andric { 39260b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 39270b57cec5SDimitry Andric (__v16si) _mm512_setzero_si512 (), 39280b57cec5SDimitry Andric (__mmask16) __U, 39290b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 39300b57cec5SDimitry Andric } 39310b57cec5SDimitry Andric 
39320b57cec5SDimitry Andric #define _mm512_cvt_roundps_epi32(A, R) \ 3933349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 39340b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512(), \ 3935349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 39360b57cec5SDimitry Andric 39370b57cec5SDimitry Andric #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \ 3938349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 39390b57cec5SDimitry Andric (__v16si)(__m512i)(W), \ 3940349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 39410b57cec5SDimitry Andric 39420b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundps_epi32(U, A, R) \ 3943349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 39440b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512(), \ 3945349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 39460b57cec5SDimitry Andric 39470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 39480b57cec5SDimitry Andric _mm512_cvtps_epi32 (__m512 __A) 39490b57cec5SDimitry Andric { 39500b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 39510b57cec5SDimitry Andric (__v16si) _mm512_undefined_epi32 (), 39520b57cec5SDimitry Andric (__mmask16) -1, 39530b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 39540b57cec5SDimitry Andric } 39550b57cec5SDimitry Andric 39560b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 39570b57cec5SDimitry Andric _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 39580b57cec5SDimitry Andric { 39590b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 39600b57cec5SDimitry Andric (__v16si) __W, 39610b57cec5SDimitry Andric (__mmask16) __U, 39620b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 39630b57cec5SDimitry Andric } 39640b57cec5SDimitry Andric 39650b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 
39660b57cec5SDimitry Andric _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) 39670b57cec5SDimitry Andric { 39680b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 39690b57cec5SDimitry Andric (__v16si) 39700b57cec5SDimitry Andric _mm512_setzero_si512 (), 39710b57cec5SDimitry Andric (__mmask16) __U, 39720b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 39730b57cec5SDimitry Andric } 39740b57cec5SDimitry Andric 39750b57cec5SDimitry Andric #define _mm512_cvt_roundpd_epi32(A, R) \ 3976349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 39770b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256(), \ 3978349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 39790b57cec5SDimitry Andric 39800b57cec5SDimitry Andric #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \ 3981349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 39820b57cec5SDimitry Andric (__v8si)(__m256i)(W), \ 3983349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 39840b57cec5SDimitry Andric 39850b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \ 3986349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 39870b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256(), \ 3988349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 39890b57cec5SDimitry Andric 39900b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 39910b57cec5SDimitry Andric _mm512_cvtpd_epi32 (__m512d __A) 39920b57cec5SDimitry Andric { 39930b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 39940b57cec5SDimitry Andric (__v8si) 39950b57cec5SDimitry Andric _mm256_undefined_si256 (), 39960b57cec5SDimitry Andric (__mmask8) -1, 39970b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 39980b57cec5SDimitry Andric } 39990b57cec5SDimitry Andric 40000b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 40010b57cec5SDimitry Andric 
_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 40020b57cec5SDimitry Andric { 40030b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 40040b57cec5SDimitry Andric (__v8si) __W, 40050b57cec5SDimitry Andric (__mmask8) __U, 40060b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 40070b57cec5SDimitry Andric } 40080b57cec5SDimitry Andric 40090b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 40100b57cec5SDimitry Andric _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) 40110b57cec5SDimitry Andric { 40120b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 40130b57cec5SDimitry Andric (__v8si) 40140b57cec5SDimitry Andric _mm256_setzero_si256 (), 40150b57cec5SDimitry Andric (__mmask8) __U, 40160b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 40170b57cec5SDimitry Andric } 40180b57cec5SDimitry Andric 40190b57cec5SDimitry Andric #define _mm512_cvt_roundps_epu32(A, R) \ 4020349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 40210b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512(), \ 4022349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 40230b57cec5SDimitry Andric 40240b57cec5SDimitry Andric #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \ 4025349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 40260b57cec5SDimitry Andric (__v16si)(__m512i)(W), \ 4027349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 40280b57cec5SDimitry Andric 40290b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundps_epu32(U, A, R) \ 4030349cc55cSDimitry Andric ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 40310b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512(), \ 4032349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 40330b57cec5SDimitry Andric 40340b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 40350b57cec5SDimitry Andric _mm512_cvtps_epu32 ( __m512 __A) 
40360b57cec5SDimitry Andric { 40370b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ 40380b57cec5SDimitry Andric (__v16si)\ 40390b57cec5SDimitry Andric _mm512_undefined_epi32 (), 40400b57cec5SDimitry Andric (__mmask16) -1,\ 40410b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 40420b57cec5SDimitry Andric } 40430b57cec5SDimitry Andric 40440b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 40450b57cec5SDimitry Andric _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 40460b57cec5SDimitry Andric { 40470b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 40480b57cec5SDimitry Andric (__v16si) __W, 40490b57cec5SDimitry Andric (__mmask16) __U, 40500b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 40510b57cec5SDimitry Andric } 40520b57cec5SDimitry Andric 40530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 40540b57cec5SDimitry Andric _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) 40550b57cec5SDimitry Andric { 40560b57cec5SDimitry Andric return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 40570b57cec5SDimitry Andric (__v16si) 40580b57cec5SDimitry Andric _mm512_setzero_si512 (), 40590b57cec5SDimitry Andric (__mmask16) __U , 40600b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 40610b57cec5SDimitry Andric } 40620b57cec5SDimitry Andric 40630b57cec5SDimitry Andric #define _mm512_cvt_roundpd_epu32(A, R) \ 4064349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 40650b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256(), \ 4066349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 40670b57cec5SDimitry Andric 40680b57cec5SDimitry Andric #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \ 4069349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 40700b57cec5SDimitry Andric (__v8si)(__m256i)(W), \ 4071349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 
40720b57cec5SDimitry Andric 40730b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \ 4074349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 40750b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256(), \ 4076349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 40770b57cec5SDimitry Andric 40780b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 40790b57cec5SDimitry Andric _mm512_cvtpd_epu32 (__m512d __A) 40800b57cec5SDimitry Andric { 40810b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 40820b57cec5SDimitry Andric (__v8si) 40830b57cec5SDimitry Andric _mm256_undefined_si256 (), 40840b57cec5SDimitry Andric (__mmask8) -1, 40850b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 40860b57cec5SDimitry Andric } 40870b57cec5SDimitry Andric 40880b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 40890b57cec5SDimitry Andric _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 40900b57cec5SDimitry Andric { 40910b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 40920b57cec5SDimitry Andric (__v8si) __W, 40930b57cec5SDimitry Andric (__mmask8) __U, 40940b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 40950b57cec5SDimitry Andric } 40960b57cec5SDimitry Andric 40970b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 40980b57cec5SDimitry Andric _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) 40990b57cec5SDimitry Andric { 41000b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 41010b57cec5SDimitry Andric (__v8si) 41020b57cec5SDimitry Andric _mm256_setzero_si256 (), 41030b57cec5SDimitry Andric (__mmask8) __U, 41040b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 41050b57cec5SDimitry Andric } 41060b57cec5SDimitry Andric 41070b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 41080b57cec5SDimitry Andric _mm512_cvtsd_f64(__m512d 
__a) 41090b57cec5SDimitry Andric { 41100b57cec5SDimitry Andric return __a[0]; 41110b57cec5SDimitry Andric } 41120b57cec5SDimitry Andric 41130b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512 41140b57cec5SDimitry Andric _mm512_cvtss_f32(__m512 __a) 41150b57cec5SDimitry Andric { 41160b57cec5SDimitry Andric return __a[0]; 41170b57cec5SDimitry Andric } 41180b57cec5SDimitry Andric 41190b57cec5SDimitry Andric /* Unpack and Interleave */ 41200b57cec5SDimitry Andric 41210b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 41220b57cec5SDimitry Andric _mm512_unpackhi_pd(__m512d __a, __m512d __b) 41230b57cec5SDimitry Andric { 41240b57cec5SDimitry Andric return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 41250b57cec5SDimitry Andric 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 41260b57cec5SDimitry Andric } 41270b57cec5SDimitry Andric 41280b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 41290b57cec5SDimitry Andric _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 41300b57cec5SDimitry Andric { 41310b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 41320b57cec5SDimitry Andric (__v8df)_mm512_unpackhi_pd(__A, __B), 41330b57cec5SDimitry Andric (__v8df)__W); 41340b57cec5SDimitry Andric } 41350b57cec5SDimitry Andric 41360b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 41370b57cec5SDimitry Andric _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) 41380b57cec5SDimitry Andric { 41390b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 41400b57cec5SDimitry Andric (__v8df)_mm512_unpackhi_pd(__A, __B), 41410b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 41420b57cec5SDimitry Andric } 41430b57cec5SDimitry Andric 41440b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 41450b57cec5SDimitry Andric _mm512_unpacklo_pd(__m512d __a, __m512d __b) 41460b57cec5SDimitry Andric { 
41470b57cec5SDimitry Andric return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 41480b57cec5SDimitry Andric 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 41490b57cec5SDimitry Andric } 41500b57cec5SDimitry Andric 41510b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 41520b57cec5SDimitry Andric _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 41530b57cec5SDimitry Andric { 41540b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 41550b57cec5SDimitry Andric (__v8df)_mm512_unpacklo_pd(__A, __B), 41560b57cec5SDimitry Andric (__v8df)__W); 41570b57cec5SDimitry Andric } 41580b57cec5SDimitry Andric 41590b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 41600b57cec5SDimitry Andric _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) 41610b57cec5SDimitry Andric { 41620b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, 41630b57cec5SDimitry Andric (__v8df)_mm512_unpacklo_pd(__A, __B), 41640b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 41650b57cec5SDimitry Andric } 41660b57cec5SDimitry Andric 41670b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 41680b57cec5SDimitry Andric _mm512_unpackhi_ps(__m512 __a, __m512 __b) 41690b57cec5SDimitry Andric { 41700b57cec5SDimitry Andric return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 41710b57cec5SDimitry Andric 2, 18, 3, 19, 41720b57cec5SDimitry Andric 2+4, 18+4, 3+4, 19+4, 41730b57cec5SDimitry Andric 2+8, 18+8, 3+8, 19+8, 41740b57cec5SDimitry Andric 2+12, 18+12, 3+12, 19+12); 41750b57cec5SDimitry Andric } 41760b57cec5SDimitry Andric 41770b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 41780b57cec5SDimitry Andric _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 41790b57cec5SDimitry Andric { 41800b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 41810b57cec5SDimitry Andric 
(__v16sf)_mm512_unpackhi_ps(__A, __B), 41820b57cec5SDimitry Andric (__v16sf)__W); 41830b57cec5SDimitry Andric } 41840b57cec5SDimitry Andric 41850b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 41860b57cec5SDimitry Andric _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) 41870b57cec5SDimitry Andric { 41880b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 41890b57cec5SDimitry Andric (__v16sf)_mm512_unpackhi_ps(__A, __B), 41900b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 41910b57cec5SDimitry Andric } 41920b57cec5SDimitry Andric 41930b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 41940b57cec5SDimitry Andric _mm512_unpacklo_ps(__m512 __a, __m512 __b) 41950b57cec5SDimitry Andric { 41960b57cec5SDimitry Andric return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, 41970b57cec5SDimitry Andric 0, 16, 1, 17, 41980b57cec5SDimitry Andric 0+4, 16+4, 1+4, 17+4, 41990b57cec5SDimitry Andric 0+8, 16+8, 1+8, 17+8, 42000b57cec5SDimitry Andric 0+12, 16+12, 1+12, 17+12); 42010b57cec5SDimitry Andric } 42020b57cec5SDimitry Andric 42030b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 42040b57cec5SDimitry Andric _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 42050b57cec5SDimitry Andric { 42060b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 42070b57cec5SDimitry Andric (__v16sf)_mm512_unpacklo_ps(__A, __B), 42080b57cec5SDimitry Andric (__v16sf)__W); 42090b57cec5SDimitry Andric } 42100b57cec5SDimitry Andric 42110b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 42120b57cec5SDimitry Andric _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) 42130b57cec5SDimitry Andric { 42140b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, 42150b57cec5SDimitry Andric (__v16sf)_mm512_unpacklo_ps(__A, __B), 42160b57cec5SDimitry Andric 
(__v16sf)_mm512_setzero_ps()); 42170b57cec5SDimitry Andric } 42180b57cec5SDimitry Andric 42190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 42200b57cec5SDimitry Andric _mm512_unpackhi_epi32(__m512i __A, __m512i __B) 42210b57cec5SDimitry Andric { 42220b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 42230b57cec5SDimitry Andric 2, 18, 3, 19, 42240b57cec5SDimitry Andric 2+4, 18+4, 3+4, 19+4, 42250b57cec5SDimitry Andric 2+8, 18+8, 3+8, 19+8, 42260b57cec5SDimitry Andric 2+12, 18+12, 3+12, 19+12); 42270b57cec5SDimitry Andric } 42280b57cec5SDimitry Andric 42290b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 42300b57cec5SDimitry Andric _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 42310b57cec5SDimitry Andric { 42320b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 42330b57cec5SDimitry Andric (__v16si)_mm512_unpackhi_epi32(__A, __B), 42340b57cec5SDimitry Andric (__v16si)__W); 42350b57cec5SDimitry Andric } 42360b57cec5SDimitry Andric 42370b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 42380b57cec5SDimitry Andric _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) 42390b57cec5SDimitry Andric { 42400b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 42410b57cec5SDimitry Andric (__v16si)_mm512_unpackhi_epi32(__A, __B), 42420b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 42430b57cec5SDimitry Andric } 42440b57cec5SDimitry Andric 42450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 42460b57cec5SDimitry Andric _mm512_unpacklo_epi32(__m512i __A, __m512i __B) 42470b57cec5SDimitry Andric { 42480b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, 42490b57cec5SDimitry Andric 0, 16, 1, 17, 42500b57cec5SDimitry Andric 0+4, 16+4, 1+4, 17+4, 42510b57cec5SDimitry Andric 0+8, 16+8, 1+8, 17+8, 
42520b57cec5SDimitry Andric 0+12, 16+12, 1+12, 17+12); 42530b57cec5SDimitry Andric } 42540b57cec5SDimitry Andric 42550b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 42560b57cec5SDimitry Andric _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 42570b57cec5SDimitry Andric { 42580b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 42590b57cec5SDimitry Andric (__v16si)_mm512_unpacklo_epi32(__A, __B), 42600b57cec5SDimitry Andric (__v16si)__W); 42610b57cec5SDimitry Andric } 42620b57cec5SDimitry Andric 42630b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 42640b57cec5SDimitry Andric _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) 42650b57cec5SDimitry Andric { 42660b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, 42670b57cec5SDimitry Andric (__v16si)_mm512_unpacklo_epi32(__A, __B), 42680b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 42690b57cec5SDimitry Andric } 42700b57cec5SDimitry Andric 42710b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 42720b57cec5SDimitry Andric _mm512_unpackhi_epi64(__m512i __A, __m512i __B) 42730b57cec5SDimitry Andric { 42740b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 42750b57cec5SDimitry Andric 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 42760b57cec5SDimitry Andric } 42770b57cec5SDimitry Andric 42780b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 42790b57cec5SDimitry Andric _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 42800b57cec5SDimitry Andric { 42810b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 42820b57cec5SDimitry Andric (__v8di)_mm512_unpackhi_epi64(__A, __B), 42830b57cec5SDimitry Andric (__v8di)__W); 42840b57cec5SDimitry Andric } 42850b57cec5SDimitry Andric 42860b57cec5SDimitry Andric static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 42870b57cec5SDimitry Andric _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) 42880b57cec5SDimitry Andric { 42890b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 42900b57cec5SDimitry Andric (__v8di)_mm512_unpackhi_epi64(__A, __B), 42910b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 42920b57cec5SDimitry Andric } 42930b57cec5SDimitry Andric 42940b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 42950b57cec5SDimitry Andric _mm512_unpacklo_epi64 (__m512i __A, __m512i __B) 42960b57cec5SDimitry Andric { 42970b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 42980b57cec5SDimitry Andric 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 42990b57cec5SDimitry Andric } 43000b57cec5SDimitry Andric 43010b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 43020b57cec5SDimitry Andric _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 43030b57cec5SDimitry Andric { 43040b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 43050b57cec5SDimitry Andric (__v8di)_mm512_unpacklo_epi64(__A, __B), 43060b57cec5SDimitry Andric (__v8di)__W); 43070b57cec5SDimitry Andric } 43080b57cec5SDimitry Andric 43090b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 43100b57cec5SDimitry Andric _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 43110b57cec5SDimitry Andric { 43120b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, 43130b57cec5SDimitry Andric (__v8di)_mm512_unpacklo_epi64(__A, __B), 43140b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 43150b57cec5SDimitry Andric } 43160b57cec5SDimitry Andric 43170b57cec5SDimitry Andric 43180b57cec5SDimitry Andric /* SIMD load ops */ 43190b57cec5SDimitry Andric 43200b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 43210b57cec5SDimitry Andric _mm512_loadu_si512 
(void const *__P) 43220b57cec5SDimitry Andric { 43230b57cec5SDimitry Andric struct __loadu_si512 { 43240b57cec5SDimitry Andric __m512i_u __v; 43250b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 4326480093f4SDimitry Andric return ((const struct __loadu_si512*)__P)->__v; 43270b57cec5SDimitry Andric } 43280b57cec5SDimitry Andric 43290b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 43300b57cec5SDimitry Andric _mm512_loadu_epi32 (void const *__P) 43310b57cec5SDimitry Andric { 43320b57cec5SDimitry Andric struct __loadu_epi32 { 43330b57cec5SDimitry Andric __m512i_u __v; 43340b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 4335480093f4SDimitry Andric return ((const struct __loadu_epi32*)__P)->__v; 43360b57cec5SDimitry Andric } 43370b57cec5SDimitry Andric 43380b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 43390b57cec5SDimitry Andric _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 43400b57cec5SDimitry Andric { 43410b57cec5SDimitry Andric return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, 43420b57cec5SDimitry Andric (__v16si) __W, 43430b57cec5SDimitry Andric (__mmask16) __U); 43440b57cec5SDimitry Andric } 43450b57cec5SDimitry Andric 43460b57cec5SDimitry Andric 43470b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 43480b57cec5SDimitry Andric _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) 43490b57cec5SDimitry Andric { 43500b57cec5SDimitry Andric return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P, 43510b57cec5SDimitry Andric (__v16si) 43520b57cec5SDimitry Andric _mm512_setzero_si512 (), 43530b57cec5SDimitry Andric (__mmask16) __U); 43540b57cec5SDimitry Andric } 43550b57cec5SDimitry Andric 43560b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 43570b57cec5SDimitry Andric _mm512_loadu_epi64 (void const *__P) 43580b57cec5SDimitry Andric { 43590b57cec5SDimitry Andric struct __loadu_epi64 { 
43600b57cec5SDimitry Andric __m512i_u __v; 43610b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 4362480093f4SDimitry Andric return ((const struct __loadu_epi64*)__P)->__v; 43630b57cec5SDimitry Andric } 43640b57cec5SDimitry Andric 43650b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 43660b57cec5SDimitry Andric _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 43670b57cec5SDimitry Andric { 43680b57cec5SDimitry Andric return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, 43690b57cec5SDimitry Andric (__v8di) __W, 43700b57cec5SDimitry Andric (__mmask8) __U); 43710b57cec5SDimitry Andric } 43720b57cec5SDimitry Andric 43730b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 43740b57cec5SDimitry Andric _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) 43750b57cec5SDimitry Andric { 43760b57cec5SDimitry Andric return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P, 43770b57cec5SDimitry Andric (__v8di) 43780b57cec5SDimitry Andric _mm512_setzero_si512 (), 43790b57cec5SDimitry Andric (__mmask8) __U); 43800b57cec5SDimitry Andric } 43810b57cec5SDimitry Andric 43820b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 43830b57cec5SDimitry Andric _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) 43840b57cec5SDimitry Andric { 43850b57cec5SDimitry Andric return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, 43860b57cec5SDimitry Andric (__v16sf) __W, 43870b57cec5SDimitry Andric (__mmask16) __U); 43880b57cec5SDimitry Andric } 43890b57cec5SDimitry Andric 43900b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 43910b57cec5SDimitry Andric _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) 43920b57cec5SDimitry Andric { 43930b57cec5SDimitry Andric return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P, 43940b57cec5SDimitry Andric (__v16sf) 43950b57cec5SDimitry Andric _mm512_setzero_ps (), 
43960b57cec5SDimitry Andric (__mmask16) __U); 43970b57cec5SDimitry Andric } 43980b57cec5SDimitry Andric 43990b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 44000b57cec5SDimitry Andric _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) 44010b57cec5SDimitry Andric { 44020b57cec5SDimitry Andric return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, 44030b57cec5SDimitry Andric (__v8df) __W, 44040b57cec5SDimitry Andric (__mmask8) __U); 44050b57cec5SDimitry Andric } 44060b57cec5SDimitry Andric 44070b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 44080b57cec5SDimitry Andric _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) 44090b57cec5SDimitry Andric { 44100b57cec5SDimitry Andric return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P, 44110b57cec5SDimitry Andric (__v8df) 44120b57cec5SDimitry Andric _mm512_setzero_pd (), 44130b57cec5SDimitry Andric (__mmask8) __U); 44140b57cec5SDimitry Andric } 44150b57cec5SDimitry Andric 44160b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 44170b57cec5SDimitry Andric _mm512_loadu_pd(void const *__p) 44180b57cec5SDimitry Andric { 44190b57cec5SDimitry Andric struct __loadu_pd { 44200b57cec5SDimitry Andric __m512d_u __v; 44210b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 4422480093f4SDimitry Andric return ((const struct __loadu_pd*)__p)->__v; 44230b57cec5SDimitry Andric } 44240b57cec5SDimitry Andric 44250b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 44260b57cec5SDimitry Andric _mm512_loadu_ps(void const *__p) 44270b57cec5SDimitry Andric { 44280b57cec5SDimitry Andric struct __loadu_ps { 44290b57cec5SDimitry Andric __m512_u __v; 44300b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 4431480093f4SDimitry Andric return ((const struct __loadu_ps*)__p)->__v; 44320b57cec5SDimitry Andric } 44330b57cec5SDimitry Andric 44340b57cec5SDimitry Andric static __inline __m512 
__DEFAULT_FN_ATTRS512 44350b57cec5SDimitry Andric _mm512_load_ps(void const *__p) 44360b57cec5SDimitry Andric { 4437480093f4SDimitry Andric return *(const __m512*)__p; 44380b57cec5SDimitry Andric } 44390b57cec5SDimitry Andric 44400b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 44410b57cec5SDimitry Andric _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) 44420b57cec5SDimitry Andric { 44430b57cec5SDimitry Andric return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 44440b57cec5SDimitry Andric (__v16sf) __W, 44450b57cec5SDimitry Andric (__mmask16) __U); 44460b57cec5SDimitry Andric } 44470b57cec5SDimitry Andric 44480b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 44490b57cec5SDimitry Andric _mm512_maskz_load_ps(__mmask16 __U, void const *__P) 44500b57cec5SDimitry Andric { 44510b57cec5SDimitry Andric return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, 44520b57cec5SDimitry Andric (__v16sf) 44530b57cec5SDimitry Andric _mm512_setzero_ps (), 44540b57cec5SDimitry Andric (__mmask16) __U); 44550b57cec5SDimitry Andric } 44560b57cec5SDimitry Andric 44570b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 44580b57cec5SDimitry Andric _mm512_load_pd(void const *__p) 44590b57cec5SDimitry Andric { 4460480093f4SDimitry Andric return *(const __m512d*)__p; 44610b57cec5SDimitry Andric } 44620b57cec5SDimitry Andric 44630b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 44640b57cec5SDimitry Andric _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) 44650b57cec5SDimitry Andric { 44660b57cec5SDimitry Andric return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 44670b57cec5SDimitry Andric (__v8df) __W, 44680b57cec5SDimitry Andric (__mmask8) __U); 44690b57cec5SDimitry Andric } 44700b57cec5SDimitry Andric 44710b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 44720b57cec5SDimitry Andric _mm512_maskz_load_pd(__mmask8 __U, 
void const *__P) 44730b57cec5SDimitry Andric { 44740b57cec5SDimitry Andric return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, 44750b57cec5SDimitry Andric (__v8df) 44760b57cec5SDimitry Andric _mm512_setzero_pd (), 44770b57cec5SDimitry Andric (__mmask8) __U); 44780b57cec5SDimitry Andric } 44790b57cec5SDimitry Andric 44800b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 44810b57cec5SDimitry Andric _mm512_load_si512 (void const *__P) 44820b57cec5SDimitry Andric { 4483480093f4SDimitry Andric return *(const __m512i *) __P; 44840b57cec5SDimitry Andric } 44850b57cec5SDimitry Andric 44860b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 44870b57cec5SDimitry Andric _mm512_load_epi32 (void const *__P) 44880b57cec5SDimitry Andric { 4489480093f4SDimitry Andric return *(const __m512i *) __P; 44900b57cec5SDimitry Andric } 44910b57cec5SDimitry Andric 44920b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 44930b57cec5SDimitry Andric _mm512_load_epi64 (void const *__P) 44940b57cec5SDimitry Andric { 4495480093f4SDimitry Andric return *(const __m512i *) __P; 44960b57cec5SDimitry Andric } 44970b57cec5SDimitry Andric 44980b57cec5SDimitry Andric /* SIMD store ops */ 44990b57cec5SDimitry Andric 45000b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45010b57cec5SDimitry Andric _mm512_storeu_epi64 (void *__P, __m512i __A) 45020b57cec5SDimitry Andric { 45030b57cec5SDimitry Andric struct __storeu_epi64 { 45040b57cec5SDimitry Andric __m512i_u __v; 45050b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 45060b57cec5SDimitry Andric ((struct __storeu_epi64*)__P)->__v = __A; 45070b57cec5SDimitry Andric } 45080b57cec5SDimitry Andric 45090b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45100b57cec5SDimitry Andric _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) 45110b57cec5SDimitry Andric { 45120b57cec5SDimitry Andric __builtin_ia32_storedqudi512_mask ((long 
long *)__P, (__v8di) __A, 45130b57cec5SDimitry Andric (__mmask8) __U); 45140b57cec5SDimitry Andric } 45150b57cec5SDimitry Andric 45160b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45170b57cec5SDimitry Andric _mm512_storeu_si512 (void *__P, __m512i __A) 45180b57cec5SDimitry Andric { 45190b57cec5SDimitry Andric struct __storeu_si512 { 45200b57cec5SDimitry Andric __m512i_u __v; 45210b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 45220b57cec5SDimitry Andric ((struct __storeu_si512*)__P)->__v = __A; 45230b57cec5SDimitry Andric } 45240b57cec5SDimitry Andric 45250b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45260b57cec5SDimitry Andric _mm512_storeu_epi32 (void *__P, __m512i __A) 45270b57cec5SDimitry Andric { 45280b57cec5SDimitry Andric struct __storeu_epi32 { 45290b57cec5SDimitry Andric __m512i_u __v; 45300b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 45310b57cec5SDimitry Andric ((struct __storeu_epi32*)__P)->__v = __A; 45320b57cec5SDimitry Andric } 45330b57cec5SDimitry Andric 45340b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45350b57cec5SDimitry Andric _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) 45360b57cec5SDimitry Andric { 45370b57cec5SDimitry Andric __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A, 45380b57cec5SDimitry Andric (__mmask16) __U); 45390b57cec5SDimitry Andric } 45400b57cec5SDimitry Andric 45410b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45420b57cec5SDimitry Andric _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) 45430b57cec5SDimitry Andric { 45440b57cec5SDimitry Andric __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U); 45450b57cec5SDimitry Andric } 45460b57cec5SDimitry Andric 45470b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45480b57cec5SDimitry Andric _mm512_storeu_pd(void *__P, __m512d __A) 45490b57cec5SDimitry Andric { 
45500b57cec5SDimitry Andric struct __storeu_pd { 45510b57cec5SDimitry Andric __m512d_u __v; 45520b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 45530b57cec5SDimitry Andric ((struct __storeu_pd*)__P)->__v = __A; 45540b57cec5SDimitry Andric } 45550b57cec5SDimitry Andric 45560b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45570b57cec5SDimitry Andric _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) 45580b57cec5SDimitry Andric { 45590b57cec5SDimitry Andric __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A, 45600b57cec5SDimitry Andric (__mmask16) __U); 45610b57cec5SDimitry Andric } 45620b57cec5SDimitry Andric 45630b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45640b57cec5SDimitry Andric _mm512_storeu_ps(void *__P, __m512 __A) 45650b57cec5SDimitry Andric { 45660b57cec5SDimitry Andric struct __storeu_ps { 45670b57cec5SDimitry Andric __m512_u __v; 45680b57cec5SDimitry Andric } __attribute__((__packed__, __may_alias__)); 45690b57cec5SDimitry Andric ((struct __storeu_ps*)__P)->__v = __A; 45700b57cec5SDimitry Andric } 45710b57cec5SDimitry Andric 45720b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45730b57cec5SDimitry Andric _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) 45740b57cec5SDimitry Andric { 45750b57cec5SDimitry Andric __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); 45760b57cec5SDimitry Andric } 45770b57cec5SDimitry Andric 45780b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45790b57cec5SDimitry Andric _mm512_store_pd(void *__P, __m512d __A) 45800b57cec5SDimitry Andric { 45810b57cec5SDimitry Andric *(__m512d*)__P = __A; 45820b57cec5SDimitry Andric } 45830b57cec5SDimitry Andric 45840b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45850b57cec5SDimitry Andric _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) 45860b57cec5SDimitry Andric { 45870b57cec5SDimitry Andric 
__builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, 45880b57cec5SDimitry Andric (__mmask16) __U); 45890b57cec5SDimitry Andric } 45900b57cec5SDimitry Andric 45910b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45920b57cec5SDimitry Andric _mm512_store_ps(void *__P, __m512 __A) 45930b57cec5SDimitry Andric { 45940b57cec5SDimitry Andric *(__m512*)__P = __A; 45950b57cec5SDimitry Andric } 45960b57cec5SDimitry Andric 45970b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 45980b57cec5SDimitry Andric _mm512_store_si512 (void *__P, __m512i __A) 45990b57cec5SDimitry Andric { 46000b57cec5SDimitry Andric *(__m512i *) __P = __A; 46010b57cec5SDimitry Andric } 46020b57cec5SDimitry Andric 46030b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 46040b57cec5SDimitry Andric _mm512_store_epi32 (void *__P, __m512i __A) 46050b57cec5SDimitry Andric { 46060b57cec5SDimitry Andric *(__m512i *) __P = __A; 46070b57cec5SDimitry Andric } 46080b57cec5SDimitry Andric 46090b57cec5SDimitry Andric static __inline void __DEFAULT_FN_ATTRS512 46100b57cec5SDimitry Andric _mm512_store_epi64 (void *__P, __m512i __A) 46110b57cec5SDimitry Andric { 46120b57cec5SDimitry Andric *(__m512i *) __P = __A; 46130b57cec5SDimitry Andric } 46140b57cec5SDimitry Andric 46150b57cec5SDimitry Andric /* Mask ops */ 46160b57cec5SDimitry Andric 46170b57cec5SDimitry Andric static __inline __mmask16 __DEFAULT_FN_ATTRS 46180b57cec5SDimitry Andric _mm512_knot(__mmask16 __M) 46190b57cec5SDimitry Andric { 46200b57cec5SDimitry Andric return __builtin_ia32_knothi(__M); 46210b57cec5SDimitry Andric } 46220b57cec5SDimitry Andric 46230b57cec5SDimitry Andric /* Integer compare */ 46240b57cec5SDimitry Andric 46250b57cec5SDimitry Andric #define _mm512_cmpeq_epi32_mask(A, B) \ 46260b57cec5SDimitry Andric _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 46270b57cec5SDimitry Andric #define _mm512_mask_cmpeq_epi32_mask(k, A, B) \ 46280b57cec5SDimitry Andric _mm512_mask_cmp_epi32_mask((k), 
(A), (B), _MM_CMPINT_EQ) 46290b57cec5SDimitry Andric #define _mm512_cmpge_epi32_mask(A, B) \ 46300b57cec5SDimitry Andric _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 46310b57cec5SDimitry Andric #define _mm512_mask_cmpge_epi32_mask(k, A, B) \ 46320b57cec5SDimitry Andric _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 46330b57cec5SDimitry Andric #define _mm512_cmpgt_epi32_mask(A, B) \ 46340b57cec5SDimitry Andric _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 46350b57cec5SDimitry Andric #define _mm512_mask_cmpgt_epi32_mask(k, A, B) \ 46360b57cec5SDimitry Andric _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 46370b57cec5SDimitry Andric #define _mm512_cmple_epi32_mask(A, B) \ 46380b57cec5SDimitry Andric _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 46390b57cec5SDimitry Andric #define _mm512_mask_cmple_epi32_mask(k, A, B) \ 46400b57cec5SDimitry Andric _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 46410b57cec5SDimitry Andric #define _mm512_cmplt_epi32_mask(A, B) \ 46420b57cec5SDimitry Andric _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 46430b57cec5SDimitry Andric #define _mm512_mask_cmplt_epi32_mask(k, A, B) \ 46440b57cec5SDimitry Andric _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 46450b57cec5SDimitry Andric #define _mm512_cmpneq_epi32_mask(A, B) \ 46460b57cec5SDimitry Andric _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 46470b57cec5SDimitry Andric #define _mm512_mask_cmpneq_epi32_mask(k, A, B) \ 46480b57cec5SDimitry Andric _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 46490b57cec5SDimitry Andric 46500b57cec5SDimitry Andric #define _mm512_cmpeq_epu32_mask(A, B) \ 46510b57cec5SDimitry Andric _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 46520b57cec5SDimitry Andric #define _mm512_mask_cmpeq_epu32_mask(k, A, B) \ 46530b57cec5SDimitry Andric _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 46540b57cec5SDimitry Andric #define _mm512_cmpge_epu32_mask(A, B) \ 46550b57cec5SDimitry Andric 
_mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 46560b57cec5SDimitry Andric #define _mm512_mask_cmpge_epu32_mask(k, A, B) \ 46570b57cec5SDimitry Andric _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 46580b57cec5SDimitry Andric #define _mm512_cmpgt_epu32_mask(A, B) \ 46590b57cec5SDimitry Andric _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 46600b57cec5SDimitry Andric #define _mm512_mask_cmpgt_epu32_mask(k, A, B) \ 46610b57cec5SDimitry Andric _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 46620b57cec5SDimitry Andric #define _mm512_cmple_epu32_mask(A, B) \ 46630b57cec5SDimitry Andric _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 46640b57cec5SDimitry Andric #define _mm512_mask_cmple_epu32_mask(k, A, B) \ 46650b57cec5SDimitry Andric _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 46660b57cec5SDimitry Andric #define _mm512_cmplt_epu32_mask(A, B) \ 46670b57cec5SDimitry Andric _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 46680b57cec5SDimitry Andric #define _mm512_mask_cmplt_epu32_mask(k, A, B) \ 46690b57cec5SDimitry Andric _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 46700b57cec5SDimitry Andric #define _mm512_cmpneq_epu32_mask(A, B) \ 46710b57cec5SDimitry Andric _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 46720b57cec5SDimitry Andric #define _mm512_mask_cmpneq_epu32_mask(k, A, B) \ 46730b57cec5SDimitry Andric _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 46740b57cec5SDimitry Andric 46750b57cec5SDimitry Andric #define _mm512_cmpeq_epi64_mask(A, B) \ 46760b57cec5SDimitry Andric _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 46770b57cec5SDimitry Andric #define _mm512_mask_cmpeq_epi64_mask(k, A, B) \ 46780b57cec5SDimitry Andric _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 46790b57cec5SDimitry Andric #define _mm512_cmpge_epi64_mask(A, B) \ 46800b57cec5SDimitry Andric _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 46810b57cec5SDimitry Andric #define _mm512_mask_cmpge_epi64_mask(k, A, B) \ 46820b57cec5SDimitry 
Andric _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 46830b57cec5SDimitry Andric #define _mm512_cmpgt_epi64_mask(A, B) \ 46840b57cec5SDimitry Andric _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 46850b57cec5SDimitry Andric #define _mm512_mask_cmpgt_epi64_mask(k, A, B) \ 46860b57cec5SDimitry Andric _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 46870b57cec5SDimitry Andric #define _mm512_cmple_epi64_mask(A, B) \ 46880b57cec5SDimitry Andric _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 46890b57cec5SDimitry Andric #define _mm512_mask_cmple_epi64_mask(k, A, B) \ 46900b57cec5SDimitry Andric _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 46910b57cec5SDimitry Andric #define _mm512_cmplt_epi64_mask(A, B) \ 46920b57cec5SDimitry Andric _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 46930b57cec5SDimitry Andric #define _mm512_mask_cmplt_epi64_mask(k, A, B) \ 46940b57cec5SDimitry Andric _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 46950b57cec5SDimitry Andric #define _mm512_cmpneq_epi64_mask(A, B) \ 46960b57cec5SDimitry Andric _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 46970b57cec5SDimitry Andric #define _mm512_mask_cmpneq_epi64_mask(k, A, B) \ 46980b57cec5SDimitry Andric _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 46990b57cec5SDimitry Andric 47000b57cec5SDimitry Andric #define _mm512_cmpeq_epu64_mask(A, B) \ 47010b57cec5SDimitry Andric _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 47020b57cec5SDimitry Andric #define _mm512_mask_cmpeq_epu64_mask(k, A, B) \ 47030b57cec5SDimitry Andric _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 47040b57cec5SDimitry Andric #define _mm512_cmpge_epu64_mask(A, B) \ 47050b57cec5SDimitry Andric _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 47060b57cec5SDimitry Andric #define _mm512_mask_cmpge_epu64_mask(k, A, B) \ 47070b57cec5SDimitry Andric _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 47080b57cec5SDimitry Andric #define _mm512_cmpgt_epu64_mask(A, B) \ 
47090b57cec5SDimitry Andric _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 47100b57cec5SDimitry Andric #define _mm512_mask_cmpgt_epu64_mask(k, A, B) \ 47110b57cec5SDimitry Andric _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 47120b57cec5SDimitry Andric #define _mm512_cmple_epu64_mask(A, B) \ 47130b57cec5SDimitry Andric _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 47140b57cec5SDimitry Andric #define _mm512_mask_cmple_epu64_mask(k, A, B) \ 47150b57cec5SDimitry Andric _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 47160b57cec5SDimitry Andric #define _mm512_cmplt_epu64_mask(A, B) \ 47170b57cec5SDimitry Andric _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 47180b57cec5SDimitry Andric #define _mm512_mask_cmplt_epu64_mask(k, A, B) \ 47190b57cec5SDimitry Andric _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 47200b57cec5SDimitry Andric #define _mm512_cmpneq_epu64_mask(A, B) \ 47210b57cec5SDimitry Andric _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 47220b57cec5SDimitry Andric #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \ 47230b57cec5SDimitry Andric _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 47240b57cec5SDimitry Andric 47250b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 47260b57cec5SDimitry Andric _mm512_cvtepi8_epi32(__m128i __A) 47270b57cec5SDimitry Andric { 47280b57cec5SDimitry Andric /* This function always performs a signed extension, but __v16qi is a char 47290b57cec5SDimitry Andric which may be signed or unsigned, so use __v16qs. 
*/ 47300b57cec5SDimitry Andric return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); 47310b57cec5SDimitry Andric } 47320b57cec5SDimitry Andric 47330b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 47340b57cec5SDimitry Andric _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) 47350b57cec5SDimitry Andric { 47360b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 47370b57cec5SDimitry Andric (__v16si)_mm512_cvtepi8_epi32(__A), 47380b57cec5SDimitry Andric (__v16si)__W); 47390b57cec5SDimitry Andric } 47400b57cec5SDimitry Andric 47410b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 47420b57cec5SDimitry Andric _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) 47430b57cec5SDimitry Andric { 47440b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 47450b57cec5SDimitry Andric (__v16si)_mm512_cvtepi8_epi32(__A), 47460b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 47470b57cec5SDimitry Andric } 47480b57cec5SDimitry Andric 47490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 47500b57cec5SDimitry Andric _mm512_cvtepi8_epi64(__m128i __A) 47510b57cec5SDimitry Andric { 47520b57cec5SDimitry Andric /* This function always performs a signed extension, but __v16qi is a char 47530b57cec5SDimitry Andric which may be signed or unsigned, so use __v16qs. 
*/ 47540b57cec5SDimitry Andric return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); 47550b57cec5SDimitry Andric } 47560b57cec5SDimitry Andric 47570b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 47580b57cec5SDimitry Andric _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) 47590b57cec5SDimitry Andric { 47600b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 47610b57cec5SDimitry Andric (__v8di)_mm512_cvtepi8_epi64(__A), 47620b57cec5SDimitry Andric (__v8di)__W); 47630b57cec5SDimitry Andric } 47640b57cec5SDimitry Andric 47650b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 47660b57cec5SDimitry Andric _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 47670b57cec5SDimitry Andric { 47680b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 47690b57cec5SDimitry Andric (__v8di)_mm512_cvtepi8_epi64(__A), 47700b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512 ()); 47710b57cec5SDimitry Andric } 47720b57cec5SDimitry Andric 47730b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 47740b57cec5SDimitry Andric _mm512_cvtepi32_epi64(__m256i __X) 47750b57cec5SDimitry Andric { 47760b57cec5SDimitry Andric return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); 47770b57cec5SDimitry Andric } 47780b57cec5SDimitry Andric 47790b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 47800b57cec5SDimitry Andric _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) 47810b57cec5SDimitry Andric { 47820b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 47830b57cec5SDimitry Andric (__v8di)_mm512_cvtepi32_epi64(__X), 47840b57cec5SDimitry Andric (__v8di)__W); 47850b57cec5SDimitry Andric } 47860b57cec5SDimitry Andric 47870b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 47880b57cec5SDimitry Andric 
_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) 47890b57cec5SDimitry Andric { 47900b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 47910b57cec5SDimitry Andric (__v8di)_mm512_cvtepi32_epi64(__X), 47920b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 47930b57cec5SDimitry Andric } 47940b57cec5SDimitry Andric 47950b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 47960b57cec5SDimitry Andric _mm512_cvtepi16_epi32(__m256i __A) 47970b57cec5SDimitry Andric { 47980b57cec5SDimitry Andric return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si); 47990b57cec5SDimitry Andric } 48000b57cec5SDimitry Andric 48010b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48020b57cec5SDimitry Andric _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) 48030b57cec5SDimitry Andric { 48040b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 48050b57cec5SDimitry Andric (__v16si)_mm512_cvtepi16_epi32(__A), 48060b57cec5SDimitry Andric (__v16si)__W); 48070b57cec5SDimitry Andric } 48080b57cec5SDimitry Andric 48090b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48100b57cec5SDimitry Andric _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) 48110b57cec5SDimitry Andric { 48120b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 48130b57cec5SDimitry Andric (__v16si)_mm512_cvtepi16_epi32(__A), 48140b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512 ()); 48150b57cec5SDimitry Andric } 48160b57cec5SDimitry Andric 48170b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48180b57cec5SDimitry Andric _mm512_cvtepi16_epi64(__m128i __A) 48190b57cec5SDimitry Andric { 48200b57cec5SDimitry Andric return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di); 48210b57cec5SDimitry Andric } 48220b57cec5SDimitry Andric 48230b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 
48240b57cec5SDimitry Andric _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) 48250b57cec5SDimitry Andric { 48260b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 48270b57cec5SDimitry Andric (__v8di)_mm512_cvtepi16_epi64(__A), 48280b57cec5SDimitry Andric (__v8di)__W); 48290b57cec5SDimitry Andric } 48300b57cec5SDimitry Andric 48310b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48320b57cec5SDimitry Andric _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 48330b57cec5SDimitry Andric { 48340b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 48350b57cec5SDimitry Andric (__v8di)_mm512_cvtepi16_epi64(__A), 48360b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 48370b57cec5SDimitry Andric } 48380b57cec5SDimitry Andric 48390b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48400b57cec5SDimitry Andric _mm512_cvtepu8_epi32(__m128i __A) 48410b57cec5SDimitry Andric { 48420b57cec5SDimitry Andric return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si); 48430b57cec5SDimitry Andric } 48440b57cec5SDimitry Andric 48450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48460b57cec5SDimitry Andric _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) 48470b57cec5SDimitry Andric { 48480b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 48490b57cec5SDimitry Andric (__v16si)_mm512_cvtepu8_epi32(__A), 48500b57cec5SDimitry Andric (__v16si)__W); 48510b57cec5SDimitry Andric } 48520b57cec5SDimitry Andric 48530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48540b57cec5SDimitry Andric _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) 48550b57cec5SDimitry Andric { 48560b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 48570b57cec5SDimitry Andric (__v16si)_mm512_cvtepu8_epi32(__A), 48580b57cec5SDimitry Andric 
(__v16si)_mm512_setzero_si512()); 48590b57cec5SDimitry Andric } 48600b57cec5SDimitry Andric 48610b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48620b57cec5SDimitry Andric _mm512_cvtepu8_epi64(__m128i __A) 48630b57cec5SDimitry Andric { 48640b57cec5SDimitry Andric return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); 48650b57cec5SDimitry Andric } 48660b57cec5SDimitry Andric 48670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48680b57cec5SDimitry Andric _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) 48690b57cec5SDimitry Andric { 48700b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 48710b57cec5SDimitry Andric (__v8di)_mm512_cvtepu8_epi64(__A), 48720b57cec5SDimitry Andric (__v8di)__W); 48730b57cec5SDimitry Andric } 48740b57cec5SDimitry Andric 48750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48760b57cec5SDimitry Andric _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 48770b57cec5SDimitry Andric { 48780b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 48790b57cec5SDimitry Andric (__v8di)_mm512_cvtepu8_epi64(__A), 48800b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 48810b57cec5SDimitry Andric } 48820b57cec5SDimitry Andric 48830b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48840b57cec5SDimitry Andric _mm512_cvtepu32_epi64(__m256i __X) 48850b57cec5SDimitry Andric { 48860b57cec5SDimitry Andric return (__m512i)__builtin_convertvector((__v8su)__X, __v8di); 48870b57cec5SDimitry Andric } 48880b57cec5SDimitry Andric 48890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48900b57cec5SDimitry Andric _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) 48910b57cec5SDimitry Andric { 48920b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 
48930b57cec5SDimitry Andric (__v8di)_mm512_cvtepu32_epi64(__X), 48940b57cec5SDimitry Andric (__v8di)__W); 48950b57cec5SDimitry Andric } 48960b57cec5SDimitry Andric 48970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 48980b57cec5SDimitry Andric _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) 48990b57cec5SDimitry Andric { 49000b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 49010b57cec5SDimitry Andric (__v8di)_mm512_cvtepu32_epi64(__X), 49020b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 49030b57cec5SDimitry Andric } 49040b57cec5SDimitry Andric 49050b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49060b57cec5SDimitry Andric _mm512_cvtepu16_epi32(__m256i __A) 49070b57cec5SDimitry Andric { 49080b57cec5SDimitry Andric return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si); 49090b57cec5SDimitry Andric } 49100b57cec5SDimitry Andric 49110b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49120b57cec5SDimitry Andric _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) 49130b57cec5SDimitry Andric { 49140b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 49150b57cec5SDimitry Andric (__v16si)_mm512_cvtepu16_epi32(__A), 49160b57cec5SDimitry Andric (__v16si)__W); 49170b57cec5SDimitry Andric } 49180b57cec5SDimitry Andric 49190b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49200b57cec5SDimitry Andric _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) 49210b57cec5SDimitry Andric { 49220b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 49230b57cec5SDimitry Andric (__v16si)_mm512_cvtepu16_epi32(__A), 49240b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 49250b57cec5SDimitry Andric } 49260b57cec5SDimitry Andric 49270b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49280b57cec5SDimitry Andric _mm512_cvtepu16_epi64(__m128i __A) 
49290b57cec5SDimitry Andric { 49300b57cec5SDimitry Andric return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di); 49310b57cec5SDimitry Andric } 49320b57cec5SDimitry Andric 49330b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49340b57cec5SDimitry Andric _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) 49350b57cec5SDimitry Andric { 49360b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 49370b57cec5SDimitry Andric (__v8di)_mm512_cvtepu16_epi64(__A), 49380b57cec5SDimitry Andric (__v8di)__W); 49390b57cec5SDimitry Andric } 49400b57cec5SDimitry Andric 49410b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49420b57cec5SDimitry Andric _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 49430b57cec5SDimitry Andric { 49440b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 49450b57cec5SDimitry Andric (__v8di)_mm512_cvtepu16_epi64(__A), 49460b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 49470b57cec5SDimitry Andric } 49480b57cec5SDimitry Andric 49490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49500b57cec5SDimitry Andric _mm512_rorv_epi32 (__m512i __A, __m512i __B) 49510b57cec5SDimitry Andric { 49520b57cec5SDimitry Andric return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B); 49530b57cec5SDimitry Andric } 49540b57cec5SDimitry Andric 49550b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49560b57cec5SDimitry Andric _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 49570b57cec5SDimitry Andric { 49580b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 49590b57cec5SDimitry Andric (__v16si)_mm512_rorv_epi32(__A, __B), 49600b57cec5SDimitry Andric (__v16si)__W); 49610b57cec5SDimitry Andric } 49620b57cec5SDimitry Andric 49630b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49640b57cec5SDimitry Andric 
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 49650b57cec5SDimitry Andric { 49660b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 49670b57cec5SDimitry Andric (__v16si)_mm512_rorv_epi32(__A, __B), 49680b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 49690b57cec5SDimitry Andric } 49700b57cec5SDimitry Andric 49710b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49720b57cec5SDimitry Andric _mm512_rorv_epi64 (__m512i __A, __m512i __B) 49730b57cec5SDimitry Andric { 49740b57cec5SDimitry Andric return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B); 49750b57cec5SDimitry Andric } 49760b57cec5SDimitry Andric 49770b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49780b57cec5SDimitry Andric _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 49790b57cec5SDimitry Andric { 49800b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512(__U, 49810b57cec5SDimitry Andric (__v8di)_mm512_rorv_epi64(__A, __B), 49820b57cec5SDimitry Andric (__v8di)__W); 49830b57cec5SDimitry Andric } 49840b57cec5SDimitry Andric 49850b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 49860b57cec5SDimitry Andric _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 49870b57cec5SDimitry Andric { 49880b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512(__U, 49890b57cec5SDimitry Andric (__v8di)_mm512_rorv_epi64(__A, __B), 49900b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 49910b57cec5SDimitry Andric } 49920b57cec5SDimitry Andric 49930b57cec5SDimitry Andric 49940b57cec5SDimitry Andric 49950b57cec5SDimitry Andric #define _mm512_cmp_epi32_mask(a, b, p) \ 4996349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 49970b57cec5SDimitry Andric (__v16si)(__m512i)(b), (int)(p), \ 4998349cc55cSDimitry Andric (__mmask16)-1)) 49990b57cec5SDimitry Andric 50000b57cec5SDimitry Andric #define 
_mm512_cmp_epu32_mask(a, b, p) \ 5001349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 50020b57cec5SDimitry Andric (__v16si)(__m512i)(b), (int)(p), \ 5003349cc55cSDimitry Andric (__mmask16)-1)) 50040b57cec5SDimitry Andric 50050b57cec5SDimitry Andric #define _mm512_cmp_epi64_mask(a, b, p) \ 5006349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 50070b57cec5SDimitry Andric (__v8di)(__m512i)(b), (int)(p), \ 5008349cc55cSDimitry Andric (__mmask8)-1)) 50090b57cec5SDimitry Andric 50100b57cec5SDimitry Andric #define _mm512_cmp_epu64_mask(a, b, p) \ 5011349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 50120b57cec5SDimitry Andric (__v8di)(__m512i)(b), (int)(p), \ 5013349cc55cSDimitry Andric (__mmask8)-1)) 50140b57cec5SDimitry Andric 50150b57cec5SDimitry Andric #define _mm512_mask_cmp_epi32_mask(m, a, b, p) \ 5016349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 50170b57cec5SDimitry Andric (__v16si)(__m512i)(b), (int)(p), \ 5018349cc55cSDimitry Andric (__mmask16)(m))) 50190b57cec5SDimitry Andric 50200b57cec5SDimitry Andric #define _mm512_mask_cmp_epu32_mask(m, a, b, p) \ 5021349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 50220b57cec5SDimitry Andric (__v16si)(__m512i)(b), (int)(p), \ 5023349cc55cSDimitry Andric (__mmask16)(m))) 50240b57cec5SDimitry Andric 50250b57cec5SDimitry Andric #define _mm512_mask_cmp_epi64_mask(m, a, b, p) \ 5026349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 50270b57cec5SDimitry Andric (__v8di)(__m512i)(b), (int)(p), \ 5028349cc55cSDimitry Andric (__mmask8)(m))) 50290b57cec5SDimitry Andric 50300b57cec5SDimitry Andric #define _mm512_mask_cmp_epu64_mask(m, a, b, p) \ 5031349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 50320b57cec5SDimitry Andric (__v8di)(__m512i)(b), (int)(p), \ 
5033349cc55cSDimitry Andric (__mmask8)(m))) 50340b57cec5SDimitry Andric 50350b57cec5SDimitry Andric #define _mm512_rol_epi32(a, b) \ 5036349cc55cSDimitry Andric ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))) 50370b57cec5SDimitry Andric 50380b57cec5SDimitry Andric #define _mm512_mask_rol_epi32(W, U, a, b) \ 5039349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 50400b57cec5SDimitry Andric (__v16si)_mm512_rol_epi32((a), (b)), \ 5041349cc55cSDimitry Andric (__v16si)(__m512i)(W))) 50420b57cec5SDimitry Andric 50430b57cec5SDimitry Andric #define _mm512_maskz_rol_epi32(U, a, b) \ 5044349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 50450b57cec5SDimitry Andric (__v16si)_mm512_rol_epi32((a), (b)), \ 5046349cc55cSDimitry Andric (__v16si)_mm512_setzero_si512())) 50470b57cec5SDimitry Andric 50480b57cec5SDimitry Andric #define _mm512_rol_epi64(a, b) \ 5049349cc55cSDimitry Andric ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))) 50500b57cec5SDimitry Andric 50510b57cec5SDimitry Andric #define _mm512_mask_rol_epi64(W, U, a, b) \ 5052349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 50530b57cec5SDimitry Andric (__v8di)_mm512_rol_epi64((a), (b)), \ 5054349cc55cSDimitry Andric (__v8di)(__m512i)(W))) 50550b57cec5SDimitry Andric 50560b57cec5SDimitry Andric #define _mm512_maskz_rol_epi64(U, a, b) \ 5057349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 50580b57cec5SDimitry Andric (__v8di)_mm512_rol_epi64((a), (b)), \ 5059349cc55cSDimitry Andric (__v8di)_mm512_setzero_si512())) 50600b57cec5SDimitry Andric 50610b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 50620b57cec5SDimitry Andric _mm512_rolv_epi32 (__m512i __A, __m512i __B) 50630b57cec5SDimitry Andric { 50640b57cec5SDimitry Andric return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B); 50650b57cec5SDimitry Andric } 50660b57cec5SDimitry Andric 
50670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 50680b57cec5SDimitry Andric _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 50690b57cec5SDimitry Andric { 50700b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 50710b57cec5SDimitry Andric (__v16si)_mm512_rolv_epi32(__A, __B), 50720b57cec5SDimitry Andric (__v16si)__W); 50730b57cec5SDimitry Andric } 50740b57cec5SDimitry Andric 50750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 50760b57cec5SDimitry Andric _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 50770b57cec5SDimitry Andric { 50780b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512(__U, 50790b57cec5SDimitry Andric (__v16si)_mm512_rolv_epi32(__A, __B), 50800b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 50810b57cec5SDimitry Andric } 50820b57cec5SDimitry Andric 50830b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 50840b57cec5SDimitry Andric _mm512_rolv_epi64 (__m512i __A, __m512i __B) 50850b57cec5SDimitry Andric { 50860b57cec5SDimitry Andric return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B); 50870b57cec5SDimitry Andric } 50880b57cec5SDimitry Andric 50890b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 50900b57cec5SDimitry Andric _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 50910b57cec5SDimitry Andric { 50920b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512(__U, 50930b57cec5SDimitry Andric (__v8di)_mm512_rolv_epi64(__A, __B), 50940b57cec5SDimitry Andric (__v8di)__W); 50950b57cec5SDimitry Andric } 50960b57cec5SDimitry Andric 50970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 50980b57cec5SDimitry Andric _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 50990b57cec5SDimitry Andric { 51000b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512(__U, 51010b57cec5SDimitry 
Andric (__v8di)_mm512_rolv_epi64(__A, __B), 51020b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 51030b57cec5SDimitry Andric } 51040b57cec5SDimitry Andric 51050b57cec5SDimitry Andric #define _mm512_ror_epi32(A, B) \ 5106349cc55cSDimitry Andric ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))) 51070b57cec5SDimitry Andric 51080b57cec5SDimitry Andric #define _mm512_mask_ror_epi32(W, U, A, B) \ 5109349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 51100b57cec5SDimitry Andric (__v16si)_mm512_ror_epi32((A), (B)), \ 5111349cc55cSDimitry Andric (__v16si)(__m512i)(W))) 51120b57cec5SDimitry Andric 51130b57cec5SDimitry Andric #define _mm512_maskz_ror_epi32(U, A, B) \ 5114349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 51150b57cec5SDimitry Andric (__v16si)_mm512_ror_epi32((A), (B)), \ 5116349cc55cSDimitry Andric (__v16si)_mm512_setzero_si512())) 51170b57cec5SDimitry Andric 51180b57cec5SDimitry Andric #define _mm512_ror_epi64(A, B) \ 5119349cc55cSDimitry Andric ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))) 51200b57cec5SDimitry Andric 51210b57cec5SDimitry Andric #define _mm512_mask_ror_epi64(W, U, A, B) \ 5122349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 51230b57cec5SDimitry Andric (__v8di)_mm512_ror_epi64((A), (B)), \ 5124349cc55cSDimitry Andric (__v8di)(__m512i)(W))) 51250b57cec5SDimitry Andric 51260b57cec5SDimitry Andric #define _mm512_maskz_ror_epi64(U, A, B) \ 5127349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 51280b57cec5SDimitry Andric (__v8di)_mm512_ror_epi64((A), (B)), \ 5129349cc55cSDimitry Andric (__v8di)_mm512_setzero_si512())) 51300b57cec5SDimitry Andric 51310b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 51325ffd83dbSDimitry Andric _mm512_slli_epi32(__m512i __A, unsigned int __B) 51330b57cec5SDimitry Andric { 513481ad6265SDimitry Andric return 
(__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B); 51350b57cec5SDimitry Andric } 51360b57cec5SDimitry Andric 51370b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 51385ffd83dbSDimitry Andric _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, 51395ffd83dbSDimitry Andric unsigned int __B) 51400b57cec5SDimitry Andric { 51410b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 51420b57cec5SDimitry Andric (__v16si)_mm512_slli_epi32(__A, __B), 51430b57cec5SDimitry Andric (__v16si)__W); 51440b57cec5SDimitry Andric } 51450b57cec5SDimitry Andric 51460b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 51475ffd83dbSDimitry Andric _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { 51480b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 51490b57cec5SDimitry Andric (__v16si)_mm512_slli_epi32(__A, __B), 51500b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 51510b57cec5SDimitry Andric } 51520b57cec5SDimitry Andric 51530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 51545ffd83dbSDimitry Andric _mm512_slli_epi64(__m512i __A, unsigned int __B) 51550b57cec5SDimitry Andric { 515681ad6265SDimitry Andric return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B); 51570b57cec5SDimitry Andric } 51580b57cec5SDimitry Andric 51590b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 51605ffd83dbSDimitry Andric _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) 51610b57cec5SDimitry Andric { 51620b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 51630b57cec5SDimitry Andric (__v8di)_mm512_slli_epi64(__A, __B), 51640b57cec5SDimitry Andric (__v8di)__W); 51650b57cec5SDimitry Andric } 51660b57cec5SDimitry Andric 51670b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 51685ffd83dbSDimitry Andric 
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B) 51690b57cec5SDimitry Andric { 51700b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 51710b57cec5SDimitry Andric (__v8di)_mm512_slli_epi64(__A, __B), 51720b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 51730b57cec5SDimitry Andric } 51740b57cec5SDimitry Andric 51750b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 51765ffd83dbSDimitry Andric _mm512_srli_epi32(__m512i __A, unsigned int __B) 51770b57cec5SDimitry Andric { 517881ad6265SDimitry Andric return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B); 51790b57cec5SDimitry Andric } 51800b57cec5SDimitry Andric 51810b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 51825ffd83dbSDimitry Andric _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, 51835ffd83dbSDimitry Andric unsigned int __B) 51840b57cec5SDimitry Andric { 51850b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 51860b57cec5SDimitry Andric (__v16si)_mm512_srli_epi32(__A, __B), 51870b57cec5SDimitry Andric (__v16si)__W); 51880b57cec5SDimitry Andric } 51890b57cec5SDimitry Andric 51900b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 51915ffd83dbSDimitry Andric _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) { 51920b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 51930b57cec5SDimitry Andric (__v16si)_mm512_srli_epi32(__A, __B), 51940b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 51950b57cec5SDimitry Andric } 51960b57cec5SDimitry Andric 51970b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 51985ffd83dbSDimitry Andric _mm512_srli_epi64(__m512i __A, unsigned int __B) 51990b57cec5SDimitry Andric { 520081ad6265SDimitry Andric return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B); 52010b57cec5SDimitry Andric } 52020b57cec5SDimitry Andric 
52030b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 52045ffd83dbSDimitry Andric _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, 52055ffd83dbSDimitry Andric unsigned int __B) 52060b57cec5SDimitry Andric { 52070b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 52080b57cec5SDimitry Andric (__v8di)_mm512_srli_epi64(__A, __B), 52090b57cec5SDimitry Andric (__v8di)__W); 52100b57cec5SDimitry Andric } 52110b57cec5SDimitry Andric 52120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 52135ffd83dbSDimitry Andric _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, 52145ffd83dbSDimitry Andric unsigned int __B) 52150b57cec5SDimitry Andric { 52160b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 52170b57cec5SDimitry Andric (__v8di)_mm512_srli_epi64(__A, __B), 52180b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 52190b57cec5SDimitry Andric } 52200b57cec5SDimitry Andric 52210b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 52220b57cec5SDimitry Andric _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) 52230b57cec5SDimitry Andric { 52240b57cec5SDimitry Andric return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 52250b57cec5SDimitry Andric (__v16si) __W, 52260b57cec5SDimitry Andric (__mmask16) __U); 52270b57cec5SDimitry Andric } 52280b57cec5SDimitry Andric 52290b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 52300b57cec5SDimitry Andric _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) 52310b57cec5SDimitry Andric { 52320b57cec5SDimitry Andric return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 52330b57cec5SDimitry Andric (__v16si) 52340b57cec5SDimitry Andric _mm512_setzero_si512 (), 52350b57cec5SDimitry Andric (__mmask16) __U); 52360b57cec5SDimitry Andric } 52370b57cec5SDimitry Andric 52380b57cec5SDimitry Andric static __inline__ void 
__DEFAULT_FN_ATTRS512 52390b57cec5SDimitry Andric _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) 52400b57cec5SDimitry Andric { 52410b57cec5SDimitry Andric __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, 52420b57cec5SDimitry Andric (__mmask16) __U); 52430b57cec5SDimitry Andric } 52440b57cec5SDimitry Andric 52450b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 52460b57cec5SDimitry Andric _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 52470b57cec5SDimitry Andric { 52480b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, 52490b57cec5SDimitry Andric (__v16si) __A, 52500b57cec5SDimitry Andric (__v16si) __W); 52510b57cec5SDimitry Andric } 52520b57cec5SDimitry Andric 52530b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 52540b57cec5SDimitry Andric _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) 52550b57cec5SDimitry Andric { 52560b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, 52570b57cec5SDimitry Andric (__v16si) __A, 52580b57cec5SDimitry Andric (__v16si) _mm512_setzero_si512 ()); 52590b57cec5SDimitry Andric } 52600b57cec5SDimitry Andric 52610b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 52620b57cec5SDimitry Andric _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 52630b57cec5SDimitry Andric { 52640b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 52650b57cec5SDimitry Andric (__v8di) __A, 52660b57cec5SDimitry Andric (__v8di) __W); 52670b57cec5SDimitry Andric } 52680b57cec5SDimitry Andric 52690b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 52700b57cec5SDimitry Andric _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) 52710b57cec5SDimitry Andric { 52720b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, 52730b57cec5SDimitry Andric (__v8di) __A, 52740b57cec5SDimitry Andric 
(__v8di) _mm512_setzero_si512 ()); 52750b57cec5SDimitry Andric } 52760b57cec5SDimitry Andric 52770b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 52780b57cec5SDimitry Andric _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) 52790b57cec5SDimitry Andric { 52800b57cec5SDimitry Andric return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 52810b57cec5SDimitry Andric (__v8di) __W, 52820b57cec5SDimitry Andric (__mmask8) __U); 52830b57cec5SDimitry Andric } 52840b57cec5SDimitry Andric 52850b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 52860b57cec5SDimitry Andric _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) 52870b57cec5SDimitry Andric { 52880b57cec5SDimitry Andric return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 52890b57cec5SDimitry Andric (__v8di) 52900b57cec5SDimitry Andric _mm512_setzero_si512 (), 52910b57cec5SDimitry Andric (__mmask8) __U); 52920b57cec5SDimitry Andric } 52930b57cec5SDimitry Andric 52940b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 52950b57cec5SDimitry Andric _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) 52960b57cec5SDimitry Andric { 52970b57cec5SDimitry Andric __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, 52980b57cec5SDimitry Andric (__mmask8) __U); 52990b57cec5SDimitry Andric } 53000b57cec5SDimitry Andric 53010b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 53020b57cec5SDimitry Andric _mm512_movedup_pd (__m512d __A) 53030b57cec5SDimitry Andric { 53040b57cec5SDimitry Andric return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, 53050b57cec5SDimitry Andric 0, 0, 2, 2, 4, 4, 6, 6); 53060b57cec5SDimitry Andric } 53070b57cec5SDimitry Andric 53080b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 53090b57cec5SDimitry Andric _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) 53100b57cec5SDimitry Andric { 
53110b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 53120b57cec5SDimitry Andric (__v8df)_mm512_movedup_pd(__A), 53130b57cec5SDimitry Andric (__v8df)__W); 53140b57cec5SDimitry Andric } 53150b57cec5SDimitry Andric 53160b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 53170b57cec5SDimitry Andric _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) 53180b57cec5SDimitry Andric { 53190b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 53200b57cec5SDimitry Andric (__v8df)_mm512_movedup_pd(__A), 53210b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 53220b57cec5SDimitry Andric } 53230b57cec5SDimitry Andric 53240b57cec5SDimitry Andric #define _mm512_fixupimm_round_pd(A, B, C, imm, R) \ 5325349cc55cSDimitry Andric ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 53260b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 53270b57cec5SDimitry Andric (__v8di)(__m512i)(C), (int)(imm), \ 5328349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 53290b57cec5SDimitry Andric 53300b57cec5SDimitry Andric #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ 5331349cc55cSDimitry Andric ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 53320b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 53330b57cec5SDimitry Andric (__v8di)(__m512i)(C), (int)(imm), \ 5334349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 53350b57cec5SDimitry Andric 53360b57cec5SDimitry Andric #define _mm512_fixupimm_pd(A, B, C, imm) \ 5337349cc55cSDimitry Andric ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 53380b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 53390b57cec5SDimitry Andric (__v8di)(__m512i)(C), (int)(imm), \ 53400b57cec5SDimitry Andric (__mmask8)-1, \ 5341349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 53420b57cec5SDimitry Andric 53430b57cec5SDimitry Andric #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \ 5344349cc55cSDimitry Andric 
((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 53450b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 53460b57cec5SDimitry Andric (__v8di)(__m512i)(C), (int)(imm), \ 53470b57cec5SDimitry Andric (__mmask8)(U), \ 5348349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 53490b57cec5SDimitry Andric 53500b57cec5SDimitry Andric #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ 5351349cc55cSDimitry Andric ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 53520b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 53530b57cec5SDimitry Andric (__v8di)(__m512i)(C), \ 53540b57cec5SDimitry Andric (int)(imm), (__mmask8)(U), \ 5355349cc55cSDimitry Andric (int)(R))) 53560b57cec5SDimitry Andric 53570b57cec5SDimitry Andric #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \ 5358349cc55cSDimitry Andric ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 53590b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 53600b57cec5SDimitry Andric (__v8di)(__m512i)(C), \ 53610b57cec5SDimitry Andric (int)(imm), (__mmask8)(U), \ 5362349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 53630b57cec5SDimitry Andric 53640b57cec5SDimitry Andric #define _mm512_fixupimm_round_ps(A, B, C, imm, R) \ 5365349cc55cSDimitry Andric ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 53660b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 53670b57cec5SDimitry Andric (__v16si)(__m512i)(C), (int)(imm), \ 5368349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 53690b57cec5SDimitry Andric 53700b57cec5SDimitry Andric #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ 5371349cc55cSDimitry Andric ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 53720b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 53730b57cec5SDimitry Andric (__v16si)(__m512i)(C), (int)(imm), \ 5374349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 53750b57cec5SDimitry Andric 53760b57cec5SDimitry Andric #define _mm512_fixupimm_ps(A, B, C, imm) \ 5377349cc55cSDimitry 
Andric ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 53780b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 53790b57cec5SDimitry Andric (__v16si)(__m512i)(C), (int)(imm), \ 53800b57cec5SDimitry Andric (__mmask16)-1, \ 5381349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 53820b57cec5SDimitry Andric 53830b57cec5SDimitry Andric #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \ 5384349cc55cSDimitry Andric ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 53850b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 53860b57cec5SDimitry Andric (__v16si)(__m512i)(C), (int)(imm), \ 53870b57cec5SDimitry Andric (__mmask16)(U), \ 5388349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 53890b57cec5SDimitry Andric 53900b57cec5SDimitry Andric #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ 5391349cc55cSDimitry Andric ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 53920b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 53930b57cec5SDimitry Andric (__v16si)(__m512i)(C), \ 53940b57cec5SDimitry Andric (int)(imm), (__mmask16)(U), \ 5395349cc55cSDimitry Andric (int)(R))) 53960b57cec5SDimitry Andric 53970b57cec5SDimitry Andric #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \ 5398349cc55cSDimitry Andric ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 53990b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 54000b57cec5SDimitry Andric (__v16si)(__m512i)(C), \ 54010b57cec5SDimitry Andric (int)(imm), (__mmask16)(U), \ 5402349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 54030b57cec5SDimitry Andric 54040b57cec5SDimitry Andric #define _mm_fixupimm_round_sd(A, B, C, imm, R) \ 5405349cc55cSDimitry Andric ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 54060b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 54070b57cec5SDimitry Andric (__v2di)(__m128i)(C), (int)(imm), \ 5408349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 54090b57cec5SDimitry Andric 54100b57cec5SDimitry Andric #define 
_mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \ 5411349cc55cSDimitry Andric ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 54120b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 54130b57cec5SDimitry Andric (__v2di)(__m128i)(C), (int)(imm), \ 5414349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 54150b57cec5SDimitry Andric 54160b57cec5SDimitry Andric #define _mm_fixupimm_sd(A, B, C, imm) \ 5417349cc55cSDimitry Andric ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 54180b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 54190b57cec5SDimitry Andric (__v2di)(__m128i)(C), (int)(imm), \ 54200b57cec5SDimitry Andric (__mmask8)-1, \ 5421349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 54220b57cec5SDimitry Andric 54230b57cec5SDimitry Andric #define _mm_mask_fixupimm_sd(A, U, B, C, imm) \ 5424349cc55cSDimitry Andric ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 54250b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 54260b57cec5SDimitry Andric (__v2di)(__m128i)(C), (int)(imm), \ 54270b57cec5SDimitry Andric (__mmask8)(U), \ 5428349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 54290b57cec5SDimitry Andric 54300b57cec5SDimitry Andric #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \ 5431349cc55cSDimitry Andric ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 54320b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 54330b57cec5SDimitry Andric (__v2di)(__m128i)(C), (int)(imm), \ 5434349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 54350b57cec5SDimitry Andric 54360b57cec5SDimitry Andric #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \ 5437349cc55cSDimitry Andric ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 54380b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 54390b57cec5SDimitry Andric (__v2di)(__m128i)(C), (int)(imm), \ 54400b57cec5SDimitry Andric (__mmask8)(U), \ 5441349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 54420b57cec5SDimitry Andric 54430b57cec5SDimitry Andric #define 
_mm_fixupimm_round_ss(A, B, C, imm, R) \ 5444349cc55cSDimitry Andric ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 54450b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 54460b57cec5SDimitry Andric (__v4si)(__m128i)(C), (int)(imm), \ 5447349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 54480b57cec5SDimitry Andric 54490b57cec5SDimitry Andric #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \ 5450349cc55cSDimitry Andric ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 54510b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 54520b57cec5SDimitry Andric (__v4si)(__m128i)(C), (int)(imm), \ 5453349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 54540b57cec5SDimitry Andric 54550b57cec5SDimitry Andric #define _mm_fixupimm_ss(A, B, C, imm) \ 5456349cc55cSDimitry Andric ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 54570b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 54580b57cec5SDimitry Andric (__v4si)(__m128i)(C), (int)(imm), \ 54590b57cec5SDimitry Andric (__mmask8)-1, \ 5460349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 54610b57cec5SDimitry Andric 54620b57cec5SDimitry Andric #define _mm_mask_fixupimm_ss(A, U, B, C, imm) \ 5463349cc55cSDimitry Andric ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 54640b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 54650b57cec5SDimitry Andric (__v4si)(__m128i)(C), (int)(imm), \ 54660b57cec5SDimitry Andric (__mmask8)(U), \ 5467349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 54680b57cec5SDimitry Andric 54690b57cec5SDimitry Andric #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \ 5470349cc55cSDimitry Andric ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 54710b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 54720b57cec5SDimitry Andric (__v4si)(__m128i)(C), (int)(imm), \ 5473349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 54740b57cec5SDimitry Andric 54750b57cec5SDimitry Andric #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \ 5476349cc55cSDimitry Andric 
((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 54770b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 54780b57cec5SDimitry Andric (__v4si)(__m128i)(C), (int)(imm), \ 54790b57cec5SDimitry Andric (__mmask8)(U), \ 5480349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 54810b57cec5SDimitry Andric 54820b57cec5SDimitry Andric #define _mm_getexp_round_sd(A, B, R) \ 5483349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 54840b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 54850b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 5486349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 54870b57cec5SDimitry Andric 54880b57cec5SDimitry Andric 54890b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 54900b57cec5SDimitry Andric _mm_getexp_sd (__m128d __A, __m128d __B) 54910b57cec5SDimitry Andric { 54920b57cec5SDimitry Andric return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, 54930b57cec5SDimitry Andric (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 54940b57cec5SDimitry Andric } 54950b57cec5SDimitry Andric 54960b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 54970b57cec5SDimitry Andric _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 54980b57cec5SDimitry Andric { 54990b57cec5SDimitry Andric return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, 55000b57cec5SDimitry Andric (__v2df) __B, 55010b57cec5SDimitry Andric (__v2df) __W, 55020b57cec5SDimitry Andric (__mmask8) __U, 55030b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 55040b57cec5SDimitry Andric } 55050b57cec5SDimitry Andric 55060b57cec5SDimitry Andric #define _mm_mask_getexp_round_sd(W, U, A, B, R) \ 5507349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 55080b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 55090b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 5510349cc55cSDimitry Andric 
(__mmask8)(U), (int)(R))) 55110b57cec5SDimitry Andric 55120b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 55130b57cec5SDimitry Andric _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) 55140b57cec5SDimitry Andric { 55150b57cec5SDimitry Andric return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, 55160b57cec5SDimitry Andric (__v2df) __B, 55170b57cec5SDimitry Andric (__v2df) _mm_setzero_pd (), 55180b57cec5SDimitry Andric (__mmask8) __U, 55190b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 55200b57cec5SDimitry Andric } 55210b57cec5SDimitry Andric 55220b57cec5SDimitry Andric #define _mm_maskz_getexp_round_sd(U, A, B, R) \ 5523349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 55240b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 55250b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 5526349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 55270b57cec5SDimitry Andric 55280b57cec5SDimitry Andric #define _mm_getexp_round_ss(A, B, R) \ 5529349cc55cSDimitry Andric ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 55300b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 55310b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 5532349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 55330b57cec5SDimitry Andric 55340b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 55350b57cec5SDimitry Andric _mm_getexp_ss (__m128 __A, __m128 __B) 55360b57cec5SDimitry Andric { 55370b57cec5SDimitry Andric return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 55380b57cec5SDimitry Andric (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 55390b57cec5SDimitry Andric } 55400b57cec5SDimitry Andric 55410b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 55420b57cec5SDimitry Andric _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 55430b57cec5SDimitry Andric { 55440b57cec5SDimitry Andric return 
(__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 55450b57cec5SDimitry Andric (__v4sf) __B, 55460b57cec5SDimitry Andric (__v4sf) __W, 55470b57cec5SDimitry Andric (__mmask8) __U, 55480b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 55490b57cec5SDimitry Andric } 55500b57cec5SDimitry Andric 55510b57cec5SDimitry Andric #define _mm_mask_getexp_round_ss(W, U, A, B, R) \ 5552349cc55cSDimitry Andric ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 55530b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 55540b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 5555349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 55560b57cec5SDimitry Andric 55570b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 55580b57cec5SDimitry Andric _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) 55590b57cec5SDimitry Andric { 55600b57cec5SDimitry Andric return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 55610b57cec5SDimitry Andric (__v4sf) __B, 55620b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps (), 55630b57cec5SDimitry Andric (__mmask8) __U, 55640b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 55650b57cec5SDimitry Andric } 55660b57cec5SDimitry Andric 55670b57cec5SDimitry Andric #define _mm_maskz_getexp_round_ss(U, A, B, R) \ 5568349cc55cSDimitry Andric ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 55690b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 55700b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 5571349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 55720b57cec5SDimitry Andric 55730b57cec5SDimitry Andric #define _mm_getmant_round_sd(A, B, C, D, R) \ 5574349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 55750b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 55760b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 55770b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 5578349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 55790b57cec5SDimitry Andric 
55800b57cec5SDimitry Andric #define _mm_getmant_sd(A, B, C, D) \ 5581349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 55820b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 55830b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 55840b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 55850b57cec5SDimitry Andric (__mmask8)-1, \ 5586349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 55870b57cec5SDimitry Andric 55880b57cec5SDimitry Andric #define _mm_mask_getmant_sd(W, U, A, B, C, D) \ 5589349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 55900b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 55910b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 55920b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 55930b57cec5SDimitry Andric (__mmask8)(U), \ 5594349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 55950b57cec5SDimitry Andric 55960b57cec5SDimitry Andric #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \ 5597349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 55980b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 55990b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 56000b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 5601349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 56020b57cec5SDimitry Andric 56030b57cec5SDimitry Andric #define _mm_maskz_getmant_sd(U, A, B, C, D) \ 5604349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 56050b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 56060b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 56070b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 56080b57cec5SDimitry Andric (__mmask8)(U), \ 5609349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 56100b57cec5SDimitry Andric 56110b57cec5SDimitry Andric #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \ 5612349cc55cSDimitry Andric ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 56130b57cec5SDimitry 
Andric (__v2df)(__m128d)(B), \ 56140b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 56150b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 5616349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 56170b57cec5SDimitry Andric 56180b57cec5SDimitry Andric #define _mm_getmant_round_ss(A, B, C, D, R) \ 5619349cc55cSDimitry Andric ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 56200b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 56210b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 56220b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 5623349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 56240b57cec5SDimitry Andric 56250b57cec5SDimitry Andric #define _mm_getmant_ss(A, B, C, D) \ 5626349cc55cSDimitry Andric ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 56270b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 56280b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 56290b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 56300b57cec5SDimitry Andric (__mmask8)-1, \ 5631349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 56320b57cec5SDimitry Andric 56330b57cec5SDimitry Andric #define _mm_mask_getmant_ss(W, U, A, B, C, D) \ 5634349cc55cSDimitry Andric ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 56350b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 56360b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 56370b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 56380b57cec5SDimitry Andric (__mmask8)(U), \ 5639349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 56400b57cec5SDimitry Andric 56410b57cec5SDimitry Andric #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \ 5642349cc55cSDimitry Andric ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 56430b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 56440b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 56450b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 5646349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 56470b57cec5SDimitry Andric 56480b57cec5SDimitry Andric #define 
_mm_maskz_getmant_ss(U, A, B, C, D) \ 5649349cc55cSDimitry Andric ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 56500b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 56510b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 56520b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 56530b57cec5SDimitry Andric (__mmask8)(U), \ 5654349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 56550b57cec5SDimitry Andric 56560b57cec5SDimitry Andric #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \ 5657349cc55cSDimitry Andric ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 56580b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 56590b57cec5SDimitry Andric (int)(((D)<<2) | (C)), \ 56600b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 5661349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 56620b57cec5SDimitry Andric 56630b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 56640b57cec5SDimitry Andric _mm512_kmov (__mmask16 __A) 56650b57cec5SDimitry Andric { 56660b57cec5SDimitry Andric return __A; 56670b57cec5SDimitry Andric } 56680b57cec5SDimitry Andric 56690b57cec5SDimitry Andric #define _mm_comi_round_sd(A, B, P, R) \ 5670349cc55cSDimitry Andric ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ 5671349cc55cSDimitry Andric (int)(P), (int)(R))) 56720b57cec5SDimitry Andric 56730b57cec5SDimitry Andric #define _mm_comi_round_ss(A, B, P, R) \ 5674349cc55cSDimitry Andric ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 5675349cc55cSDimitry Andric (int)(P), (int)(R))) 56760b57cec5SDimitry Andric 56770b57cec5SDimitry Andric #ifdef __x86_64__ 56780b57cec5SDimitry Andric #define _mm_cvt_roundsd_si64(A, R) \ 5679349cc55cSDimitry Andric ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))) 56800b57cec5SDimitry Andric #endif 56810b57cec5SDimitry Andric 56820b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 56830b57cec5SDimitry Andric _mm512_sll_epi32(__m512i 
__A, __m128i __B) 56840b57cec5SDimitry Andric { 56850b57cec5SDimitry Andric return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); 56860b57cec5SDimitry Andric } 56870b57cec5SDimitry Andric 56880b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 56890b57cec5SDimitry Andric _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 56900b57cec5SDimitry Andric { 56910b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 56920b57cec5SDimitry Andric (__v16si)_mm512_sll_epi32(__A, __B), 56930b57cec5SDimitry Andric (__v16si)__W); 56940b57cec5SDimitry Andric } 56950b57cec5SDimitry Andric 56960b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 56970b57cec5SDimitry Andric _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) 56980b57cec5SDimitry Andric { 56990b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 57000b57cec5SDimitry Andric (__v16si)_mm512_sll_epi32(__A, __B), 57010b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 57020b57cec5SDimitry Andric } 57030b57cec5SDimitry Andric 57040b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57050b57cec5SDimitry Andric _mm512_sll_epi64(__m512i __A, __m128i __B) 57060b57cec5SDimitry Andric { 57070b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); 57080b57cec5SDimitry Andric } 57090b57cec5SDimitry Andric 57100b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57110b57cec5SDimitry Andric _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 57120b57cec5SDimitry Andric { 57130b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 57140b57cec5SDimitry Andric (__v8di)_mm512_sll_epi64(__A, __B), 57150b57cec5SDimitry Andric (__v8di)__W); 57160b57cec5SDimitry Andric } 57170b57cec5SDimitry Andric 57180b57cec5SDimitry Andric static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 57190b57cec5SDimitry Andric _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) 57200b57cec5SDimitry Andric { 57210b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 57220b57cec5SDimitry Andric (__v8di)_mm512_sll_epi64(__A, __B), 57230b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 57240b57cec5SDimitry Andric } 57250b57cec5SDimitry Andric 57260b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57270b57cec5SDimitry Andric _mm512_sllv_epi32(__m512i __X, __m512i __Y) 57280b57cec5SDimitry Andric { 57290b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); 57300b57cec5SDimitry Andric } 57310b57cec5SDimitry Andric 57320b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57330b57cec5SDimitry Andric _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 57340b57cec5SDimitry Andric { 57350b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 57360b57cec5SDimitry Andric (__v16si)_mm512_sllv_epi32(__X, __Y), 57370b57cec5SDimitry Andric (__v16si)__W); 57380b57cec5SDimitry Andric } 57390b57cec5SDimitry Andric 57400b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57410b57cec5SDimitry Andric _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 57420b57cec5SDimitry Andric { 57430b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 57440b57cec5SDimitry Andric (__v16si)_mm512_sllv_epi32(__X, __Y), 57450b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 57460b57cec5SDimitry Andric } 57470b57cec5SDimitry Andric 57480b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57490b57cec5SDimitry Andric _mm512_sllv_epi64(__m512i __X, __m512i __Y) 57500b57cec5SDimitry Andric { 57510b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); 57520b57cec5SDimitry Andric } 
57530b57cec5SDimitry Andric 57540b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57550b57cec5SDimitry Andric _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 57560b57cec5SDimitry Andric { 57570b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 57580b57cec5SDimitry Andric (__v8di)_mm512_sllv_epi64(__X, __Y), 57590b57cec5SDimitry Andric (__v8di)__W); 57600b57cec5SDimitry Andric } 57610b57cec5SDimitry Andric 57620b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57630b57cec5SDimitry Andric _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 57640b57cec5SDimitry Andric { 57650b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 57660b57cec5SDimitry Andric (__v8di)_mm512_sllv_epi64(__X, __Y), 57670b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 57680b57cec5SDimitry Andric } 57690b57cec5SDimitry Andric 57700b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57710b57cec5SDimitry Andric _mm512_sra_epi32(__m512i __A, __m128i __B) 57720b57cec5SDimitry Andric { 57730b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B); 57740b57cec5SDimitry Andric } 57750b57cec5SDimitry Andric 57760b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57770b57cec5SDimitry Andric _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 57780b57cec5SDimitry Andric { 57790b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 57800b57cec5SDimitry Andric (__v16si)_mm512_sra_epi32(__A, __B), 57810b57cec5SDimitry Andric (__v16si)__W); 57820b57cec5SDimitry Andric } 57830b57cec5SDimitry Andric 57840b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57850b57cec5SDimitry Andric _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) 57860b57cec5SDimitry Andric { 57870b57cec5SDimitry Andric return 
(__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 57880b57cec5SDimitry Andric (__v16si)_mm512_sra_epi32(__A, __B), 57890b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 57900b57cec5SDimitry Andric } 57910b57cec5SDimitry Andric 57920b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57930b57cec5SDimitry Andric _mm512_sra_epi64(__m512i __A, __m128i __B) 57940b57cec5SDimitry Andric { 57950b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B); 57960b57cec5SDimitry Andric } 57970b57cec5SDimitry Andric 57980b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 57990b57cec5SDimitry Andric _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 58000b57cec5SDimitry Andric { 58010b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 58020b57cec5SDimitry Andric (__v8di)_mm512_sra_epi64(__A, __B), 58030b57cec5SDimitry Andric (__v8di)__W); 58040b57cec5SDimitry Andric } 58050b57cec5SDimitry Andric 58060b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58070b57cec5SDimitry Andric _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) 58080b57cec5SDimitry Andric { 58090b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 58100b57cec5SDimitry Andric (__v8di)_mm512_sra_epi64(__A, __B), 58110b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 58120b57cec5SDimitry Andric } 58130b57cec5SDimitry Andric 58140b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58150b57cec5SDimitry Andric _mm512_srav_epi32(__m512i __X, __m512i __Y) 58160b57cec5SDimitry Andric { 58170b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); 58180b57cec5SDimitry Andric } 58190b57cec5SDimitry Andric 58200b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58210b57cec5SDimitry Andric _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, 
__m512i __Y) 58220b57cec5SDimitry Andric { 58230b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 58240b57cec5SDimitry Andric (__v16si)_mm512_srav_epi32(__X, __Y), 58250b57cec5SDimitry Andric (__v16si)__W); 58260b57cec5SDimitry Andric } 58270b57cec5SDimitry Andric 58280b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58290b57cec5SDimitry Andric _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 58300b57cec5SDimitry Andric { 58310b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 58320b57cec5SDimitry Andric (__v16si)_mm512_srav_epi32(__X, __Y), 58330b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 58340b57cec5SDimitry Andric } 58350b57cec5SDimitry Andric 58360b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58370b57cec5SDimitry Andric _mm512_srav_epi64(__m512i __X, __m512i __Y) 58380b57cec5SDimitry Andric { 58390b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); 58400b57cec5SDimitry Andric } 58410b57cec5SDimitry Andric 58420b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58430b57cec5SDimitry Andric _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 58440b57cec5SDimitry Andric { 58450b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 58460b57cec5SDimitry Andric (__v8di)_mm512_srav_epi64(__X, __Y), 58470b57cec5SDimitry Andric (__v8di)__W); 58480b57cec5SDimitry Andric } 58490b57cec5SDimitry Andric 58500b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58510b57cec5SDimitry Andric _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 58520b57cec5SDimitry Andric { 58530b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 58540b57cec5SDimitry Andric (__v8di)_mm512_srav_epi64(__X, __Y), 58550b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 58560b57cec5SDimitry Andric 
} 58570b57cec5SDimitry Andric 58580b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58590b57cec5SDimitry Andric _mm512_srl_epi32(__m512i __A, __m128i __B) 58600b57cec5SDimitry Andric { 58610b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B); 58620b57cec5SDimitry Andric } 58630b57cec5SDimitry Andric 58640b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58650b57cec5SDimitry Andric _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 58660b57cec5SDimitry Andric { 58670b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 58680b57cec5SDimitry Andric (__v16si)_mm512_srl_epi32(__A, __B), 58690b57cec5SDimitry Andric (__v16si)__W); 58700b57cec5SDimitry Andric } 58710b57cec5SDimitry Andric 58720b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58730b57cec5SDimitry Andric _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) 58740b57cec5SDimitry Andric { 58750b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 58760b57cec5SDimitry Andric (__v16si)_mm512_srl_epi32(__A, __B), 58770b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 58780b57cec5SDimitry Andric } 58790b57cec5SDimitry Andric 58800b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58810b57cec5SDimitry Andric _mm512_srl_epi64(__m512i __A, __m128i __B) 58820b57cec5SDimitry Andric { 58830b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B); 58840b57cec5SDimitry Andric } 58850b57cec5SDimitry Andric 58860b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58870b57cec5SDimitry Andric _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 58880b57cec5SDimitry Andric { 58890b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 58900b57cec5SDimitry Andric (__v8di)_mm512_srl_epi64(__A, __B), 
58910b57cec5SDimitry Andric (__v8di)__W); 58920b57cec5SDimitry Andric } 58930b57cec5SDimitry Andric 58940b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 58950b57cec5SDimitry Andric _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) 58960b57cec5SDimitry Andric { 58970b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 58980b57cec5SDimitry Andric (__v8di)_mm512_srl_epi64(__A, __B), 58990b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 59000b57cec5SDimitry Andric } 59010b57cec5SDimitry Andric 59020b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 59030b57cec5SDimitry Andric _mm512_srlv_epi32(__m512i __X, __m512i __Y) 59040b57cec5SDimitry Andric { 59050b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); 59060b57cec5SDimitry Andric } 59070b57cec5SDimitry Andric 59080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 59090b57cec5SDimitry Andric _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 59100b57cec5SDimitry Andric { 59110b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 59120b57cec5SDimitry Andric (__v16si)_mm512_srlv_epi32(__X, __Y), 59130b57cec5SDimitry Andric (__v16si)__W); 59140b57cec5SDimitry Andric } 59150b57cec5SDimitry Andric 59160b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 59170b57cec5SDimitry Andric _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) 59180b57cec5SDimitry Andric { 59190b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 59200b57cec5SDimitry Andric (__v16si)_mm512_srlv_epi32(__X, __Y), 59210b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 59220b57cec5SDimitry Andric } 59230b57cec5SDimitry Andric 59240b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 59250b57cec5SDimitry Andric _mm512_srlv_epi64 (__m512i __X, __m512i __Y) 
59260b57cec5SDimitry Andric { 59270b57cec5SDimitry Andric return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y); 59280b57cec5SDimitry Andric } 59290b57cec5SDimitry Andric 59300b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 59310b57cec5SDimitry Andric _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 59320b57cec5SDimitry Andric { 59330b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 59340b57cec5SDimitry Andric (__v8di)_mm512_srlv_epi64(__X, __Y), 59350b57cec5SDimitry Andric (__v8di)__W); 59360b57cec5SDimitry Andric } 59370b57cec5SDimitry Andric 59380b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 59390b57cec5SDimitry Andric _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) 59400b57cec5SDimitry Andric { 59410b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 59420b57cec5SDimitry Andric (__v8di)_mm512_srlv_epi64(__X, __Y), 59430b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 59440b57cec5SDimitry Andric } 59450b57cec5SDimitry Andric 594681ad6265SDimitry Andric /// \enum _MM_TERNLOG_ENUM 594781ad6265SDimitry Andric /// A helper to represent the ternary logic operations among vector \a A, 594881ad6265SDimitry Andric /// \a B and \a C. The representation is passed to \a imm. 
594981ad6265SDimitry Andric typedef enum { 595081ad6265SDimitry Andric _MM_TERNLOG_A = 0xF0, 595181ad6265SDimitry Andric _MM_TERNLOG_B = 0xCC, 595281ad6265SDimitry Andric _MM_TERNLOG_C = 0xAA 595381ad6265SDimitry Andric } _MM_TERNLOG_ENUM; 595481ad6265SDimitry Andric 59550b57cec5SDimitry Andric #define _mm512_ternarylogic_epi32(A, B, C, imm) \ 595681ad6265SDimitry Andric ((__m512i)__builtin_ia32_pternlogd512_mask( \ 595781ad6265SDimitry Andric (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \ 595881ad6265SDimitry Andric (unsigned char)(imm), (__mmask16)-1)) 59590b57cec5SDimitry Andric 59600b57cec5SDimitry Andric #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \ 596181ad6265SDimitry Andric ((__m512i)__builtin_ia32_pternlogd512_mask( \ 596281ad6265SDimitry Andric (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \ 596381ad6265SDimitry Andric (unsigned char)(imm), (__mmask16)(U))) 59640b57cec5SDimitry Andric 59650b57cec5SDimitry Andric #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 596681ad6265SDimitry Andric ((__m512i)__builtin_ia32_pternlogd512_maskz( \ 596781ad6265SDimitry Andric (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \ 596881ad6265SDimitry Andric (unsigned char)(imm), (__mmask16)(U))) 59690b57cec5SDimitry Andric 59700b57cec5SDimitry Andric #define _mm512_ternarylogic_epi64(A, B, C, imm) \ 597181ad6265SDimitry Andric ((__m512i)__builtin_ia32_pternlogq512_mask( \ 597281ad6265SDimitry Andric (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \ 597381ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)-1)) 59740b57cec5SDimitry Andric 59750b57cec5SDimitry Andric #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \ 597681ad6265SDimitry Andric ((__m512i)__builtin_ia32_pternlogq512_mask( \ 597781ad6265SDimitry Andric (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \ 597881ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)(U))) 
59790b57cec5SDimitry Andric 59800b57cec5SDimitry Andric #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 598181ad6265SDimitry Andric ((__m512i)__builtin_ia32_pternlogq512_maskz( \ 598281ad6265SDimitry Andric (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \ 598381ad6265SDimitry Andric (unsigned char)(imm), (__mmask8)(U))) 59840b57cec5SDimitry Andric 59850b57cec5SDimitry Andric #ifdef __x86_64__ 59860b57cec5SDimitry Andric #define _mm_cvt_roundsd_i64(A, R) \ 5987349cc55cSDimitry Andric ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))) 59880b57cec5SDimitry Andric #endif 59890b57cec5SDimitry Andric 59900b57cec5SDimitry Andric #define _mm_cvt_roundsd_si32(A, R) \ 5991349cc55cSDimitry Andric ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))) 59920b57cec5SDimitry Andric 59930b57cec5SDimitry Andric #define _mm_cvt_roundsd_i32(A, R) \ 5994349cc55cSDimitry Andric ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))) 59950b57cec5SDimitry Andric 59960b57cec5SDimitry Andric #define _mm_cvt_roundsd_u32(A, R) \ 5997349cc55cSDimitry Andric ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))) 59980b57cec5SDimitry Andric 59990b57cec5SDimitry Andric static __inline__ unsigned __DEFAULT_FN_ATTRS128 60000b57cec5SDimitry Andric _mm_cvtsd_u32 (__m128d __A) 60010b57cec5SDimitry Andric { 60020b57cec5SDimitry Andric return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, 60030b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 60040b57cec5SDimitry Andric } 60050b57cec5SDimitry Andric 60060b57cec5SDimitry Andric #ifdef __x86_64__ 60070b57cec5SDimitry Andric #define _mm_cvt_roundsd_u64(A, R) \ 6008349cc55cSDimitry Andric ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ 6009349cc55cSDimitry Andric (int)(R))) 60100b57cec5SDimitry Andric 60110b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 60120b57cec5SDimitry Andric _mm_cvtsd_u64 
(__m128d __A) 60130b57cec5SDimitry Andric { 60140b57cec5SDimitry Andric return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) 60150b57cec5SDimitry Andric __A, 60160b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 60170b57cec5SDimitry Andric } 60180b57cec5SDimitry Andric #endif 60190b57cec5SDimitry Andric 60200b57cec5SDimitry Andric #define _mm_cvt_roundss_si32(A, R) \ 6021349cc55cSDimitry Andric ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))) 60220b57cec5SDimitry Andric 60230b57cec5SDimitry Andric #define _mm_cvt_roundss_i32(A, R) \ 6024349cc55cSDimitry Andric ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))) 60250b57cec5SDimitry Andric 60260b57cec5SDimitry Andric #ifdef __x86_64__ 60270b57cec5SDimitry Andric #define _mm_cvt_roundss_si64(A, R) \ 6028349cc55cSDimitry Andric ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))) 60290b57cec5SDimitry Andric 60300b57cec5SDimitry Andric #define _mm_cvt_roundss_i64(A, R) \ 6031349cc55cSDimitry Andric ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))) 60320b57cec5SDimitry Andric #endif 60330b57cec5SDimitry Andric 60340b57cec5SDimitry Andric #define _mm_cvt_roundss_u32(A, R) \ 6035349cc55cSDimitry Andric ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))) 60360b57cec5SDimitry Andric 60370b57cec5SDimitry Andric static __inline__ unsigned __DEFAULT_FN_ATTRS128 60380b57cec5SDimitry Andric _mm_cvtss_u32 (__m128 __A) 60390b57cec5SDimitry Andric { 60400b57cec5SDimitry Andric return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, 60410b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 60420b57cec5SDimitry Andric } 60430b57cec5SDimitry Andric 60440b57cec5SDimitry Andric #ifdef __x86_64__ 60450b57cec5SDimitry Andric #define _mm_cvt_roundss_u64(A, R) \ 6046349cc55cSDimitry Andric ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ 6047349cc55cSDimitry Andric (int)(R))) 60480b57cec5SDimitry Andric 
60490b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 60500b57cec5SDimitry Andric _mm_cvtss_u64 (__m128 __A) 60510b57cec5SDimitry Andric { 60520b57cec5SDimitry Andric return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) 60530b57cec5SDimitry Andric __A, 60540b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 60550b57cec5SDimitry Andric } 60560b57cec5SDimitry Andric #endif 60570b57cec5SDimitry Andric 60580b57cec5SDimitry Andric #define _mm_cvtt_roundsd_i32(A, R) \ 6059349cc55cSDimitry Andric ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))) 60600b57cec5SDimitry Andric 60610b57cec5SDimitry Andric #define _mm_cvtt_roundsd_si32(A, R) \ 6062349cc55cSDimitry Andric ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))) 60630b57cec5SDimitry Andric 60640b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS128 60650b57cec5SDimitry Andric _mm_cvttsd_i32 (__m128d __A) 60660b57cec5SDimitry Andric { 60670b57cec5SDimitry Andric return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, 60680b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 60690b57cec5SDimitry Andric } 60700b57cec5SDimitry Andric 60710b57cec5SDimitry Andric #ifdef __x86_64__ 60720b57cec5SDimitry Andric #define _mm_cvtt_roundsd_si64(A, R) \ 6073349cc55cSDimitry Andric ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))) 60740b57cec5SDimitry Andric 60750b57cec5SDimitry Andric #define _mm_cvtt_roundsd_i64(A, R) \ 6076349cc55cSDimitry Andric ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))) 60770b57cec5SDimitry Andric 60780b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS128 60790b57cec5SDimitry Andric _mm_cvttsd_i64 (__m128d __A) 60800b57cec5SDimitry Andric { 60810b57cec5SDimitry Andric return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, 60820b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 60830b57cec5SDimitry Andric } 60840b57cec5SDimitry Andric #endif 
60850b57cec5SDimitry Andric 60860b57cec5SDimitry Andric #define _mm_cvtt_roundsd_u32(A, R) \ 6087349cc55cSDimitry Andric ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))) 60880b57cec5SDimitry Andric 60890b57cec5SDimitry Andric static __inline__ unsigned __DEFAULT_FN_ATTRS128 60900b57cec5SDimitry Andric _mm_cvttsd_u32 (__m128d __A) 60910b57cec5SDimitry Andric { 60920b57cec5SDimitry Andric return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, 60930b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 60940b57cec5SDimitry Andric } 60950b57cec5SDimitry Andric 60960b57cec5SDimitry Andric #ifdef __x86_64__ 60970b57cec5SDimitry Andric #define _mm_cvtt_roundsd_u64(A, R) \ 6098349cc55cSDimitry Andric ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ 6099349cc55cSDimitry Andric (int)(R))) 61000b57cec5SDimitry Andric 61010b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 61020b57cec5SDimitry Andric _mm_cvttsd_u64 (__m128d __A) 61030b57cec5SDimitry Andric { 61040b57cec5SDimitry Andric return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) 61050b57cec5SDimitry Andric __A, 61060b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 61070b57cec5SDimitry Andric } 61080b57cec5SDimitry Andric #endif 61090b57cec5SDimitry Andric 61100b57cec5SDimitry Andric #define _mm_cvtt_roundss_i32(A, R) \ 6111349cc55cSDimitry Andric ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))) 61120b57cec5SDimitry Andric 61130b57cec5SDimitry Andric #define _mm_cvtt_roundss_si32(A, R) \ 6114349cc55cSDimitry Andric ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))) 61150b57cec5SDimitry Andric 61160b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS128 61170b57cec5SDimitry Andric _mm_cvttss_i32 (__m128 __A) 61180b57cec5SDimitry Andric { 61190b57cec5SDimitry Andric return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, 61200b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 
61210b57cec5SDimitry Andric } 61220b57cec5SDimitry Andric 61230b57cec5SDimitry Andric #ifdef __x86_64__ 61240b57cec5SDimitry Andric #define _mm_cvtt_roundss_i64(A, R) \ 6125349cc55cSDimitry Andric ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))) 61260b57cec5SDimitry Andric 61270b57cec5SDimitry Andric #define _mm_cvtt_roundss_si64(A, R) \ 6128349cc55cSDimitry Andric ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))) 61290b57cec5SDimitry Andric 61300b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS128 61310b57cec5SDimitry Andric _mm_cvttss_i64 (__m128 __A) 61320b57cec5SDimitry Andric { 61330b57cec5SDimitry Andric return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, 61340b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 61350b57cec5SDimitry Andric } 61360b57cec5SDimitry Andric #endif 61370b57cec5SDimitry Andric 61380b57cec5SDimitry Andric #define _mm_cvtt_roundss_u32(A, R) \ 6139349cc55cSDimitry Andric ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))) 61400b57cec5SDimitry Andric 61410b57cec5SDimitry Andric static __inline__ unsigned __DEFAULT_FN_ATTRS128 61420b57cec5SDimitry Andric _mm_cvttss_u32 (__m128 __A) 61430b57cec5SDimitry Andric { 61440b57cec5SDimitry Andric return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, 61450b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 61460b57cec5SDimitry Andric } 61470b57cec5SDimitry Andric 61480b57cec5SDimitry Andric #ifdef __x86_64__ 61490b57cec5SDimitry Andric #define _mm_cvtt_roundss_u64(A, R) \ 6150349cc55cSDimitry Andric ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ 6151349cc55cSDimitry Andric (int)(R))) 61520b57cec5SDimitry Andric 61530b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 61540b57cec5SDimitry Andric _mm_cvttss_u64 (__m128 __A) 61550b57cec5SDimitry Andric { 61560b57cec5SDimitry Andric return (unsigned long long) __builtin_ia32_vcvttss2usi64 
((__v4sf) 61570b57cec5SDimitry Andric __A, 61580b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 61590b57cec5SDimitry Andric } 61600b57cec5SDimitry Andric #endif 61610b57cec5SDimitry Andric 61620b57cec5SDimitry Andric #define _mm512_permute_pd(X, C) \ 6163349cc55cSDimitry Andric ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))) 61640b57cec5SDimitry Andric 61650b57cec5SDimitry Andric #define _mm512_mask_permute_pd(W, U, X, C) \ 6166349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 61670b57cec5SDimitry Andric (__v8df)_mm512_permute_pd((X), (C)), \ 6168349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 61690b57cec5SDimitry Andric 61700b57cec5SDimitry Andric #define _mm512_maskz_permute_pd(U, X, C) \ 6171349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 61720b57cec5SDimitry Andric (__v8df)_mm512_permute_pd((X), (C)), \ 6173349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 61740b57cec5SDimitry Andric 61750b57cec5SDimitry Andric #define _mm512_permute_ps(X, C) \ 6176349cc55cSDimitry Andric ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))) 61770b57cec5SDimitry Andric 61780b57cec5SDimitry Andric #define _mm512_mask_permute_ps(W, U, X, C) \ 6179349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 61800b57cec5SDimitry Andric (__v16sf)_mm512_permute_ps((X), (C)), \ 6181349cc55cSDimitry Andric (__v16sf)(__m512)(W))) 61820b57cec5SDimitry Andric 61830b57cec5SDimitry Andric #define _mm512_maskz_permute_ps(U, X, C) \ 6184349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 61850b57cec5SDimitry Andric (__v16sf)_mm512_permute_ps((X), (C)), \ 6186349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 61870b57cec5SDimitry Andric 61880b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 61890b57cec5SDimitry Andric _mm512_permutevar_pd(__m512d __A, __m512i __C) 61900b57cec5SDimitry Andric { 61910b57cec5SDimitry Andric 
return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); 61920b57cec5SDimitry Andric } 61930b57cec5SDimitry Andric 61940b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 61950b57cec5SDimitry Andric _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) 61960b57cec5SDimitry Andric { 61970b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 61980b57cec5SDimitry Andric (__v8df)_mm512_permutevar_pd(__A, __C), 61990b57cec5SDimitry Andric (__v8df)__W); 62000b57cec5SDimitry Andric } 62010b57cec5SDimitry Andric 62020b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 62030b57cec5SDimitry Andric _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) 62040b57cec5SDimitry Andric { 62050b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 62060b57cec5SDimitry Andric (__v8df)_mm512_permutevar_pd(__A, __C), 62070b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 62080b57cec5SDimitry Andric } 62090b57cec5SDimitry Andric 62100b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 62110b57cec5SDimitry Andric _mm512_permutevar_ps(__m512 __A, __m512i __C) 62120b57cec5SDimitry Andric { 62130b57cec5SDimitry Andric return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); 62140b57cec5SDimitry Andric } 62150b57cec5SDimitry Andric 62160b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 62170b57cec5SDimitry Andric _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) 62180b57cec5SDimitry Andric { 62190b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 62200b57cec5SDimitry Andric (__v16sf)_mm512_permutevar_ps(__A, __C), 62210b57cec5SDimitry Andric (__v16sf)__W); 62220b57cec5SDimitry Andric } 62230b57cec5SDimitry Andric 62240b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 62250b57cec5SDimitry Andric 
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) 62260b57cec5SDimitry Andric { 62270b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 62280b57cec5SDimitry Andric (__v16sf)_mm512_permutevar_ps(__A, __C), 62290b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 62300b57cec5SDimitry Andric } 62310b57cec5SDimitry Andric 62320b57cec5SDimitry Andric static __inline __m512d __DEFAULT_FN_ATTRS512 62330b57cec5SDimitry Andric _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) 62340b57cec5SDimitry Andric { 62350b57cec5SDimitry Andric return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I, 62360b57cec5SDimitry Andric (__v8df)__B); 62370b57cec5SDimitry Andric } 62380b57cec5SDimitry Andric 62390b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 62400b57cec5SDimitry Andric _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) 62410b57cec5SDimitry Andric { 62420b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__U, 62430b57cec5SDimitry Andric (__v8df)_mm512_permutex2var_pd(__A, __I, __B), 62440b57cec5SDimitry Andric (__v8df)__A); 62450b57cec5SDimitry Andric } 62460b57cec5SDimitry Andric 62470b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 62480b57cec5SDimitry Andric _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, 62490b57cec5SDimitry Andric __m512d __B) 62500b57cec5SDimitry Andric { 62510b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__U, 62520b57cec5SDimitry Andric (__v8df)_mm512_permutex2var_pd(__A, __I, __B), 62530b57cec5SDimitry Andric (__v8df)(__m512d)__I); 62540b57cec5SDimitry Andric } 62550b57cec5SDimitry Andric 62560b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 62570b57cec5SDimitry Andric _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, 62580b57cec5SDimitry Andric __m512d __B) 62590b57cec5SDimitry Andric { 
62600b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__U, 62610b57cec5SDimitry Andric (__v8df)_mm512_permutex2var_pd(__A, __I, __B), 62620b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 62630b57cec5SDimitry Andric } 62640b57cec5SDimitry Andric 62650b57cec5SDimitry Andric static __inline __m512 __DEFAULT_FN_ATTRS512 62660b57cec5SDimitry Andric _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) 62670b57cec5SDimitry Andric { 62680b57cec5SDimitry Andric return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I, 62690b57cec5SDimitry Andric (__v16sf) __B); 62700b57cec5SDimitry Andric } 62710b57cec5SDimitry Andric 62720b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 62730b57cec5SDimitry Andric _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) 62740b57cec5SDimitry Andric { 62750b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__U, 62760b57cec5SDimitry Andric (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), 62770b57cec5SDimitry Andric (__v16sf)__A); 62780b57cec5SDimitry Andric } 62790b57cec5SDimitry Andric 62800b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 62810b57cec5SDimitry Andric _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B) 62820b57cec5SDimitry Andric { 62830b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__U, 62840b57cec5SDimitry Andric (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), 62850b57cec5SDimitry Andric (__v16sf)(__m512)__I); 62860b57cec5SDimitry Andric } 62870b57cec5SDimitry Andric 62880b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 62890b57cec5SDimitry Andric _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) 62900b57cec5SDimitry Andric { 62910b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__U, 62920b57cec5SDimitry Andric (__v16sf)_mm512_permutex2var_ps(__A, __I, __B), 62930b57cec5SDimitry Andric 
(__v16sf)_mm512_setzero_ps()); 62940b57cec5SDimitry Andric } 62950b57cec5SDimitry Andric 62960b57cec5SDimitry Andric 62970b57cec5SDimitry Andric #define _mm512_cvtt_roundpd_epu32(A, R) \ 6298349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 62990b57cec5SDimitry Andric (__v8si)_mm256_undefined_si256(), \ 6300349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 63010b57cec5SDimitry Andric 63020b57cec5SDimitry Andric #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \ 6303349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 63040b57cec5SDimitry Andric (__v8si)(__m256i)(W), \ 6305349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 63060b57cec5SDimitry Andric 63070b57cec5SDimitry Andric #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \ 6308349cc55cSDimitry Andric ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 63090b57cec5SDimitry Andric (__v8si)_mm256_setzero_si256(), \ 6310349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 63110b57cec5SDimitry Andric 63120b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 63130b57cec5SDimitry Andric _mm512_cvttpd_epu32 (__m512d __A) 63140b57cec5SDimitry Andric { 63150b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 63160b57cec5SDimitry Andric (__v8si) 63170b57cec5SDimitry Andric _mm256_undefined_si256 (), 63180b57cec5SDimitry Andric (__mmask8) -1, 63190b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 63200b57cec5SDimitry Andric } 63210b57cec5SDimitry Andric 63220b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 63230b57cec5SDimitry Andric _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 63240b57cec5SDimitry Andric { 63250b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 63260b57cec5SDimitry Andric (__v8si) __W, 63270b57cec5SDimitry Andric (__mmask8) __U, 63280b57cec5SDimitry Andric 
_MM_FROUND_CUR_DIRECTION); 63290b57cec5SDimitry Andric } 63300b57cec5SDimitry Andric 63310b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 63320b57cec5SDimitry Andric _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) 63330b57cec5SDimitry Andric { 63340b57cec5SDimitry Andric return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 63350b57cec5SDimitry Andric (__v8si) 63360b57cec5SDimitry Andric _mm256_setzero_si256 (), 63370b57cec5SDimitry Andric (__mmask8) __U, 63380b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 63390b57cec5SDimitry Andric } 63400b57cec5SDimitry Andric 63410b57cec5SDimitry Andric #define _mm_roundscale_round_sd(A, B, imm, R) \ 6342349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 63430b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 63440b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 63450b57cec5SDimitry Andric (__mmask8)-1, (int)(imm), \ 6346349cc55cSDimitry Andric (int)(R))) 63470b57cec5SDimitry Andric 63480b57cec5SDimitry Andric #define _mm_roundscale_sd(A, B, imm) \ 6349349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 63500b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 63510b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 63520b57cec5SDimitry Andric (__mmask8)-1, (int)(imm), \ 6353349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 63540b57cec5SDimitry Andric 63550b57cec5SDimitry Andric #define _mm_mask_roundscale_sd(W, U, A, B, imm) \ 6356349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 63570b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 63580b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 63590b57cec5SDimitry Andric (__mmask8)(U), (int)(imm), \ 6360349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 63610b57cec5SDimitry Andric 63620b57cec5SDimitry Andric #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \ 6363349cc55cSDimitry Andric 
((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 63640b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 63650b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 63660b57cec5SDimitry Andric (__mmask8)(U), (int)(I), \ 6367349cc55cSDimitry Andric (int)(R))) 63680b57cec5SDimitry Andric 63690b57cec5SDimitry Andric #define _mm_maskz_roundscale_sd(U, A, B, I) \ 6370349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 63710b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 63720b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 63730b57cec5SDimitry Andric (__mmask8)(U), (int)(I), \ 6374349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 63750b57cec5SDimitry Andric 63760b57cec5SDimitry Andric #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ 6377349cc55cSDimitry Andric ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 63780b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 63790b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 63800b57cec5SDimitry Andric (__mmask8)(U), (int)(I), \ 6381349cc55cSDimitry Andric (int)(R))) 63820b57cec5SDimitry Andric 63830b57cec5SDimitry Andric #define _mm_roundscale_round_ss(A, B, imm, R) \ 6384349cc55cSDimitry Andric ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 63850b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 63860b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 63870b57cec5SDimitry Andric (__mmask8)-1, (int)(imm), \ 6388349cc55cSDimitry Andric (int)(R))) 63890b57cec5SDimitry Andric 63900b57cec5SDimitry Andric #define _mm_roundscale_ss(A, B, imm) \ 6391349cc55cSDimitry Andric ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 63920b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 63930b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 63940b57cec5SDimitry Andric (__mmask8)-1, (int)(imm), \ 6395349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 63960b57cec5SDimitry Andric 63970b57cec5SDimitry Andric #define 
_mm_mask_roundscale_ss(W, U, A, B, I) \ 6398349cc55cSDimitry Andric ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 63990b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 64000b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 64010b57cec5SDimitry Andric (__mmask8)(U), (int)(I), \ 6402349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 64030b57cec5SDimitry Andric 64040b57cec5SDimitry Andric #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \ 6405349cc55cSDimitry Andric ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 64060b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 64070b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 64080b57cec5SDimitry Andric (__mmask8)(U), (int)(I), \ 6409349cc55cSDimitry Andric (int)(R))) 64100b57cec5SDimitry Andric 64110b57cec5SDimitry Andric #define _mm_maskz_roundscale_ss(U, A, B, I) \ 6412349cc55cSDimitry Andric ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 64130b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 64140b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 64150b57cec5SDimitry Andric (__mmask8)(U), (int)(I), \ 6416349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 64170b57cec5SDimitry Andric 64180b57cec5SDimitry Andric #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ 6419349cc55cSDimitry Andric ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 64200b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 64210b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 64220b57cec5SDimitry Andric (__mmask8)(U), (int)(I), \ 6423349cc55cSDimitry Andric (int)(R))) 64240b57cec5SDimitry Andric 64250b57cec5SDimitry Andric #define _mm512_scalef_round_pd(A, B, R) \ 6426349cc55cSDimitry Andric ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 64270b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 64280b57cec5SDimitry Andric (__v8df)_mm512_undefined_pd(), \ 6429349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 64300b57cec5SDimitry Andric 64310b57cec5SDimitry Andric 
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \ 6432349cc55cSDimitry Andric ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 64330b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 64340b57cec5SDimitry Andric (__v8df)(__m512d)(W), \ 6435349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 64360b57cec5SDimitry Andric 64370b57cec5SDimitry Andric #define _mm512_maskz_scalef_round_pd(U, A, B, R) \ 6438349cc55cSDimitry Andric ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 64390b57cec5SDimitry Andric (__v8df)(__m512d)(B), \ 64400b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 6441349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 64420b57cec5SDimitry Andric 64430b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 64440b57cec5SDimitry Andric _mm512_scalef_pd (__m512d __A, __m512d __B) 64450b57cec5SDimitry Andric { 64460b57cec5SDimitry Andric return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 64470b57cec5SDimitry Andric (__v8df) __B, 64480b57cec5SDimitry Andric (__v8df) 64490b57cec5SDimitry Andric _mm512_undefined_pd (), 64500b57cec5SDimitry Andric (__mmask8) -1, 64510b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 64520b57cec5SDimitry Andric } 64530b57cec5SDimitry Andric 64540b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 64550b57cec5SDimitry Andric _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 64560b57cec5SDimitry Andric { 64570b57cec5SDimitry Andric return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 64580b57cec5SDimitry Andric (__v8df) __B, 64590b57cec5SDimitry Andric (__v8df) __W, 64600b57cec5SDimitry Andric (__mmask8) __U, 64610b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 64620b57cec5SDimitry Andric } 64630b57cec5SDimitry Andric 64640b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 64650b57cec5SDimitry Andric _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) 64660b57cec5SDimitry Andric { 
64670b57cec5SDimitry Andric return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 64680b57cec5SDimitry Andric (__v8df) __B, 64690b57cec5SDimitry Andric (__v8df) 64700b57cec5SDimitry Andric _mm512_setzero_pd (), 64710b57cec5SDimitry Andric (__mmask8) __U, 64720b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 64730b57cec5SDimitry Andric } 64740b57cec5SDimitry Andric 64750b57cec5SDimitry Andric #define _mm512_scalef_round_ps(A, B, R) \ 6476349cc55cSDimitry Andric ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 64770b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 64780b57cec5SDimitry Andric (__v16sf)_mm512_undefined_ps(), \ 6479349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 64800b57cec5SDimitry Andric 64810b57cec5SDimitry Andric #define _mm512_mask_scalef_round_ps(W, U, A, B, R) \ 6482349cc55cSDimitry Andric ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 64830b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 64840b57cec5SDimitry Andric (__v16sf)(__m512)(W), \ 6485349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 64860b57cec5SDimitry Andric 64870b57cec5SDimitry Andric #define _mm512_maskz_scalef_round_ps(U, A, B, R) \ 6488349cc55cSDimitry Andric ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 64890b57cec5SDimitry Andric (__v16sf)(__m512)(B), \ 64900b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 6491349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 64920b57cec5SDimitry Andric 64930b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 64940b57cec5SDimitry Andric _mm512_scalef_ps (__m512 __A, __m512 __B) 64950b57cec5SDimitry Andric { 64960b57cec5SDimitry Andric return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 64970b57cec5SDimitry Andric (__v16sf) __B, 64980b57cec5SDimitry Andric (__v16sf) 64990b57cec5SDimitry Andric _mm512_undefined_ps (), 65000b57cec5SDimitry Andric (__mmask16) -1, 65010b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 65020b57cec5SDimitry Andric } 
65030b57cec5SDimitry Andric 65040b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 65050b57cec5SDimitry Andric _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 65060b57cec5SDimitry Andric { 65070b57cec5SDimitry Andric return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 65080b57cec5SDimitry Andric (__v16sf) __B, 65090b57cec5SDimitry Andric (__v16sf) __W, 65100b57cec5SDimitry Andric (__mmask16) __U, 65110b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 65120b57cec5SDimitry Andric } 65130b57cec5SDimitry Andric 65140b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 65150b57cec5SDimitry Andric _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) 65160b57cec5SDimitry Andric { 65170b57cec5SDimitry Andric return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 65180b57cec5SDimitry Andric (__v16sf) __B, 65190b57cec5SDimitry Andric (__v16sf) 65200b57cec5SDimitry Andric _mm512_setzero_ps (), 65210b57cec5SDimitry Andric (__mmask16) __U, 65220b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 65230b57cec5SDimitry Andric } 65240b57cec5SDimitry Andric 65250b57cec5SDimitry Andric #define _mm_scalef_round_sd(A, B, R) \ 6526349cc55cSDimitry Andric ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 65270b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 65280b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 6529349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 65300b57cec5SDimitry Andric 65310b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 65320b57cec5SDimitry Andric _mm_scalef_sd (__m128d __A, __m128d __B) 65330b57cec5SDimitry Andric { 65340b57cec5SDimitry Andric return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, 65350b57cec5SDimitry Andric (__v2df)( __B), (__v2df) _mm_setzero_pd(), 65360b57cec5SDimitry Andric (__mmask8) -1, 65370b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 65380b57cec5SDimitry Andric } 65390b57cec5SDimitry Andric 
65400b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 65410b57cec5SDimitry Andric _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 65420b57cec5SDimitry Andric { 65430b57cec5SDimitry Andric return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 65440b57cec5SDimitry Andric (__v2df) __B, 65450b57cec5SDimitry Andric (__v2df) __W, 65460b57cec5SDimitry Andric (__mmask8) __U, 65470b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 65480b57cec5SDimitry Andric } 65490b57cec5SDimitry Andric 65500b57cec5SDimitry Andric #define _mm_mask_scalef_round_sd(W, U, A, B, R) \ 6551349cc55cSDimitry Andric ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 65520b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 65530b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 6554349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 65550b57cec5SDimitry Andric 65560b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 65570b57cec5SDimitry Andric _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) 65580b57cec5SDimitry Andric { 65590b57cec5SDimitry Andric return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 65600b57cec5SDimitry Andric (__v2df) __B, 65610b57cec5SDimitry Andric (__v2df) _mm_setzero_pd (), 65620b57cec5SDimitry Andric (__mmask8) __U, 65630b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 65640b57cec5SDimitry Andric } 65650b57cec5SDimitry Andric 65660b57cec5SDimitry Andric #define _mm_maskz_scalef_round_sd(U, A, B, R) \ 6567349cc55cSDimitry Andric ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 65680b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 65690b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 6570349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 65710b57cec5SDimitry Andric 65720b57cec5SDimitry Andric #define _mm_scalef_round_ss(A, B, R) \ 6573349cc55cSDimitry Andric ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 65740b57cec5SDimitry 
Andric (__v4sf)(__m128)(B), \ 65750b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 6576349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 65770b57cec5SDimitry Andric 65780b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 65790b57cec5SDimitry Andric _mm_scalef_ss (__m128 __A, __m128 __B) 65800b57cec5SDimitry Andric { 65810b57cec5SDimitry Andric return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, 65820b57cec5SDimitry Andric (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), 65830b57cec5SDimitry Andric (__mmask8) -1, 65840b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 65850b57cec5SDimitry Andric } 65860b57cec5SDimitry Andric 65870b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 65880b57cec5SDimitry Andric _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 65890b57cec5SDimitry Andric { 65900b57cec5SDimitry Andric return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 65910b57cec5SDimitry Andric (__v4sf) __B, 65920b57cec5SDimitry Andric (__v4sf) __W, 65930b57cec5SDimitry Andric (__mmask8) __U, 65940b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 65950b57cec5SDimitry Andric } 65960b57cec5SDimitry Andric 65970b57cec5SDimitry Andric #define _mm_mask_scalef_round_ss(W, U, A, B, R) \ 6598349cc55cSDimitry Andric ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 65990b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 66000b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 6601349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 66020b57cec5SDimitry Andric 66030b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 66040b57cec5SDimitry Andric _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) 66050b57cec5SDimitry Andric { 66060b57cec5SDimitry Andric return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 66070b57cec5SDimitry Andric (__v4sf) __B, 66080b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps (), 66090b57cec5SDimitry Andric (__mmask8) __U, 
66100b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 66110b57cec5SDimitry Andric } 66120b57cec5SDimitry Andric 66130b57cec5SDimitry Andric #define _mm_maskz_scalef_round_ss(U, A, B, R) \ 6614349cc55cSDimitry Andric ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 66150b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 66160b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 66170b57cec5SDimitry Andric (__mmask8)(U), \ 6618349cc55cSDimitry Andric (int)(R))) 66190b57cec5SDimitry Andric 66200b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 66215ffd83dbSDimitry Andric _mm512_srai_epi32(__m512i __A, unsigned int __B) 66220b57cec5SDimitry Andric { 662381ad6265SDimitry Andric return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B); 66240b57cec5SDimitry Andric } 66250b57cec5SDimitry Andric 66260b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 66275ffd83dbSDimitry Andric _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, 66285ffd83dbSDimitry Andric unsigned int __B) 66290b57cec5SDimitry Andric { 66300b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 66310b57cec5SDimitry Andric (__v16si)_mm512_srai_epi32(__A, __B), 66320b57cec5SDimitry Andric (__v16si)__W); 66330b57cec5SDimitry Andric } 66340b57cec5SDimitry Andric 66350b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 66365ffd83dbSDimitry Andric _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, 66375ffd83dbSDimitry Andric unsigned int __B) { 66380b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 66390b57cec5SDimitry Andric (__v16si)_mm512_srai_epi32(__A, __B), 66400b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 66410b57cec5SDimitry Andric } 66420b57cec5SDimitry Andric 66430b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 66445ffd83dbSDimitry Andric _mm512_srai_epi64(__m512i __A, unsigned int __B) 66450b57cec5SDimitry Andric { 
664681ad6265SDimitry Andric return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B); 66470b57cec5SDimitry Andric } 66480b57cec5SDimitry Andric 66490b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 66505ffd83dbSDimitry Andric _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B) 66510b57cec5SDimitry Andric { 66520b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 66530b57cec5SDimitry Andric (__v8di)_mm512_srai_epi64(__A, __B), 66540b57cec5SDimitry Andric (__v8di)__W); 66550b57cec5SDimitry Andric } 66560b57cec5SDimitry Andric 66570b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 66585ffd83dbSDimitry Andric _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B) 66590b57cec5SDimitry Andric { 66600b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 66610b57cec5SDimitry Andric (__v8di)_mm512_srai_epi64(__A, __B), 66620b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 66630b57cec5SDimitry Andric } 66640b57cec5SDimitry Andric 66650b57cec5SDimitry Andric #define _mm512_shuffle_f32x4(A, B, imm) \ 6666349cc55cSDimitry Andric ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \ 6667349cc55cSDimitry Andric (__v16sf)(__m512)(B), (int)(imm))) 66680b57cec5SDimitry Andric 66690b57cec5SDimitry Andric #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \ 6670349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 66710b57cec5SDimitry Andric (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6672349cc55cSDimitry Andric (__v16sf)(__m512)(W))) 66730b57cec5SDimitry Andric 66740b57cec5SDimitry Andric #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \ 6675349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 66760b57cec5SDimitry Andric (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6677349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 66780b57cec5SDimitry Andric 
66790b57cec5SDimitry Andric #define _mm512_shuffle_f64x2(A, B, imm) \ 6680349cc55cSDimitry Andric ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \ 6681349cc55cSDimitry Andric (__v8df)(__m512d)(B), (int)(imm))) 66820b57cec5SDimitry Andric 66830b57cec5SDimitry Andric #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \ 6684349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 66850b57cec5SDimitry Andric (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6686349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 66870b57cec5SDimitry Andric 66880b57cec5SDimitry Andric #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \ 6689349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 66900b57cec5SDimitry Andric (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6691349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 66920b57cec5SDimitry Andric 66930b57cec5SDimitry Andric #define _mm512_shuffle_i32x4(A, B, imm) \ 6694349cc55cSDimitry Andric ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \ 6695349cc55cSDimitry Andric (__v16si)(__m512i)(B), (int)(imm))) 66960b57cec5SDimitry Andric 66970b57cec5SDimitry Andric #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \ 6698349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 66990b57cec5SDimitry Andric (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 6700349cc55cSDimitry Andric (__v16si)(__m512i)(W))) 67010b57cec5SDimitry Andric 67020b57cec5SDimitry Andric #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \ 6703349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 67040b57cec5SDimitry Andric (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 6705349cc55cSDimitry Andric (__v16si)_mm512_setzero_si512())) 67060b57cec5SDimitry Andric 67070b57cec5SDimitry Andric #define _mm512_shuffle_i64x2(A, B, imm) \ 6708349cc55cSDimitry Andric ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \ 6709349cc55cSDimitry Andric 
(__v8di)(__m512i)(B), (int)(imm))) 67100b57cec5SDimitry Andric 67110b57cec5SDimitry Andric #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \ 6712349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 67130b57cec5SDimitry Andric (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 6714349cc55cSDimitry Andric (__v8di)(__m512i)(W))) 67150b57cec5SDimitry Andric 67160b57cec5SDimitry Andric #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \ 6717349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 67180b57cec5SDimitry Andric (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 6719349cc55cSDimitry Andric (__v8di)_mm512_setzero_si512())) 67200b57cec5SDimitry Andric 67210b57cec5SDimitry Andric #define _mm512_shuffle_pd(A, B, M) \ 6722349cc55cSDimitry Andric ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \ 6723349cc55cSDimitry Andric (__v8df)(__m512d)(B), (int)(M))) 67240b57cec5SDimitry Andric 67250b57cec5SDimitry Andric #define _mm512_mask_shuffle_pd(W, U, A, B, M) \ 6726349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 67270b57cec5SDimitry Andric (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 6728349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 67290b57cec5SDimitry Andric 67300b57cec5SDimitry Andric #define _mm512_maskz_shuffle_pd(U, A, B, M) \ 6731349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 67320b57cec5SDimitry Andric (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 6733349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 67340b57cec5SDimitry Andric 67350b57cec5SDimitry Andric #define _mm512_shuffle_ps(A, B, M) \ 6736349cc55cSDimitry Andric ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \ 6737349cc55cSDimitry Andric (__v16sf)(__m512)(B), (int)(M))) 67380b57cec5SDimitry Andric 67390b57cec5SDimitry Andric #define _mm512_mask_shuffle_ps(W, U, A, B, M) \ 6740349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 67410b57cec5SDimitry 
Andric (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 6742349cc55cSDimitry Andric (__v16sf)(__m512)(W))) 67430b57cec5SDimitry Andric 67440b57cec5SDimitry Andric #define _mm512_maskz_shuffle_ps(U, A, B, M) \ 6745349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 67460b57cec5SDimitry Andric (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 6747349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 67480b57cec5SDimitry Andric 67490b57cec5SDimitry Andric #define _mm_sqrt_round_sd(A, B, R) \ 6750349cc55cSDimitry Andric ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 67510b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 67520b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 6753349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 67540b57cec5SDimitry Andric 67550b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 67560b57cec5SDimitry Andric _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 67570b57cec5SDimitry Andric { 67580b57cec5SDimitry Andric return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, 67590b57cec5SDimitry Andric (__v2df) __B, 67600b57cec5SDimitry Andric (__v2df) __W, 67610b57cec5SDimitry Andric (__mmask8) __U, 67620b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 67630b57cec5SDimitry Andric } 67640b57cec5SDimitry Andric 67650b57cec5SDimitry Andric #define _mm_mask_sqrt_round_sd(W, U, A, B, R) \ 6766349cc55cSDimitry Andric ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 67670b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 67680b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 6769349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 67700b57cec5SDimitry Andric 67710b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 67720b57cec5SDimitry Andric _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) 67730b57cec5SDimitry Andric { 67740b57cec5SDimitry Andric return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, 
67750b57cec5SDimitry Andric (__v2df) __B, 67760b57cec5SDimitry Andric (__v2df) _mm_setzero_pd (), 67770b57cec5SDimitry Andric (__mmask8) __U, 67780b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 67790b57cec5SDimitry Andric } 67800b57cec5SDimitry Andric 67810b57cec5SDimitry Andric #define _mm_maskz_sqrt_round_sd(U, A, B, R) \ 6782349cc55cSDimitry Andric ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 67830b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 67840b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 6785349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 67860b57cec5SDimitry Andric 67870b57cec5SDimitry Andric #define _mm_sqrt_round_ss(A, B, R) \ 6788349cc55cSDimitry Andric ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 67890b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 67900b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 6791349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 67920b57cec5SDimitry Andric 67930b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 67940b57cec5SDimitry Andric _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 67950b57cec5SDimitry Andric { 67960b57cec5SDimitry Andric return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 67970b57cec5SDimitry Andric (__v4sf) __B, 67980b57cec5SDimitry Andric (__v4sf) __W, 67990b57cec5SDimitry Andric (__mmask8) __U, 68000b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 68010b57cec5SDimitry Andric } 68020b57cec5SDimitry Andric 68030b57cec5SDimitry Andric #define _mm_mask_sqrt_round_ss(W, U, A, B, R) \ 6804349cc55cSDimitry Andric ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 68050b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 68060b57cec5SDimitry Andric (__v4sf)(__m128)(W), (__mmask8)(U), \ 6807349cc55cSDimitry Andric (int)(R))) 68080b57cec5SDimitry Andric 68090b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 68100b57cec5SDimitry Andric _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, 
__m128 __B) 68110b57cec5SDimitry Andric { 68120b57cec5SDimitry Andric return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 68130b57cec5SDimitry Andric (__v4sf) __B, 68140b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps (), 68150b57cec5SDimitry Andric (__mmask8) __U, 68160b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 68170b57cec5SDimitry Andric } 68180b57cec5SDimitry Andric 68190b57cec5SDimitry Andric #define _mm_maskz_sqrt_round_ss(U, A, B, R) \ 6820349cc55cSDimitry Andric ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 68210b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 68220b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 6823349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 68240b57cec5SDimitry Andric 68250b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 68260b57cec5SDimitry Andric _mm512_broadcast_f32x4(__m128 __A) 68270b57cec5SDimitry Andric { 68280b57cec5SDimitry Andric return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 68290b57cec5SDimitry Andric 0, 1, 2, 3, 0, 1, 2, 3, 68300b57cec5SDimitry Andric 0, 1, 2, 3, 0, 1, 2, 3); 68310b57cec5SDimitry Andric } 68320b57cec5SDimitry Andric 68330b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 68340b57cec5SDimitry Andric _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) 68350b57cec5SDimitry Andric { 68360b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 68370b57cec5SDimitry Andric (__v16sf)_mm512_broadcast_f32x4(__A), 68380b57cec5SDimitry Andric (__v16sf)__O); 68390b57cec5SDimitry Andric } 68400b57cec5SDimitry Andric 68410b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 68420b57cec5SDimitry Andric _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) 68430b57cec5SDimitry Andric { 68440b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 68450b57cec5SDimitry Andric (__v16sf)_mm512_broadcast_f32x4(__A), 68460b57cec5SDimitry Andric 
(__v16sf)_mm512_setzero_ps()); 68470b57cec5SDimitry Andric } 68480b57cec5SDimitry Andric 68490b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 68500b57cec5SDimitry Andric _mm512_broadcast_f64x4(__m256d __A) 68510b57cec5SDimitry Andric { 68520b57cec5SDimitry Andric return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A, 68530b57cec5SDimitry Andric 0, 1, 2, 3, 0, 1, 2, 3); 68540b57cec5SDimitry Andric } 68550b57cec5SDimitry Andric 68560b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 68570b57cec5SDimitry Andric _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) 68580b57cec5SDimitry Andric { 68590b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 68600b57cec5SDimitry Andric (__v8df)_mm512_broadcast_f64x4(__A), 68610b57cec5SDimitry Andric (__v8df)__O); 68620b57cec5SDimitry Andric } 68630b57cec5SDimitry Andric 68640b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 68650b57cec5SDimitry Andric _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) 68660b57cec5SDimitry Andric { 68670b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 68680b57cec5SDimitry Andric (__v8df)_mm512_broadcast_f64x4(__A), 68690b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 68700b57cec5SDimitry Andric } 68710b57cec5SDimitry Andric 68720b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 68730b57cec5SDimitry Andric _mm512_broadcast_i32x4(__m128i __A) 68740b57cec5SDimitry Andric { 68750b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 68760b57cec5SDimitry Andric 0, 1, 2, 3, 0, 1, 2, 3, 68770b57cec5SDimitry Andric 0, 1, 2, 3, 0, 1, 2, 3); 68780b57cec5SDimitry Andric } 68790b57cec5SDimitry Andric 68800b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 68810b57cec5SDimitry Andric _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) 68820b57cec5SDimitry 
Andric { 68830b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 68840b57cec5SDimitry Andric (__v16si)_mm512_broadcast_i32x4(__A), 68850b57cec5SDimitry Andric (__v16si)__O); 68860b57cec5SDimitry Andric } 68870b57cec5SDimitry Andric 68880b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 68890b57cec5SDimitry Andric _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) 68900b57cec5SDimitry Andric { 68910b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 68920b57cec5SDimitry Andric (__v16si)_mm512_broadcast_i32x4(__A), 68930b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 68940b57cec5SDimitry Andric } 68950b57cec5SDimitry Andric 68960b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 68970b57cec5SDimitry Andric _mm512_broadcast_i64x4(__m256i __A) 68980b57cec5SDimitry Andric { 68990b57cec5SDimitry Andric return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A, 69000b57cec5SDimitry Andric 0, 1, 2, 3, 0, 1, 2, 3); 69010b57cec5SDimitry Andric } 69020b57cec5SDimitry Andric 69030b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 69040b57cec5SDimitry Andric _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) 69050b57cec5SDimitry Andric { 69060b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 69070b57cec5SDimitry Andric (__v8di)_mm512_broadcast_i64x4(__A), 69080b57cec5SDimitry Andric (__v8di)__O); 69090b57cec5SDimitry Andric } 69100b57cec5SDimitry Andric 69110b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 69120b57cec5SDimitry Andric _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) 69130b57cec5SDimitry Andric { 69140b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 69150b57cec5SDimitry Andric (__v8di)_mm512_broadcast_i64x4(__A), 69160b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 69170b57cec5SDimitry Andric } 
69180b57cec5SDimitry Andric 69190b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 69200b57cec5SDimitry Andric _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) 69210b57cec5SDimitry Andric { 69220b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__M, 69230b57cec5SDimitry Andric (__v8df) _mm512_broadcastsd_pd(__A), 69240b57cec5SDimitry Andric (__v8df) __O); 69250b57cec5SDimitry Andric } 69260b57cec5SDimitry Andric 69270b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 69280b57cec5SDimitry Andric _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 69290b57cec5SDimitry Andric { 69300b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512(__M, 69310b57cec5SDimitry Andric (__v8df) _mm512_broadcastsd_pd(__A), 69320b57cec5SDimitry Andric (__v8df) _mm512_setzero_pd()); 69330b57cec5SDimitry Andric } 69340b57cec5SDimitry Andric 69350b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 69360b57cec5SDimitry Andric _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) 69370b57cec5SDimitry Andric { 69380b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__M, 69390b57cec5SDimitry Andric (__v16sf) _mm512_broadcastss_ps(__A), 69400b57cec5SDimitry Andric (__v16sf) __O); 69410b57cec5SDimitry Andric } 69420b57cec5SDimitry Andric 69430b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 69440b57cec5SDimitry Andric _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) 69450b57cec5SDimitry Andric { 69460b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512(__M, 69470b57cec5SDimitry Andric (__v16sf) _mm512_broadcastss_ps(__A), 69480b57cec5SDimitry Andric (__v16sf) _mm512_setzero_ps()); 69490b57cec5SDimitry Andric } 69500b57cec5SDimitry Andric 69510b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 69520b57cec5SDimitry Andric _mm512_cvtsepi32_epi8 (__m512i __A) 69530b57cec5SDimitry Andric { 
69540b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 69550b57cec5SDimitry Andric (__v16qi) _mm_undefined_si128 (), 69560b57cec5SDimitry Andric (__mmask16) -1); 69570b57cec5SDimitry Andric } 69580b57cec5SDimitry Andric 69590b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 69600b57cec5SDimitry Andric _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 69610b57cec5SDimitry Andric { 69620b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 69630b57cec5SDimitry Andric (__v16qi) __O, __M); 69640b57cec5SDimitry Andric } 69650b57cec5SDimitry Andric 69660b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 69670b57cec5SDimitry Andric _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) 69680b57cec5SDimitry Andric { 69690b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 69700b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 69710b57cec5SDimitry Andric __M); 69720b57cec5SDimitry Andric } 69730b57cec5SDimitry Andric 69740b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 69750b57cec5SDimitry Andric _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 69760b57cec5SDimitry Andric { 69770b57cec5SDimitry Andric __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 69780b57cec5SDimitry Andric } 69790b57cec5SDimitry Andric 69800b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 69810b57cec5SDimitry Andric _mm512_cvtsepi32_epi16 (__m512i __A) 69820b57cec5SDimitry Andric { 69830b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 69840b57cec5SDimitry Andric (__v16hi) _mm256_undefined_si256 (), 69850b57cec5SDimitry Andric (__mmask16) -1); 69860b57cec5SDimitry Andric } 69870b57cec5SDimitry Andric 69880b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 69890b57cec5SDimitry Andric 
_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 69900b57cec5SDimitry Andric { 69910b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 69920b57cec5SDimitry Andric (__v16hi) __O, __M); 69930b57cec5SDimitry Andric } 69940b57cec5SDimitry Andric 69950b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 69960b57cec5SDimitry Andric _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) 69970b57cec5SDimitry Andric { 69980b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 69990b57cec5SDimitry Andric (__v16hi) _mm256_setzero_si256 (), 70000b57cec5SDimitry Andric __M); 70010b57cec5SDimitry Andric } 70020b57cec5SDimitry Andric 70030b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 70040b57cec5SDimitry Andric _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 70050b57cec5SDimitry Andric { 70060b57cec5SDimitry Andric __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 70070b57cec5SDimitry Andric } 70080b57cec5SDimitry Andric 70090b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 70100b57cec5SDimitry Andric _mm512_cvtsepi64_epi8 (__m512i __A) 70110b57cec5SDimitry Andric { 70120b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 70130b57cec5SDimitry Andric (__v16qi) _mm_undefined_si128 (), 70140b57cec5SDimitry Andric (__mmask8) -1); 70150b57cec5SDimitry Andric } 70160b57cec5SDimitry Andric 70170b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 70180b57cec5SDimitry Andric _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 70190b57cec5SDimitry Andric { 70200b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 70210b57cec5SDimitry Andric (__v16qi) __O, __M); 70220b57cec5SDimitry Andric } 70230b57cec5SDimitry Andric 70240b57cec5SDimitry Andric static __inline__ __m128i 
__DEFAULT_FN_ATTRS512 70250b57cec5SDimitry Andric _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) 70260b57cec5SDimitry Andric { 70270b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 70280b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 70290b57cec5SDimitry Andric __M); 70300b57cec5SDimitry Andric } 70310b57cec5SDimitry Andric 70320b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 70330b57cec5SDimitry Andric _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 70340b57cec5SDimitry Andric { 70350b57cec5SDimitry Andric __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 70360b57cec5SDimitry Andric } 70370b57cec5SDimitry Andric 70380b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 70390b57cec5SDimitry Andric _mm512_cvtsepi64_epi32 (__m512i __A) 70400b57cec5SDimitry Andric { 70410b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 70420b57cec5SDimitry Andric (__v8si) _mm256_undefined_si256 (), 70430b57cec5SDimitry Andric (__mmask8) -1); 70440b57cec5SDimitry Andric } 70450b57cec5SDimitry Andric 70460b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 70470b57cec5SDimitry Andric _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 70480b57cec5SDimitry Andric { 70490b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 70500b57cec5SDimitry Andric (__v8si) __O, __M); 70510b57cec5SDimitry Andric } 70520b57cec5SDimitry Andric 70530b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 70540b57cec5SDimitry Andric _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) 70550b57cec5SDimitry Andric { 70560b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 70570b57cec5SDimitry Andric (__v8si) _mm256_setzero_si256 (), 70580b57cec5SDimitry Andric __M); 70590b57cec5SDimitry Andric } 70600b57cec5SDimitry 
Andric 70610b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 70620b57cec5SDimitry Andric _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) 70630b57cec5SDimitry Andric { 70640b57cec5SDimitry Andric __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 70650b57cec5SDimitry Andric } 70660b57cec5SDimitry Andric 70670b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 70680b57cec5SDimitry Andric _mm512_cvtsepi64_epi16 (__m512i __A) 70690b57cec5SDimitry Andric { 70700b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 70710b57cec5SDimitry Andric (__v8hi) _mm_undefined_si128 (), 70720b57cec5SDimitry Andric (__mmask8) -1); 70730b57cec5SDimitry Andric } 70740b57cec5SDimitry Andric 70750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 70760b57cec5SDimitry Andric _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 70770b57cec5SDimitry Andric { 70780b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 70790b57cec5SDimitry Andric (__v8hi) __O, __M); 70800b57cec5SDimitry Andric } 70810b57cec5SDimitry Andric 70820b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 70830b57cec5SDimitry Andric _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) 70840b57cec5SDimitry Andric { 70850b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 70860b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 70870b57cec5SDimitry Andric __M); 70880b57cec5SDimitry Andric } 70890b57cec5SDimitry Andric 70900b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 70910b57cec5SDimitry Andric _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) 70920b57cec5SDimitry Andric { 70930b57cec5SDimitry Andric __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 70940b57cec5SDimitry Andric } 70950b57cec5SDimitry Andric 
70960b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 70970b57cec5SDimitry Andric _mm512_cvtusepi32_epi8 (__m512i __A) 70980b57cec5SDimitry Andric { 70990b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 71000b57cec5SDimitry Andric (__v16qi) _mm_undefined_si128 (), 71010b57cec5SDimitry Andric (__mmask16) -1); 71020b57cec5SDimitry Andric } 71030b57cec5SDimitry Andric 71040b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 71050b57cec5SDimitry Andric _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 71060b57cec5SDimitry Andric { 71070b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 71080b57cec5SDimitry Andric (__v16qi) __O, 71090b57cec5SDimitry Andric __M); 71100b57cec5SDimitry Andric } 71110b57cec5SDimitry Andric 71120b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 71130b57cec5SDimitry Andric _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) 71140b57cec5SDimitry Andric { 71150b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 71160b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 71170b57cec5SDimitry Andric __M); 71180b57cec5SDimitry Andric } 71190b57cec5SDimitry Andric 71200b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 71210b57cec5SDimitry Andric _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 71220b57cec5SDimitry Andric { 71230b57cec5SDimitry Andric __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 71240b57cec5SDimitry Andric } 71250b57cec5SDimitry Andric 71260b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 71270b57cec5SDimitry Andric _mm512_cvtusepi32_epi16 (__m512i __A) 71280b57cec5SDimitry Andric { 71290b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 71300b57cec5SDimitry Andric (__v16hi) _mm256_undefined_si256 (), 
71310b57cec5SDimitry Andric (__mmask16) -1); 71320b57cec5SDimitry Andric } 71330b57cec5SDimitry Andric 71340b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 71350b57cec5SDimitry Andric _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 71360b57cec5SDimitry Andric { 71370b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 71380b57cec5SDimitry Andric (__v16hi) __O, 71390b57cec5SDimitry Andric __M); 71400b57cec5SDimitry Andric } 71410b57cec5SDimitry Andric 71420b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 71430b57cec5SDimitry Andric _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) 71440b57cec5SDimitry Andric { 71450b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 71460b57cec5SDimitry Andric (__v16hi) _mm256_setzero_si256 (), 71470b57cec5SDimitry Andric __M); 71480b57cec5SDimitry Andric } 71490b57cec5SDimitry Andric 71500b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 71510b57cec5SDimitry Andric _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 71520b57cec5SDimitry Andric { 71530b57cec5SDimitry Andric __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 71540b57cec5SDimitry Andric } 71550b57cec5SDimitry Andric 71560b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 71570b57cec5SDimitry Andric _mm512_cvtusepi64_epi8 (__m512i __A) 71580b57cec5SDimitry Andric { 71590b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 71600b57cec5SDimitry Andric (__v16qi) _mm_undefined_si128 (), 71610b57cec5SDimitry Andric (__mmask8) -1); 71620b57cec5SDimitry Andric } 71630b57cec5SDimitry Andric 71640b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 71650b57cec5SDimitry Andric _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 71660b57cec5SDimitry Andric { 71670b57cec5SDimitry 
Andric return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 71680b57cec5SDimitry Andric (__v16qi) __O, 71690b57cec5SDimitry Andric __M); 71700b57cec5SDimitry Andric } 71710b57cec5SDimitry Andric 71720b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 71730b57cec5SDimitry Andric _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) 71740b57cec5SDimitry Andric { 71750b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 71760b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 71770b57cec5SDimitry Andric __M); 71780b57cec5SDimitry Andric } 71790b57cec5SDimitry Andric 71800b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 71810b57cec5SDimitry Andric _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 71820b57cec5SDimitry Andric { 71830b57cec5SDimitry Andric __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 71840b57cec5SDimitry Andric } 71850b57cec5SDimitry Andric 71860b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 71870b57cec5SDimitry Andric _mm512_cvtusepi64_epi32 (__m512i __A) 71880b57cec5SDimitry Andric { 71890b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 71900b57cec5SDimitry Andric (__v8si) _mm256_undefined_si256 (), 71910b57cec5SDimitry Andric (__mmask8) -1); 71920b57cec5SDimitry Andric } 71930b57cec5SDimitry Andric 71940b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 71950b57cec5SDimitry Andric _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 71960b57cec5SDimitry Andric { 71970b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 71980b57cec5SDimitry Andric (__v8si) __O, __M); 71990b57cec5SDimitry Andric } 72000b57cec5SDimitry Andric 72010b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 72020b57cec5SDimitry Andric _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) 
72030b57cec5SDimitry Andric { 72040b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 72050b57cec5SDimitry Andric (__v8si) _mm256_setzero_si256 (), 72060b57cec5SDimitry Andric __M); 72070b57cec5SDimitry Andric } 72080b57cec5SDimitry Andric 72090b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 72100b57cec5SDimitry Andric _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 72110b57cec5SDimitry Andric { 72120b57cec5SDimitry Andric __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); 72130b57cec5SDimitry Andric } 72140b57cec5SDimitry Andric 72150b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 72160b57cec5SDimitry Andric _mm512_cvtusepi64_epi16 (__m512i __A) 72170b57cec5SDimitry Andric { 72180b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 72190b57cec5SDimitry Andric (__v8hi) _mm_undefined_si128 (), 72200b57cec5SDimitry Andric (__mmask8) -1); 72210b57cec5SDimitry Andric } 72220b57cec5SDimitry Andric 72230b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 72240b57cec5SDimitry Andric _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 72250b57cec5SDimitry Andric { 72260b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 72270b57cec5SDimitry Andric (__v8hi) __O, __M); 72280b57cec5SDimitry Andric } 72290b57cec5SDimitry Andric 72300b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 72310b57cec5SDimitry Andric _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) 72320b57cec5SDimitry Andric { 72330b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 72340b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 72350b57cec5SDimitry Andric __M); 72360b57cec5SDimitry Andric } 72370b57cec5SDimitry Andric 72380b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 72390b57cec5SDimitry 
Andric _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 72400b57cec5SDimitry Andric { 72410b57cec5SDimitry Andric __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); 72420b57cec5SDimitry Andric } 72430b57cec5SDimitry Andric 72440b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 72450b57cec5SDimitry Andric _mm512_cvtepi32_epi8 (__m512i __A) 72460b57cec5SDimitry Andric { 72470b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 72480b57cec5SDimitry Andric (__v16qi) _mm_undefined_si128 (), 72490b57cec5SDimitry Andric (__mmask16) -1); 72500b57cec5SDimitry Andric } 72510b57cec5SDimitry Andric 72520b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 72530b57cec5SDimitry Andric _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 72540b57cec5SDimitry Andric { 72550b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 72560b57cec5SDimitry Andric (__v16qi) __O, __M); 72570b57cec5SDimitry Andric } 72580b57cec5SDimitry Andric 72590b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 72600b57cec5SDimitry Andric _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) 72610b57cec5SDimitry Andric { 72620b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 72630b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 72640b57cec5SDimitry Andric __M); 72650b57cec5SDimitry Andric } 72660b57cec5SDimitry Andric 72670b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 72680b57cec5SDimitry Andric _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 72690b57cec5SDimitry Andric { 72700b57cec5SDimitry Andric __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 72710b57cec5SDimitry Andric } 72720b57cec5SDimitry Andric 72730b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 72740b57cec5SDimitry Andric 
_mm512_cvtepi32_epi16 (__m512i __A) 72750b57cec5SDimitry Andric { 72760b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 72770b57cec5SDimitry Andric (__v16hi) _mm256_undefined_si256 (), 72780b57cec5SDimitry Andric (__mmask16) -1); 72790b57cec5SDimitry Andric } 72800b57cec5SDimitry Andric 72810b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 72820b57cec5SDimitry Andric _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 72830b57cec5SDimitry Andric { 72840b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 72850b57cec5SDimitry Andric (__v16hi) __O, __M); 72860b57cec5SDimitry Andric } 72870b57cec5SDimitry Andric 72880b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 72890b57cec5SDimitry Andric _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) 72900b57cec5SDimitry Andric { 72910b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 72920b57cec5SDimitry Andric (__v16hi) _mm256_setzero_si256 (), 72930b57cec5SDimitry Andric __M); 72940b57cec5SDimitry Andric } 72950b57cec5SDimitry Andric 72960b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 72970b57cec5SDimitry Andric _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) 72980b57cec5SDimitry Andric { 72990b57cec5SDimitry Andric __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); 73000b57cec5SDimitry Andric } 73010b57cec5SDimitry Andric 73020b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 73030b57cec5SDimitry Andric _mm512_cvtepi64_epi8 (__m512i __A) 73040b57cec5SDimitry Andric { 73050b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 73060b57cec5SDimitry Andric (__v16qi) _mm_undefined_si128 (), 73070b57cec5SDimitry Andric (__mmask8) -1); 73080b57cec5SDimitry Andric } 73090b57cec5SDimitry Andric 73100b57cec5SDimitry Andric static __inline__ 
__m128i __DEFAULT_FN_ATTRS512 73110b57cec5SDimitry Andric _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 73120b57cec5SDimitry Andric { 73130b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 73140b57cec5SDimitry Andric (__v16qi) __O, __M); 73150b57cec5SDimitry Andric } 73160b57cec5SDimitry Andric 73170b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 73180b57cec5SDimitry Andric _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) 73190b57cec5SDimitry Andric { 73200b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 73210b57cec5SDimitry Andric (__v16qi) _mm_setzero_si128 (), 73220b57cec5SDimitry Andric __M); 73230b57cec5SDimitry Andric } 73240b57cec5SDimitry Andric 73250b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 73260b57cec5SDimitry Andric _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 73270b57cec5SDimitry Andric { 73280b57cec5SDimitry Andric __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 73290b57cec5SDimitry Andric } 73300b57cec5SDimitry Andric 73310b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 73320b57cec5SDimitry Andric _mm512_cvtepi64_epi32 (__m512i __A) 73330b57cec5SDimitry Andric { 73340b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 73350b57cec5SDimitry Andric (__v8si) _mm256_undefined_si256 (), 73360b57cec5SDimitry Andric (__mmask8) -1); 73370b57cec5SDimitry Andric } 73380b57cec5SDimitry Andric 73390b57cec5SDimitry Andric static __inline__ __m256i __DEFAULT_FN_ATTRS512 73400b57cec5SDimitry Andric _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 73410b57cec5SDimitry Andric { 73420b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 73430b57cec5SDimitry Andric (__v8si) __O, __M); 73440b57cec5SDimitry Andric } 73450b57cec5SDimitry Andric 73460b57cec5SDimitry Andric 
static __inline__ __m256i __DEFAULT_FN_ATTRS512 73470b57cec5SDimitry Andric _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) 73480b57cec5SDimitry Andric { 73490b57cec5SDimitry Andric return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 73500b57cec5SDimitry Andric (__v8si) _mm256_setzero_si256 (), 73510b57cec5SDimitry Andric __M); 73520b57cec5SDimitry Andric } 73530b57cec5SDimitry Andric 73540b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 73550b57cec5SDimitry Andric _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 73560b57cec5SDimitry Andric { 73570b57cec5SDimitry Andric __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 73580b57cec5SDimitry Andric } 73590b57cec5SDimitry Andric 73600b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 73610b57cec5SDimitry Andric _mm512_cvtepi64_epi16 (__m512i __A) 73620b57cec5SDimitry Andric { 73630b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 73640b57cec5SDimitry Andric (__v8hi) _mm_undefined_si128 (), 73650b57cec5SDimitry Andric (__mmask8) -1); 73660b57cec5SDimitry Andric } 73670b57cec5SDimitry Andric 73680b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 73690b57cec5SDimitry Andric _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 73700b57cec5SDimitry Andric { 73710b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 73720b57cec5SDimitry Andric (__v8hi) __O, __M); 73730b57cec5SDimitry Andric } 73740b57cec5SDimitry Andric 73750b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS512 73760b57cec5SDimitry Andric _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) 73770b57cec5SDimitry Andric { 73780b57cec5SDimitry Andric return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 73790b57cec5SDimitry Andric (__v8hi) _mm_setzero_si128 (), 73800b57cec5SDimitry Andric __M); 73810b57cec5SDimitry Andric } 
73820b57cec5SDimitry Andric 73830b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 73840b57cec5SDimitry Andric _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 73850b57cec5SDimitry Andric { 73860b57cec5SDimitry Andric __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 73870b57cec5SDimitry Andric } 73880b57cec5SDimitry Andric 73890b57cec5SDimitry Andric #define _mm512_extracti32x4_epi32(A, imm) \ 7390349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 73910b57cec5SDimitry Andric (__v4si)_mm_undefined_si128(), \ 7392349cc55cSDimitry Andric (__mmask8)-1)) 73930b57cec5SDimitry Andric 73940b57cec5SDimitry Andric #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ 7395349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 73960b57cec5SDimitry Andric (__v4si)(__m128i)(W), \ 7397349cc55cSDimitry Andric (__mmask8)(U))) 73980b57cec5SDimitry Andric 73990b57cec5SDimitry Andric #define _mm512_maskz_extracti32x4_epi32(U, A, imm) \ 7400349cc55cSDimitry Andric ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 74010b57cec5SDimitry Andric (__v4si)_mm_setzero_si128(), \ 7402349cc55cSDimitry Andric (__mmask8)(U))) 74030b57cec5SDimitry Andric 74040b57cec5SDimitry Andric #define _mm512_extracti64x4_epi64(A, imm) \ 7405349cc55cSDimitry Andric ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 74060b57cec5SDimitry Andric (__v4di)_mm256_undefined_si256(), \ 7407349cc55cSDimitry Andric (__mmask8)-1)) 74080b57cec5SDimitry Andric 74090b57cec5SDimitry Andric #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ 7410349cc55cSDimitry Andric ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 74110b57cec5SDimitry Andric (__v4di)(__m256i)(W), \ 7412349cc55cSDimitry Andric (__mmask8)(U))) 74130b57cec5SDimitry Andric 74140b57cec5SDimitry Andric #define 
_mm512_maskz_extracti64x4_epi64(U, A, imm) \ 7415349cc55cSDimitry Andric ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 74160b57cec5SDimitry Andric (__v4di)_mm256_setzero_si256(), \ 7417349cc55cSDimitry Andric (__mmask8)(U))) 74180b57cec5SDimitry Andric 74190b57cec5SDimitry Andric #define _mm512_insertf64x4(A, B, imm) \ 7420349cc55cSDimitry Andric ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \ 7421349cc55cSDimitry Andric (__v4df)(__m256d)(B), (int)(imm))) 74220b57cec5SDimitry Andric 74230b57cec5SDimitry Andric #define _mm512_mask_insertf64x4(W, U, A, B, imm) \ 7424349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 74250b57cec5SDimitry Andric (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7426349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 74270b57cec5SDimitry Andric 74280b57cec5SDimitry Andric #define _mm512_maskz_insertf64x4(U, A, B, imm) \ 7429349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 74300b57cec5SDimitry Andric (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7431349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 74320b57cec5SDimitry Andric 74330b57cec5SDimitry Andric #define _mm512_inserti64x4(A, B, imm) \ 7434349cc55cSDimitry Andric ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \ 7435349cc55cSDimitry Andric (__v4di)(__m256i)(B), (int)(imm))) 74360b57cec5SDimitry Andric 74370b57cec5SDimitry Andric #define _mm512_mask_inserti64x4(W, U, A, B, imm) \ 7438349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 74390b57cec5SDimitry Andric (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7440349cc55cSDimitry Andric (__v8di)(__m512i)(W))) 74410b57cec5SDimitry Andric 74420b57cec5SDimitry Andric #define _mm512_maskz_inserti64x4(U, A, B, imm) \ 7443349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 74440b57cec5SDimitry Andric (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7445349cc55cSDimitry Andric 
(__v8di)_mm512_setzero_si512())) 74460b57cec5SDimitry Andric 74470b57cec5SDimitry Andric #define _mm512_insertf32x4(A, B, imm) \ 7448349cc55cSDimitry Andric ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \ 7449349cc55cSDimitry Andric (__v4sf)(__m128)(B), (int)(imm))) 74500b57cec5SDimitry Andric 74510b57cec5SDimitry Andric #define _mm512_mask_insertf32x4(W, U, A, B, imm) \ 7452349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 74530b57cec5SDimitry Andric (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7454349cc55cSDimitry Andric (__v16sf)(__m512)(W))) 74550b57cec5SDimitry Andric 74560b57cec5SDimitry Andric #define _mm512_maskz_insertf32x4(U, A, B, imm) \ 7457349cc55cSDimitry Andric ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 74580b57cec5SDimitry Andric (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7459349cc55cSDimitry Andric (__v16sf)_mm512_setzero_ps())) 74600b57cec5SDimitry Andric 74610b57cec5SDimitry Andric #define _mm512_inserti32x4(A, B, imm) \ 7462349cc55cSDimitry Andric ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \ 7463349cc55cSDimitry Andric (__v4si)(__m128i)(B), (int)(imm))) 74640b57cec5SDimitry Andric 74650b57cec5SDimitry Andric #define _mm512_mask_inserti32x4(W, U, A, B, imm) \ 7466349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 74670b57cec5SDimitry Andric (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7468349cc55cSDimitry Andric (__v16si)(__m512i)(W))) 74690b57cec5SDimitry Andric 74700b57cec5SDimitry Andric #define _mm512_maskz_inserti32x4(U, A, B, imm) \ 7471349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 74720b57cec5SDimitry Andric (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7473349cc55cSDimitry Andric (__v16si)_mm512_setzero_si512())) 74740b57cec5SDimitry Andric 74750b57cec5SDimitry Andric #define _mm512_getmant_round_pd(A, B, C, R) \ 7476349cc55cSDimitry Andric 
((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 74770b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 74780b57cec5SDimitry Andric (__v8df)_mm512_undefined_pd(), \ 7479349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 74800b57cec5SDimitry Andric 74810b57cec5SDimitry Andric #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \ 7482349cc55cSDimitry Andric ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 74830b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 74840b57cec5SDimitry Andric (__v8df)(__m512d)(W), \ 7485349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 74860b57cec5SDimitry Andric 74870b57cec5SDimitry Andric #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \ 7488349cc55cSDimitry Andric ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 74890b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 74900b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 7491349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 74920b57cec5SDimitry Andric 74930b57cec5SDimitry Andric #define _mm512_getmant_pd(A, B, C) \ 7494349cc55cSDimitry Andric ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 74950b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 74960b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 74970b57cec5SDimitry Andric (__mmask8)-1, \ 7498349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 74990b57cec5SDimitry Andric 75000b57cec5SDimitry Andric #define _mm512_mask_getmant_pd(W, U, A, B, C) \ 7501349cc55cSDimitry Andric ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 75020b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 75030b57cec5SDimitry Andric (__v8df)(__m512d)(W), \ 75040b57cec5SDimitry Andric (__mmask8)(U), \ 7505349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 75060b57cec5SDimitry Andric 75070b57cec5SDimitry Andric #define _mm512_maskz_getmant_pd(U, A, B, C) \ 7508349cc55cSDimitry Andric ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 75090b57cec5SDimitry 
Andric (int)(((C)<<2) | (B)), \ 75100b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 75110b57cec5SDimitry Andric (__mmask8)(U), \ 7512349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 75130b57cec5SDimitry Andric 75140b57cec5SDimitry Andric #define _mm512_getmant_round_ps(A, B, C, R) \ 7515349cc55cSDimitry Andric ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 75160b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 75170b57cec5SDimitry Andric (__v16sf)_mm512_undefined_ps(), \ 7518349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 75190b57cec5SDimitry Andric 75200b57cec5SDimitry Andric #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \ 7521349cc55cSDimitry Andric ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 75220b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 75230b57cec5SDimitry Andric (__v16sf)(__m512)(W), \ 7524349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 75250b57cec5SDimitry Andric 75260b57cec5SDimitry Andric #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \ 7527349cc55cSDimitry Andric ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 75280b57cec5SDimitry Andric (int)(((C)<<2) | (B)), \ 75290b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 7530349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 75310b57cec5SDimitry Andric 75320b57cec5SDimitry Andric #define _mm512_getmant_ps(A, B, C) \ 7533349cc55cSDimitry Andric ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 75340b57cec5SDimitry Andric (int)(((C)<<2)|(B)), \ 75350b57cec5SDimitry Andric (__v16sf)_mm512_undefined_ps(), \ 75360b57cec5SDimitry Andric (__mmask16)-1, \ 7537349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 75380b57cec5SDimitry Andric 75390b57cec5SDimitry Andric #define _mm512_mask_getmant_ps(W, U, A, B, C) \ 7540349cc55cSDimitry Andric ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 75410b57cec5SDimitry Andric (int)(((C)<<2)|(B)), \ 75420b57cec5SDimitry Andric (__v16sf)(__m512)(W), \ 
75430b57cec5SDimitry Andric (__mmask16)(U), \ 7544349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 75450b57cec5SDimitry Andric 75460b57cec5SDimitry Andric #define _mm512_maskz_getmant_ps(U, A, B, C) \ 7547349cc55cSDimitry Andric ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 75480b57cec5SDimitry Andric (int)(((C)<<2)|(B)), \ 75490b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 75500b57cec5SDimitry Andric (__mmask16)(U), \ 7551349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 75520b57cec5SDimitry Andric 75530b57cec5SDimitry Andric #define _mm512_getexp_round_pd(A, R) \ 7554349cc55cSDimitry Andric ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 75550b57cec5SDimitry Andric (__v8df)_mm512_undefined_pd(), \ 7556349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 75570b57cec5SDimitry Andric 75580b57cec5SDimitry Andric #define _mm512_mask_getexp_round_pd(W, U, A, R) \ 7559349cc55cSDimitry Andric ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 75600b57cec5SDimitry Andric (__v8df)(__m512d)(W), \ 7561349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 75620b57cec5SDimitry Andric 75630b57cec5SDimitry Andric #define _mm512_maskz_getexp_round_pd(U, A, R) \ 7564349cc55cSDimitry Andric ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 75650b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 7566349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 75670b57cec5SDimitry Andric 75680b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 75690b57cec5SDimitry Andric _mm512_getexp_pd (__m512d __A) 75700b57cec5SDimitry Andric { 75710b57cec5SDimitry Andric return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 75720b57cec5SDimitry Andric (__v8df) _mm512_undefined_pd (), 75730b57cec5SDimitry Andric (__mmask8) -1, 75740b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 75750b57cec5SDimitry Andric } 75760b57cec5SDimitry Andric 75770b57cec5SDimitry Andric static __inline__ __m512d 
__DEFAULT_FN_ATTRS512 75780b57cec5SDimitry Andric _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) 75790b57cec5SDimitry Andric { 75800b57cec5SDimitry Andric return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 75810b57cec5SDimitry Andric (__v8df) __W, 75820b57cec5SDimitry Andric (__mmask8) __U, 75830b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 75840b57cec5SDimitry Andric } 75850b57cec5SDimitry Andric 75860b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 75870b57cec5SDimitry Andric _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) 75880b57cec5SDimitry Andric { 75890b57cec5SDimitry Andric return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 75900b57cec5SDimitry Andric (__v8df) _mm512_setzero_pd (), 75910b57cec5SDimitry Andric (__mmask8) __U, 75920b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 75930b57cec5SDimitry Andric } 75940b57cec5SDimitry Andric 75950b57cec5SDimitry Andric #define _mm512_getexp_round_ps(A, R) \ 7596349cc55cSDimitry Andric ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 75970b57cec5SDimitry Andric (__v16sf)_mm512_undefined_ps(), \ 7598349cc55cSDimitry Andric (__mmask16)-1, (int)(R))) 75990b57cec5SDimitry Andric 76000b57cec5SDimitry Andric #define _mm512_mask_getexp_round_ps(W, U, A, R) \ 7601349cc55cSDimitry Andric ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 76020b57cec5SDimitry Andric (__v16sf)(__m512)(W), \ 7603349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 76040b57cec5SDimitry Andric 76050b57cec5SDimitry Andric #define _mm512_maskz_getexp_round_ps(U, A, R) \ 7606349cc55cSDimitry Andric ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 76070b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps(), \ 7608349cc55cSDimitry Andric (__mmask16)(U), (int)(R))) 76090b57cec5SDimitry Andric 76100b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 76110b57cec5SDimitry Andric _mm512_getexp_ps (__m512 __A) 
76120b57cec5SDimitry Andric { 76130b57cec5SDimitry Andric return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 76140b57cec5SDimitry Andric (__v16sf) _mm512_undefined_ps (), 76150b57cec5SDimitry Andric (__mmask16) -1, 76160b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 76170b57cec5SDimitry Andric } 76180b57cec5SDimitry Andric 76190b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 76200b57cec5SDimitry Andric _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) 76210b57cec5SDimitry Andric { 76220b57cec5SDimitry Andric return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 76230b57cec5SDimitry Andric (__v16sf) __W, 76240b57cec5SDimitry Andric (__mmask16) __U, 76250b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 76260b57cec5SDimitry Andric } 76270b57cec5SDimitry Andric 76280b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 76290b57cec5SDimitry Andric _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) 76300b57cec5SDimitry Andric { 76310b57cec5SDimitry Andric return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 76320b57cec5SDimitry Andric (__v16sf) _mm512_setzero_ps (), 76330b57cec5SDimitry Andric (__mmask16) __U, 76340b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 76350b57cec5SDimitry Andric } 76360b57cec5SDimitry Andric 76370b57cec5SDimitry Andric #define _mm512_i64gather_ps(index, addr, scale) \ 7638349cc55cSDimitry Andric ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ 76390b57cec5SDimitry Andric (void const *)(addr), \ 76400b57cec5SDimitry Andric (__v8di)(__m512i)(index), (__mmask8)-1, \ 7641349cc55cSDimitry Andric (int)(scale))) 76420b57cec5SDimitry Andric 76430b57cec5SDimitry Andric #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7644349cc55cSDimitry Andric ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ 76450b57cec5SDimitry Andric (void const *)(addr), \ 76460b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 
7647349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 76480b57cec5SDimitry Andric 76490b57cec5SDimitry Andric #define _mm512_i64gather_epi32(index, addr, scale) \ 7650349cc55cSDimitry Andric ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \ 76510b57cec5SDimitry Andric (void const *)(addr), \ 76520b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 7653349cc55cSDimitry Andric (__mmask8)-1, (int)(scale))) 76540b57cec5SDimitry Andric 76550b57cec5SDimitry Andric #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 7656349cc55cSDimitry Andric ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ 76570b57cec5SDimitry Andric (void const *)(addr), \ 76580b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 7659349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 76600b57cec5SDimitry Andric 76610b57cec5SDimitry Andric #define _mm512_i64gather_pd(index, addr, scale) \ 7662349cc55cSDimitry Andric ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ 76630b57cec5SDimitry Andric (void const *)(addr), \ 76640b57cec5SDimitry Andric (__v8di)(__m512i)(index), (__mmask8)-1, \ 7665349cc55cSDimitry Andric (int)(scale))) 76660b57cec5SDimitry Andric 76670b57cec5SDimitry Andric #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7668349cc55cSDimitry Andric ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ 76690b57cec5SDimitry Andric (void const *)(addr), \ 76700b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 7671349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 76720b57cec5SDimitry Andric 76730b57cec5SDimitry Andric #define _mm512_i64gather_epi64(index, addr, scale) \ 7674349cc55cSDimitry Andric ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \ 76750b57cec5SDimitry Andric (void const *)(addr), \ 76760b57cec5SDimitry Andric (__v8di)(__m512i)(index), (__mmask8)-1, \ 7677349cc55cSDimitry Andric (int)(scale))) 76780b57cec5SDimitry Andric 
76790b57cec5SDimitry Andric #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7680349cc55cSDimitry Andric ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ 76810b57cec5SDimitry Andric (void const *)(addr), \ 76820b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 7683349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 76840b57cec5SDimitry Andric 76850b57cec5SDimitry Andric #define _mm512_i32gather_ps(index, addr, scale) \ 7686349cc55cSDimitry Andric ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ 76870b57cec5SDimitry Andric (void const *)(addr), \ 7688a7dea167SDimitry Andric (__v16si)(__m512)(index), \ 7689349cc55cSDimitry Andric (__mmask16)-1, (int)(scale))) 76900b57cec5SDimitry Andric 76910b57cec5SDimitry Andric #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \ 7692349cc55cSDimitry Andric ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ 76930b57cec5SDimitry Andric (void const *)(addr), \ 7694a7dea167SDimitry Andric (__v16si)(__m512)(index), \ 7695349cc55cSDimitry Andric (__mmask16)(mask), (int)(scale))) 76960b57cec5SDimitry Andric 76970b57cec5SDimitry Andric #define _mm512_i32gather_epi32(index, addr, scale) \ 7698349cc55cSDimitry Andric ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ 76990b57cec5SDimitry Andric (void const *)(addr), \ 77000b57cec5SDimitry Andric (__v16si)(__m512i)(index), \ 7701349cc55cSDimitry Andric (__mmask16)-1, (int)(scale))) 77020b57cec5SDimitry Andric 77030b57cec5SDimitry Andric #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 7704349cc55cSDimitry Andric ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ 77050b57cec5SDimitry Andric (void const *)(addr), \ 77060b57cec5SDimitry Andric (__v16si)(__m512i)(index), \ 7707349cc55cSDimitry Andric (__mmask16)(mask), (int)(scale))) 77080b57cec5SDimitry Andric 77090b57cec5SDimitry Andric #define _mm512_i32gather_pd(index, addr, 
scale) \ 7710349cc55cSDimitry Andric ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ 77110b57cec5SDimitry Andric (void const *)(addr), \ 77120b57cec5SDimitry Andric (__v8si)(__m256i)(index), (__mmask8)-1, \ 7713349cc55cSDimitry Andric (int)(scale))) 77140b57cec5SDimitry Andric 77150b57cec5SDimitry Andric #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \ 7716349cc55cSDimitry Andric ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ 77170b57cec5SDimitry Andric (void const *)(addr), \ 77180b57cec5SDimitry Andric (__v8si)(__m256i)(index), \ 7719349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 77200b57cec5SDimitry Andric 77210b57cec5SDimitry Andric #define _mm512_i32gather_epi64(index, addr, scale) \ 7722349cc55cSDimitry Andric ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ 77230b57cec5SDimitry Andric (void const *)(addr), \ 77240b57cec5SDimitry Andric (__v8si)(__m256i)(index), (__mmask8)-1, \ 7725349cc55cSDimitry Andric (int)(scale))) 77260b57cec5SDimitry Andric 77270b57cec5SDimitry Andric #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 7728349cc55cSDimitry Andric ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ 77290b57cec5SDimitry Andric (void const *)(addr), \ 77300b57cec5SDimitry Andric (__v8si)(__m256i)(index), \ 7731349cc55cSDimitry Andric (__mmask8)(mask), (int)(scale))) 77320b57cec5SDimitry Andric 77330b57cec5SDimitry Andric #define _mm512_i64scatter_ps(addr, index, v1, scale) \ 77340b57cec5SDimitry Andric __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \ 77350b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 77360b57cec5SDimitry Andric (__v8sf)(__m256)(v1), (int)(scale)) 77370b57cec5SDimitry Andric 77380b57cec5SDimitry Andric #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 77390b57cec5SDimitry Andric __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \ 77400b57cec5SDimitry Andric 
(__v8di)(__m512i)(index), \ 77410b57cec5SDimitry Andric (__v8sf)(__m256)(v1), (int)(scale)) 77420b57cec5SDimitry Andric 77430b57cec5SDimitry Andric #define _mm512_i64scatter_epi32(addr, index, v1, scale) \ 77440b57cec5SDimitry Andric __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \ 77450b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 77460b57cec5SDimitry Andric (__v8si)(__m256i)(v1), (int)(scale)) 77470b57cec5SDimitry Andric 77480b57cec5SDimitry Andric #define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 77490b57cec5SDimitry Andric __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \ 77500b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 77510b57cec5SDimitry Andric (__v8si)(__m256i)(v1), (int)(scale)) 77520b57cec5SDimitry Andric 77530b57cec5SDimitry Andric #define _mm512_i64scatter_pd(addr, index, v1, scale) \ 77540b57cec5SDimitry Andric __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \ 77550b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 77560b57cec5SDimitry Andric (__v8df)(__m512d)(v1), (int)(scale)) 77570b57cec5SDimitry Andric 77580b57cec5SDimitry Andric #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 77590b57cec5SDimitry Andric __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \ 77600b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 77610b57cec5SDimitry Andric (__v8df)(__m512d)(v1), (int)(scale)) 77620b57cec5SDimitry Andric 77630b57cec5SDimitry Andric #define _mm512_i64scatter_epi64(addr, index, v1, scale) \ 77640b57cec5SDimitry Andric __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \ 77650b57cec5SDimitry Andric (__v8di)(__m512i)(index), \ 77660b57cec5SDimitry Andric (__v8di)(__m512i)(v1), (int)(scale)) 77670b57cec5SDimitry Andric 77680b57cec5SDimitry Andric #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 77690b57cec5SDimitry Andric __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \ 77700b57cec5SDimitry Andric 
(__v8di)(__m512i)(index), \ 77710b57cec5SDimitry Andric (__v8di)(__m512i)(v1), (int)(scale)) 77720b57cec5SDimitry Andric 77730b57cec5SDimitry Andric #define _mm512_i32scatter_ps(addr, index, v1, scale) \ 77740b57cec5SDimitry Andric __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \ 77750b57cec5SDimitry Andric (__v16si)(__m512i)(index), \ 77760b57cec5SDimitry Andric (__v16sf)(__m512)(v1), (int)(scale)) 77770b57cec5SDimitry Andric 77780b57cec5SDimitry Andric #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 77790b57cec5SDimitry Andric __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \ 77800b57cec5SDimitry Andric (__v16si)(__m512i)(index), \ 77810b57cec5SDimitry Andric (__v16sf)(__m512)(v1), (int)(scale)) 77820b57cec5SDimitry Andric 77830b57cec5SDimitry Andric #define _mm512_i32scatter_epi32(addr, index, v1, scale) \ 77840b57cec5SDimitry Andric __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \ 77850b57cec5SDimitry Andric (__v16si)(__m512i)(index), \ 77860b57cec5SDimitry Andric (__v16si)(__m512i)(v1), (int)(scale)) 77870b57cec5SDimitry Andric 77880b57cec5SDimitry Andric #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 77890b57cec5SDimitry Andric __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \ 77900b57cec5SDimitry Andric (__v16si)(__m512i)(index), \ 77910b57cec5SDimitry Andric (__v16si)(__m512i)(v1), (int)(scale)) 77920b57cec5SDimitry Andric 77930b57cec5SDimitry Andric #define _mm512_i32scatter_pd(addr, index, v1, scale) \ 77940b57cec5SDimitry Andric __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \ 77950b57cec5SDimitry Andric (__v8si)(__m256i)(index), \ 77960b57cec5SDimitry Andric (__v8df)(__m512d)(v1), (int)(scale)) 77970b57cec5SDimitry Andric 77980b57cec5SDimitry Andric #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 77990b57cec5SDimitry Andric __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \ 78000b57cec5SDimitry Andric 
(__v8si)(__m256i)(index), \ 78010b57cec5SDimitry Andric (__v8df)(__m512d)(v1), (int)(scale)) 78020b57cec5SDimitry Andric 78030b57cec5SDimitry Andric #define _mm512_i32scatter_epi64(addr, index, v1, scale) \ 78040b57cec5SDimitry Andric __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \ 78050b57cec5SDimitry Andric (__v8si)(__m256i)(index), \ 78060b57cec5SDimitry Andric (__v8di)(__m512i)(v1), (int)(scale)) 78070b57cec5SDimitry Andric 78080b57cec5SDimitry Andric #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 78090b57cec5SDimitry Andric __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \ 78100b57cec5SDimitry Andric (__v8si)(__m256i)(index), \ 78110b57cec5SDimitry Andric (__v8di)(__m512i)(v1), (int)(scale)) 78120b57cec5SDimitry Andric 78130b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 78140b57cec5SDimitry Andric _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 78150b57cec5SDimitry Andric { 78160b57cec5SDimitry Andric return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, 78170b57cec5SDimitry Andric (__v4sf)__A, 78180b57cec5SDimitry Andric (__v4sf)__B, 78190b57cec5SDimitry Andric (__mmask8)__U, 78200b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 78210b57cec5SDimitry Andric } 78220b57cec5SDimitry Andric 78230b57cec5SDimitry Andric #define _mm_fmadd_round_ss(A, B, C, R) \ 7824349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 78250b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 78260b57cec5SDimitry Andric (__v4sf)(__m128)(C), (__mmask8)-1, \ 7827349cc55cSDimitry Andric (int)(R))) 78280b57cec5SDimitry Andric 78290b57cec5SDimitry Andric #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \ 7830349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 78310b57cec5SDimitry Andric (__v4sf)(__m128)(A), \ 78320b57cec5SDimitry Andric (__v4sf)(__m128)(B), (__mmask8)(U), \ 7833349cc55cSDimitry Andric (int)(R))) 78340b57cec5SDimitry Andric 
78350b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 78360b57cec5SDimitry Andric _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 78370b57cec5SDimitry Andric { 78380b57cec5SDimitry Andric return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, 78390b57cec5SDimitry Andric (__v4sf)__B, 78400b57cec5SDimitry Andric (__v4sf)__C, 78410b57cec5SDimitry Andric (__mmask8)__U, 78420b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 78430b57cec5SDimitry Andric } 78440b57cec5SDimitry Andric 78450b57cec5SDimitry Andric #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ 7846349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 78470b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 78480b57cec5SDimitry Andric (__v4sf)(__m128)(C), (__mmask8)(U), \ 7849349cc55cSDimitry Andric (int)(R))) 78500b57cec5SDimitry Andric 78510b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 78520b57cec5SDimitry Andric _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 78530b57cec5SDimitry Andric { 78540b57cec5SDimitry Andric return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W, 78550b57cec5SDimitry Andric (__v4sf)__X, 78560b57cec5SDimitry Andric (__v4sf)__Y, 78570b57cec5SDimitry Andric (__mmask8)__U, 78580b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 78590b57cec5SDimitry Andric } 78600b57cec5SDimitry Andric 78610b57cec5SDimitry Andric #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \ 7862349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 78630b57cec5SDimitry Andric (__v4sf)(__m128)(X), \ 78640b57cec5SDimitry Andric (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7865349cc55cSDimitry Andric (int)(R))) 78660b57cec5SDimitry Andric 78670b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 78680b57cec5SDimitry Andric _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 78690b57cec5SDimitry Andric { 78700b57cec5SDimitry Andric return 
__builtin_ia32_vfmaddss3_mask((__v4sf)__W, 78710b57cec5SDimitry Andric (__v4sf)__A, 78720b57cec5SDimitry Andric -(__v4sf)__B, 78730b57cec5SDimitry Andric (__mmask8)__U, 78740b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 78750b57cec5SDimitry Andric } 78760b57cec5SDimitry Andric 78770b57cec5SDimitry Andric #define _mm_fmsub_round_ss(A, B, C, R) \ 7878349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 78790b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 78800b57cec5SDimitry Andric -(__v4sf)(__m128)(C), (__mmask8)-1, \ 7881349cc55cSDimitry Andric (int)(R))) 78820b57cec5SDimitry Andric 78830b57cec5SDimitry Andric #define _mm_mask_fmsub_round_ss(W, U, A, B, R) \ 7884349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 78850b57cec5SDimitry Andric (__v4sf)(__m128)(A), \ 78860b57cec5SDimitry Andric -(__v4sf)(__m128)(B), (__mmask8)(U), \ 7887349cc55cSDimitry Andric (int)(R))) 78880b57cec5SDimitry Andric 78890b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 78900b57cec5SDimitry Andric _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 78910b57cec5SDimitry Andric { 78920b57cec5SDimitry Andric return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, 78930b57cec5SDimitry Andric (__v4sf)__B, 78940b57cec5SDimitry Andric -(__v4sf)__C, 78950b57cec5SDimitry Andric (__mmask8)__U, 78960b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 78970b57cec5SDimitry Andric } 78980b57cec5SDimitry Andric 78990b57cec5SDimitry Andric #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ 7900349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 79010b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 79020b57cec5SDimitry Andric -(__v4sf)(__m128)(C), (__mmask8)(U), \ 7903349cc55cSDimitry Andric (int)(R))) 79040b57cec5SDimitry Andric 79050b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 79060b57cec5SDimitry Andric _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, 
__m128 __Y, __mmask8 __U) 79070b57cec5SDimitry Andric { 79080b57cec5SDimitry Andric return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W, 79090b57cec5SDimitry Andric (__v4sf)__X, 79100b57cec5SDimitry Andric (__v4sf)__Y, 79110b57cec5SDimitry Andric (__mmask8)__U, 79120b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 79130b57cec5SDimitry Andric } 79140b57cec5SDimitry Andric 79150b57cec5SDimitry Andric #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \ 7916349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 79170b57cec5SDimitry Andric (__v4sf)(__m128)(X), \ 79180b57cec5SDimitry Andric (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7919349cc55cSDimitry Andric (int)(R))) 79200b57cec5SDimitry Andric 79210b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 79220b57cec5SDimitry Andric _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 79230b57cec5SDimitry Andric { 79240b57cec5SDimitry Andric return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, 79250b57cec5SDimitry Andric -(__v4sf)__A, 79260b57cec5SDimitry Andric (__v4sf)__B, 79270b57cec5SDimitry Andric (__mmask8)__U, 79280b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 79290b57cec5SDimitry Andric } 79300b57cec5SDimitry Andric 79310b57cec5SDimitry Andric #define _mm_fnmadd_round_ss(A, B, C, R) \ 7932349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 79330b57cec5SDimitry Andric -(__v4sf)(__m128)(B), \ 79340b57cec5SDimitry Andric (__v4sf)(__m128)(C), (__mmask8)-1, \ 7935349cc55cSDimitry Andric (int)(R))) 79360b57cec5SDimitry Andric 79370b57cec5SDimitry Andric #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \ 7938349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 79390b57cec5SDimitry Andric -(__v4sf)(__m128)(A), \ 79400b57cec5SDimitry Andric (__v4sf)(__m128)(B), (__mmask8)(U), \ 7941349cc55cSDimitry Andric (int)(R))) 79420b57cec5SDimitry Andric 79430b57cec5SDimitry Andric static __inline__ __m128 
__DEFAULT_FN_ATTRS128 79440b57cec5SDimitry Andric _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 79450b57cec5SDimitry Andric { 79460b57cec5SDimitry Andric return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, 79470b57cec5SDimitry Andric -(__v4sf)__B, 79480b57cec5SDimitry Andric (__v4sf)__C, 79490b57cec5SDimitry Andric (__mmask8)__U, 79500b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 79510b57cec5SDimitry Andric } 79520b57cec5SDimitry Andric 79530b57cec5SDimitry Andric #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ 7954349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 79550b57cec5SDimitry Andric -(__v4sf)(__m128)(B), \ 79560b57cec5SDimitry Andric (__v4sf)(__m128)(C), (__mmask8)(U), \ 7957349cc55cSDimitry Andric (int)(R))) 79580b57cec5SDimitry Andric 79590b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 79600b57cec5SDimitry Andric _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 79610b57cec5SDimitry Andric { 79620b57cec5SDimitry Andric return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W, 79630b57cec5SDimitry Andric -(__v4sf)__X, 79640b57cec5SDimitry Andric (__v4sf)__Y, 79650b57cec5SDimitry Andric (__mmask8)__U, 79660b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 79670b57cec5SDimitry Andric } 79680b57cec5SDimitry Andric 79690b57cec5SDimitry Andric #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \ 7970349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 79710b57cec5SDimitry Andric -(__v4sf)(__m128)(X), \ 79720b57cec5SDimitry Andric (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7973349cc55cSDimitry Andric (int)(R))) 79740b57cec5SDimitry Andric 79750b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 79760b57cec5SDimitry Andric _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 79770b57cec5SDimitry Andric { 79780b57cec5SDimitry Andric return __builtin_ia32_vfmaddss3_mask((__v4sf)__W, 
79790b57cec5SDimitry Andric -(__v4sf)__A, 79800b57cec5SDimitry Andric -(__v4sf)__B, 79810b57cec5SDimitry Andric (__mmask8)__U, 79820b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 79830b57cec5SDimitry Andric } 79840b57cec5SDimitry Andric 79850b57cec5SDimitry Andric #define _mm_fnmsub_round_ss(A, B, C, R) \ 7986349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 79870b57cec5SDimitry Andric -(__v4sf)(__m128)(B), \ 79880b57cec5SDimitry Andric -(__v4sf)(__m128)(C), (__mmask8)-1, \ 7989349cc55cSDimitry Andric (int)(R))) 79900b57cec5SDimitry Andric 79910b57cec5SDimitry Andric #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \ 7992349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 79930b57cec5SDimitry Andric -(__v4sf)(__m128)(A), \ 79940b57cec5SDimitry Andric -(__v4sf)(__m128)(B), (__mmask8)(U), \ 7995349cc55cSDimitry Andric (int)(R))) 79960b57cec5SDimitry Andric 79970b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 79980b57cec5SDimitry Andric _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 79990b57cec5SDimitry Andric { 80000b57cec5SDimitry Andric return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A, 80010b57cec5SDimitry Andric -(__v4sf)__B, 80020b57cec5SDimitry Andric -(__v4sf)__C, 80030b57cec5SDimitry Andric (__mmask8)__U, 80040b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 80050b57cec5SDimitry Andric } 80060b57cec5SDimitry Andric 80070b57cec5SDimitry Andric #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ 8008349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 80090b57cec5SDimitry Andric -(__v4sf)(__m128)(B), \ 80100b57cec5SDimitry Andric -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8011349cc55cSDimitry Andric (int)(R))) 80120b57cec5SDimitry Andric 80130b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 80140b57cec5SDimitry Andric _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 
80150b57cec5SDimitry Andric { 80160b57cec5SDimitry Andric return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W, 80170b57cec5SDimitry Andric -(__v4sf)__X, 80180b57cec5SDimitry Andric (__v4sf)__Y, 80190b57cec5SDimitry Andric (__mmask8)__U, 80200b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 80210b57cec5SDimitry Andric } 80220b57cec5SDimitry Andric 80230b57cec5SDimitry Andric #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \ 8024349cc55cSDimitry Andric ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 80250b57cec5SDimitry Andric -(__v4sf)(__m128)(X), \ 80260b57cec5SDimitry Andric (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8027349cc55cSDimitry Andric (int)(R))) 80280b57cec5SDimitry Andric 80290b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 80300b57cec5SDimitry Andric _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 80310b57cec5SDimitry Andric { 80320b57cec5SDimitry Andric return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, 80330b57cec5SDimitry Andric (__v2df)__A, 80340b57cec5SDimitry Andric (__v2df)__B, 80350b57cec5SDimitry Andric (__mmask8)__U, 80360b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 80370b57cec5SDimitry Andric } 80380b57cec5SDimitry Andric 80390b57cec5SDimitry Andric #define _mm_fmadd_round_sd(A, B, C, R) \ 8040349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 80410b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 80420b57cec5SDimitry Andric (__v2df)(__m128d)(C), (__mmask8)-1, \ 8043349cc55cSDimitry Andric (int)(R))) 80440b57cec5SDimitry Andric 80450b57cec5SDimitry Andric #define _mm_mask_fmadd_round_sd(W, U, A, B, R) \ 8046349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 80470b57cec5SDimitry Andric (__v2df)(__m128d)(A), \ 80480b57cec5SDimitry Andric (__v2df)(__m128d)(B), (__mmask8)(U), \ 8049349cc55cSDimitry Andric (int)(R))) 80500b57cec5SDimitry Andric 80510b57cec5SDimitry Andric static __inline__ __m128d 
__DEFAULT_FN_ATTRS128 80520b57cec5SDimitry Andric _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 80530b57cec5SDimitry Andric { 80540b57cec5SDimitry Andric return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, 80550b57cec5SDimitry Andric (__v2df)__B, 80560b57cec5SDimitry Andric (__v2df)__C, 80570b57cec5SDimitry Andric (__mmask8)__U, 80580b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 80590b57cec5SDimitry Andric } 80600b57cec5SDimitry Andric 80610b57cec5SDimitry Andric #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ 8062349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 80630b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 80640b57cec5SDimitry Andric (__v2df)(__m128d)(C), (__mmask8)(U), \ 8065349cc55cSDimitry Andric (int)(R))) 80660b57cec5SDimitry Andric 80670b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 80680b57cec5SDimitry Andric _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 80690b57cec5SDimitry Andric { 80700b57cec5SDimitry Andric return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W, 80710b57cec5SDimitry Andric (__v2df)__X, 80720b57cec5SDimitry Andric (__v2df)__Y, 80730b57cec5SDimitry Andric (__mmask8)__U, 80740b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 80750b57cec5SDimitry Andric } 80760b57cec5SDimitry Andric 80770b57cec5SDimitry Andric #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \ 8078349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 80790b57cec5SDimitry Andric (__v2df)(__m128d)(X), \ 80800b57cec5SDimitry Andric (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8081349cc55cSDimitry Andric (int)(R))) 80820b57cec5SDimitry Andric 80830b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 80840b57cec5SDimitry Andric _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 80850b57cec5SDimitry Andric { 80860b57cec5SDimitry Andric return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, 
80870b57cec5SDimitry Andric (__v2df)__A, 80880b57cec5SDimitry Andric -(__v2df)__B, 80890b57cec5SDimitry Andric (__mmask8)__U, 80900b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 80910b57cec5SDimitry Andric } 80920b57cec5SDimitry Andric 80930b57cec5SDimitry Andric #define _mm_fmsub_round_sd(A, B, C, R) \ 8094349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 80950b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 80960b57cec5SDimitry Andric -(__v2df)(__m128d)(C), (__mmask8)-1, \ 8097349cc55cSDimitry Andric (int)(R))) 80980b57cec5SDimitry Andric 80990b57cec5SDimitry Andric #define _mm_mask_fmsub_round_sd(W, U, A, B, R) \ 8100349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 81010b57cec5SDimitry Andric (__v2df)(__m128d)(A), \ 81020b57cec5SDimitry Andric -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8103349cc55cSDimitry Andric (int)(R))) 81040b57cec5SDimitry Andric 81050b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 81060b57cec5SDimitry Andric _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 81070b57cec5SDimitry Andric { 81080b57cec5SDimitry Andric return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, 81090b57cec5SDimitry Andric (__v2df)__B, 81100b57cec5SDimitry Andric -(__v2df)__C, 81110b57cec5SDimitry Andric (__mmask8)__U, 81120b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 81130b57cec5SDimitry Andric } 81140b57cec5SDimitry Andric 81150b57cec5SDimitry Andric #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ 8116349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 81170b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 81180b57cec5SDimitry Andric -(__v2df)(__m128d)(C), \ 8119349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 81200b57cec5SDimitry Andric 81210b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 81220b57cec5SDimitry Andric _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 
__U) 81230b57cec5SDimitry Andric { 81240b57cec5SDimitry Andric return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W, 81250b57cec5SDimitry Andric (__v2df)__X, 81260b57cec5SDimitry Andric (__v2df)__Y, 81270b57cec5SDimitry Andric (__mmask8)__U, 81280b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 81290b57cec5SDimitry Andric } 81300b57cec5SDimitry Andric 81310b57cec5SDimitry Andric #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \ 8132349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 81330b57cec5SDimitry Andric (__v2df)(__m128d)(X), \ 81340b57cec5SDimitry Andric (__v2df)(__m128d)(Y), \ 8135349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 81360b57cec5SDimitry Andric 81370b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 81380b57cec5SDimitry Andric _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 81390b57cec5SDimitry Andric { 81400b57cec5SDimitry Andric return __builtin_ia32_vfmaddsd3_mask((__v2df)__W, 81410b57cec5SDimitry Andric -(__v2df)__A, 81420b57cec5SDimitry Andric (__v2df)__B, 81430b57cec5SDimitry Andric (__mmask8)__U, 81440b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 81450b57cec5SDimitry Andric } 81460b57cec5SDimitry Andric 81470b57cec5SDimitry Andric #define _mm_fnmadd_round_sd(A, B, C, R) \ 8148349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 81490b57cec5SDimitry Andric -(__v2df)(__m128d)(B), \ 81500b57cec5SDimitry Andric (__v2df)(__m128d)(C), (__mmask8)-1, \ 8151349cc55cSDimitry Andric (int)(R))) 81520b57cec5SDimitry Andric 81530b57cec5SDimitry Andric #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \ 8154349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 81550b57cec5SDimitry Andric -(__v2df)(__m128d)(A), \ 81560b57cec5SDimitry Andric (__v2df)(__m128d)(B), (__mmask8)(U), \ 8157349cc55cSDimitry Andric (int)(R))) 81580b57cec5SDimitry Andric 81590b57cec5SDimitry Andric static __inline__ __m128d 
__DEFAULT_FN_ATTRS128 81600b57cec5SDimitry Andric _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 81610b57cec5SDimitry Andric { 81620b57cec5SDimitry Andric return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, 81630b57cec5SDimitry Andric -(__v2df)__B, 81640b57cec5SDimitry Andric (__v2df)__C, 81650b57cec5SDimitry Andric (__mmask8)__U, 81660b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 81670b57cec5SDimitry Andric } 81680b57cec5SDimitry Andric 81690b57cec5SDimitry Andric #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ 8170349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 81710b57cec5SDimitry Andric -(__v2df)(__m128d)(B), \ 81720b57cec5SDimitry Andric (__v2df)(__m128d)(C), (__mmask8)(U), \ 8173349cc55cSDimitry Andric (int)(R))) 81740b57cec5SDimitry Andric 81750b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 81760b57cec5SDimitry Andric _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 81770b57cec5SDimitry Andric { 81780b57cec5SDimitry Andric return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W, 81790b57cec5SDimitry Andric -(__v2df)__X, 81800b57cec5SDimitry Andric (__v2df)__Y, 81810b57cec5SDimitry Andric (__mmask8)__U, 81820b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 81830b57cec5SDimitry Andric } 81840b57cec5SDimitry Andric 81850b57cec5SDimitry Andric #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \ 8186349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 81870b57cec5SDimitry Andric -(__v2df)(__m128d)(X), \ 81880b57cec5SDimitry Andric (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8189349cc55cSDimitry Andric (int)(R))) 81900b57cec5SDimitry Andric 81910b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 81920b57cec5SDimitry Andric _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 81930b57cec5SDimitry Andric { 81940b57cec5SDimitry Andric return 
__builtin_ia32_vfmaddsd3_mask((__v2df)__W, 81950b57cec5SDimitry Andric -(__v2df)__A, 81960b57cec5SDimitry Andric -(__v2df)__B, 81970b57cec5SDimitry Andric (__mmask8)__U, 81980b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 81990b57cec5SDimitry Andric } 82000b57cec5SDimitry Andric 82010b57cec5SDimitry Andric #define _mm_fnmsub_round_sd(A, B, C, R) \ 8202349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 82030b57cec5SDimitry Andric -(__v2df)(__m128d)(B), \ 82040b57cec5SDimitry Andric -(__v2df)(__m128d)(C), (__mmask8)-1, \ 8205349cc55cSDimitry Andric (int)(R))) 82060b57cec5SDimitry Andric 82070b57cec5SDimitry Andric #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \ 8208349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 82090b57cec5SDimitry Andric -(__v2df)(__m128d)(A), \ 82100b57cec5SDimitry Andric -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8211349cc55cSDimitry Andric (int)(R))) 82120b57cec5SDimitry Andric 82130b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 82140b57cec5SDimitry Andric _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 82150b57cec5SDimitry Andric { 82160b57cec5SDimitry Andric return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A, 82170b57cec5SDimitry Andric -(__v2df)__B, 82180b57cec5SDimitry Andric -(__v2df)__C, 82190b57cec5SDimitry Andric (__mmask8)__U, 82200b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 82210b57cec5SDimitry Andric } 82220b57cec5SDimitry Andric 82230b57cec5SDimitry Andric #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ 8224349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 82250b57cec5SDimitry Andric -(__v2df)(__m128d)(B), \ 82260b57cec5SDimitry Andric -(__v2df)(__m128d)(C), \ 82270b57cec5SDimitry Andric (__mmask8)(U), \ 8228349cc55cSDimitry Andric (int)(R))) 82290b57cec5SDimitry Andric 82300b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 
82310b57cec5SDimitry Andric _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 82320b57cec5SDimitry Andric { 82330b57cec5SDimitry Andric return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W, 82340b57cec5SDimitry Andric -(__v2df)__X, 82350b57cec5SDimitry Andric (__v2df)__Y, 82360b57cec5SDimitry Andric (__mmask8)__U, 82370b57cec5SDimitry Andric _MM_FROUND_CUR_DIRECTION); 82380b57cec5SDimitry Andric } 82390b57cec5SDimitry Andric 82400b57cec5SDimitry Andric #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \ 8241349cc55cSDimitry Andric ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 82420b57cec5SDimitry Andric -(__v2df)(__m128d)(X), \ 82430b57cec5SDimitry Andric (__v2df)(__m128d)(Y), \ 8244349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 82450b57cec5SDimitry Andric 82460b57cec5SDimitry Andric #define _mm512_permutex_pd(X, C) \ 8247349cc55cSDimitry Andric ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C))) 82480b57cec5SDimitry Andric 82490b57cec5SDimitry Andric #define _mm512_mask_permutex_pd(W, U, X, C) \ 8250349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 82510b57cec5SDimitry Andric (__v8df)_mm512_permutex_pd((X), (C)), \ 8252349cc55cSDimitry Andric (__v8df)(__m512d)(W))) 82530b57cec5SDimitry Andric 82540b57cec5SDimitry Andric #define _mm512_maskz_permutex_pd(U, X, C) \ 8255349cc55cSDimitry Andric ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 82560b57cec5SDimitry Andric (__v8df)_mm512_permutex_pd((X), (C)), \ 8257349cc55cSDimitry Andric (__v8df)_mm512_setzero_pd())) 82580b57cec5SDimitry Andric 82590b57cec5SDimitry Andric #define _mm512_permutex_epi64(X, C) \ 8260349cc55cSDimitry Andric ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C))) 82610b57cec5SDimitry Andric 82620b57cec5SDimitry Andric #define _mm512_mask_permutex_epi64(W, U, X, C) \ 8263349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 82640b57cec5SDimitry Andric 
(__v8di)_mm512_permutex_epi64((X), (C)), \ 8265349cc55cSDimitry Andric (__v8di)(__m512i)(W))) 82660b57cec5SDimitry Andric 82670b57cec5SDimitry Andric #define _mm512_maskz_permutex_epi64(U, X, C) \ 8268349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 82690b57cec5SDimitry Andric (__v8di)_mm512_permutex_epi64((X), (C)), \ 8270349cc55cSDimitry Andric (__v8di)_mm512_setzero_si512())) 82710b57cec5SDimitry Andric 82720b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 82730b57cec5SDimitry Andric _mm512_permutexvar_pd (__m512i __X, __m512d __Y) 82740b57cec5SDimitry Andric { 82750b57cec5SDimitry Andric return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X); 82760b57cec5SDimitry Andric } 82770b57cec5SDimitry Andric 82780b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 82790b57cec5SDimitry Andric _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) 82800b57cec5SDimitry Andric { 82810b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 82820b57cec5SDimitry Andric (__v8df)_mm512_permutexvar_pd(__X, __Y), 82830b57cec5SDimitry Andric (__v8df)__W); 82840b57cec5SDimitry Andric } 82850b57cec5SDimitry Andric 82860b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 82870b57cec5SDimitry Andric _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) 82880b57cec5SDimitry Andric { 82890b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 82900b57cec5SDimitry Andric (__v8df)_mm512_permutexvar_pd(__X, __Y), 82910b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 82920b57cec5SDimitry Andric } 82930b57cec5SDimitry Andric 82940b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 82950b57cec5SDimitry Andric _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) 82960b57cec5SDimitry Andric { 82970b57cec5SDimitry Andric return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, 
(__v8di)__X); 82980b57cec5SDimitry Andric } 82990b57cec5SDimitry Andric 83000b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 83010b57cec5SDimitry Andric _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) 83020b57cec5SDimitry Andric { 83030b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 83040b57cec5SDimitry Andric (__v8di)_mm512_permutexvar_epi64(__X, __Y), 83050b57cec5SDimitry Andric (__v8di)_mm512_setzero_si512()); 83060b57cec5SDimitry Andric } 83070b57cec5SDimitry Andric 83080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 83090b57cec5SDimitry Andric _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, 83100b57cec5SDimitry Andric __m512i __Y) 83110b57cec5SDimitry Andric { 83120b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 83130b57cec5SDimitry Andric (__v8di)_mm512_permutexvar_epi64(__X, __Y), 83140b57cec5SDimitry Andric (__v8di)__W); 83150b57cec5SDimitry Andric } 83160b57cec5SDimitry Andric 83170b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 83180b57cec5SDimitry Andric _mm512_permutexvar_ps (__m512i __X, __m512 __Y) 83190b57cec5SDimitry Andric { 83200b57cec5SDimitry Andric return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X); 83210b57cec5SDimitry Andric } 83220b57cec5SDimitry Andric 83230b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 83240b57cec5SDimitry Andric _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) 83250b57cec5SDimitry Andric { 83260b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 83270b57cec5SDimitry Andric (__v16sf)_mm512_permutexvar_ps(__X, __Y), 83280b57cec5SDimitry Andric (__v16sf)__W); 83290b57cec5SDimitry Andric } 83300b57cec5SDimitry Andric 83310b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 83320b57cec5SDimitry Andric _mm512_maskz_permutexvar_ps 
(__mmask16 __U, __m512i __X, __m512 __Y) 83330b57cec5SDimitry Andric { 83340b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 83350b57cec5SDimitry Andric (__v16sf)_mm512_permutexvar_ps(__X, __Y), 83360b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 83370b57cec5SDimitry Andric } 83380b57cec5SDimitry Andric 83390b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 83400b57cec5SDimitry Andric _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) 83410b57cec5SDimitry Andric { 83420b57cec5SDimitry Andric return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X); 83430b57cec5SDimitry Andric } 83440b57cec5SDimitry Andric 83450b57cec5SDimitry Andric #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 83460b57cec5SDimitry Andric 83470b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 83480b57cec5SDimitry Andric _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) 83490b57cec5SDimitry Andric { 83500b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 83510b57cec5SDimitry Andric (__v16si)_mm512_permutexvar_epi32(__X, __Y), 83520b57cec5SDimitry Andric (__v16si)_mm512_setzero_si512()); 83530b57cec5SDimitry Andric } 83540b57cec5SDimitry Andric 83550b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 83560b57cec5SDimitry Andric _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, 83570b57cec5SDimitry Andric __m512i __Y) 83580b57cec5SDimitry Andric { 83590b57cec5SDimitry Andric return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 83600b57cec5SDimitry Andric (__v16si)_mm512_permutexvar_epi32(__X, __Y), 83610b57cec5SDimitry Andric (__v16si)__W); 83620b57cec5SDimitry Andric } 83630b57cec5SDimitry Andric 83640b57cec5SDimitry Andric #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 83650b57cec5SDimitry Andric 83660b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 
83670b57cec5SDimitry Andric _mm512_kand (__mmask16 __A, __mmask16 __B) 83680b57cec5SDimitry Andric { 83690b57cec5SDimitry Andric return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); 83700b57cec5SDimitry Andric } 83710b57cec5SDimitry Andric 83720b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 83730b57cec5SDimitry Andric _mm512_kandn (__mmask16 __A, __mmask16 __B) 83740b57cec5SDimitry Andric { 83750b57cec5SDimitry Andric return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); 83760b57cec5SDimitry Andric } 83770b57cec5SDimitry Andric 83780b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 83790b57cec5SDimitry Andric _mm512_kor (__mmask16 __A, __mmask16 __B) 83800b57cec5SDimitry Andric { 83810b57cec5SDimitry Andric return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); 83820b57cec5SDimitry Andric } 83830b57cec5SDimitry Andric 83840b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 83850b57cec5SDimitry Andric _mm512_kortestc (__mmask16 __A, __mmask16 __B) 83860b57cec5SDimitry Andric { 83870b57cec5SDimitry Andric return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B); 83880b57cec5SDimitry Andric } 83890b57cec5SDimitry Andric 83900b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS 83910b57cec5SDimitry Andric _mm512_kortestz (__mmask16 __A, __mmask16 __B) 83920b57cec5SDimitry Andric { 83930b57cec5SDimitry Andric return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); 83940b57cec5SDimitry Andric } 83950b57cec5SDimitry Andric 83960b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 83970b57cec5SDimitry Andric _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B) 83980b57cec5SDimitry Andric { 83990b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestchi(__A, __B); 84000b57cec5SDimitry Andric } 84010b57cec5SDimitry Andric 84020b57cec5SDimitry Andric static __inline__ unsigned char 
__DEFAULT_FN_ATTRS 84030b57cec5SDimitry Andric _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B) 84040b57cec5SDimitry Andric { 84050b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); 84060b57cec5SDimitry Andric } 84070b57cec5SDimitry Andric 84080b57cec5SDimitry Andric static __inline__ unsigned char __DEFAULT_FN_ATTRS 84090b57cec5SDimitry Andric _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { 84100b57cec5SDimitry Andric *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B); 84110b57cec5SDimitry Andric return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); 84120b57cec5SDimitry Andric } 84130b57cec5SDimitry Andric 84140b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 84150b57cec5SDimitry Andric _mm512_kunpackb (__mmask16 __A, __mmask16 __B) 84160b57cec5SDimitry Andric { 84170b57cec5SDimitry Andric return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); 84180b57cec5SDimitry Andric } 84190b57cec5SDimitry Andric 84200b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 84210b57cec5SDimitry Andric _mm512_kxnor (__mmask16 __A, __mmask16 __B) 84220b57cec5SDimitry Andric { 84230b57cec5SDimitry Andric return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); 84240b57cec5SDimitry Andric } 84250b57cec5SDimitry Andric 84260b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 84270b57cec5SDimitry Andric _mm512_kxor (__mmask16 __A, __mmask16 __B) 84280b57cec5SDimitry Andric { 84290b57cec5SDimitry Andric return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); 84300b57cec5SDimitry Andric } 84310b57cec5SDimitry Andric 84320b57cec5SDimitry Andric #define _kand_mask16 _mm512_kand 84330b57cec5SDimitry Andric #define _kandn_mask16 _mm512_kandn 84340b57cec5SDimitry Andric #define _knot_mask16 _mm512_knot 84350b57cec5SDimitry Andric #define _kor_mask16 _mm512_kor 84360b57cec5SDimitry Andric #define 
_kxnor_mask16 _mm512_kxnor 84370b57cec5SDimitry Andric #define _kxor_mask16 _mm512_kxor 84380b57cec5SDimitry Andric 84390b57cec5SDimitry Andric #define _kshiftli_mask16(A, I) \ 8440349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))) 84410b57cec5SDimitry Andric 84420b57cec5SDimitry Andric #define _kshiftri_mask16(A, I) \ 8443349cc55cSDimitry Andric ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))) 84440b57cec5SDimitry Andric 84450b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS 84460b57cec5SDimitry Andric _cvtmask16_u32(__mmask16 __A) { 84470b57cec5SDimitry Andric return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A); 84480b57cec5SDimitry Andric } 84490b57cec5SDimitry Andric 84500b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 84510b57cec5SDimitry Andric _cvtu32_mask16(unsigned int __A) { 84520b57cec5SDimitry Andric return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A); 84530b57cec5SDimitry Andric } 84540b57cec5SDimitry Andric 84550b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS 84560b57cec5SDimitry Andric _load_mask16(__mmask16 *__A) { 84570b57cec5SDimitry Andric return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A); 84580b57cec5SDimitry Andric } 84590b57cec5SDimitry Andric 84600b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 84610b57cec5SDimitry Andric _store_mask16(__mmask16 *__A, __mmask16 __B) { 84620b57cec5SDimitry Andric *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B); 84630b57cec5SDimitry Andric } 84640b57cec5SDimitry Andric 84650b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 8466a7dea167SDimitry Andric _mm512_stream_si512 (void * __P, __m512i __A) 84670b57cec5SDimitry Andric { 84680b57cec5SDimitry Andric typedef __v8di __v8di_aligned __attribute__((aligned(64))); 84690b57cec5SDimitry Andric __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P); 
84700b57cec5SDimitry Andric } 84710b57cec5SDimitry Andric 84720b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 84730b57cec5SDimitry Andric _mm512_stream_load_si512 (void const *__P) 84740b57cec5SDimitry Andric { 84750b57cec5SDimitry Andric typedef __v8di __v8di_aligned __attribute__((aligned(64))); 84760b57cec5SDimitry Andric return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P); 84770b57cec5SDimitry Andric } 84780b57cec5SDimitry Andric 84790b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 8480a7dea167SDimitry Andric _mm512_stream_pd (void *__P, __m512d __A) 84810b57cec5SDimitry Andric { 84820b57cec5SDimitry Andric typedef __v8df __v8df_aligned __attribute__((aligned(64))); 84830b57cec5SDimitry Andric __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P); 84840b57cec5SDimitry Andric } 84850b57cec5SDimitry Andric 84860b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 8487a7dea167SDimitry Andric _mm512_stream_ps (void *__P, __m512 __A) 84880b57cec5SDimitry Andric { 84890b57cec5SDimitry Andric typedef __v16sf __v16sf_aligned __attribute__((aligned(64))); 84900b57cec5SDimitry Andric __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P); 84910b57cec5SDimitry Andric } 84920b57cec5SDimitry Andric 84930b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 84940b57cec5SDimitry Andric _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) 84950b57cec5SDimitry Andric { 84960b57cec5SDimitry Andric return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 84970b57cec5SDimitry Andric (__v8df) __W, 84980b57cec5SDimitry Andric (__mmask8) __U); 84990b57cec5SDimitry Andric } 85000b57cec5SDimitry Andric 85010b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 85020b57cec5SDimitry Andric _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) 85030b57cec5SDimitry Andric { 85040b57cec5SDimitry Andric return (__m512d) 
__builtin_ia32_compressdf512_mask ((__v8df) __A, 85050b57cec5SDimitry Andric (__v8df) 85060b57cec5SDimitry Andric _mm512_setzero_pd (), 85070b57cec5SDimitry Andric (__mmask8) __U); 85080b57cec5SDimitry Andric } 85090b57cec5SDimitry Andric 85100b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 85110b57cec5SDimitry Andric _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 85120b57cec5SDimitry Andric { 85130b57cec5SDimitry Andric return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 85140b57cec5SDimitry Andric (__v8di) __W, 85150b57cec5SDimitry Andric (__mmask8) __U); 85160b57cec5SDimitry Andric } 85170b57cec5SDimitry Andric 85180b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 85190b57cec5SDimitry Andric _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) 85200b57cec5SDimitry Andric { 85210b57cec5SDimitry Andric return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 85220b57cec5SDimitry Andric (__v8di) 85230b57cec5SDimitry Andric _mm512_setzero_si512 (), 85240b57cec5SDimitry Andric (__mmask8) __U); 85250b57cec5SDimitry Andric } 85260b57cec5SDimitry Andric 85270b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 85280b57cec5SDimitry Andric _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) 85290b57cec5SDimitry Andric { 85300b57cec5SDimitry Andric return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 85310b57cec5SDimitry Andric (__v16sf) __W, 85320b57cec5SDimitry Andric (__mmask16) __U); 85330b57cec5SDimitry Andric } 85340b57cec5SDimitry Andric 85350b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 85360b57cec5SDimitry Andric _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) 85370b57cec5SDimitry Andric { 85380b57cec5SDimitry Andric return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 85390b57cec5SDimitry Andric (__v16sf) 85400b57cec5SDimitry Andric _mm512_setzero_ps (), 85410b57cec5SDimitry Andric 
(__mmask16) __U); 85420b57cec5SDimitry Andric } 85430b57cec5SDimitry Andric 85440b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 85450b57cec5SDimitry Andric _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 85460b57cec5SDimitry Andric { 85470b57cec5SDimitry Andric return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 85480b57cec5SDimitry Andric (__v16si) __W, 85490b57cec5SDimitry Andric (__mmask16) __U); 85500b57cec5SDimitry Andric } 85510b57cec5SDimitry Andric 85520b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 85530b57cec5SDimitry Andric _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) 85540b57cec5SDimitry Andric { 85550b57cec5SDimitry Andric return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 85560b57cec5SDimitry Andric (__v16si) 85570b57cec5SDimitry Andric _mm512_setzero_si512 (), 85580b57cec5SDimitry Andric (__mmask16) __U); 85590b57cec5SDimitry Andric } 85600b57cec5SDimitry Andric 85610b57cec5SDimitry Andric #define _mm_cmp_round_ss_mask(X, Y, P, R) \ 8562349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 85630b57cec5SDimitry Andric (__v4sf)(__m128)(Y), (int)(P), \ 8564349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 85650b57cec5SDimitry Andric 85660b57cec5SDimitry Andric #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ 8567349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 85680b57cec5SDimitry Andric (__v4sf)(__m128)(Y), (int)(P), \ 8569349cc55cSDimitry Andric (__mmask8)(M), (int)(R))) 85700b57cec5SDimitry Andric 85710b57cec5SDimitry Andric #define _mm_cmp_ss_mask(X, Y, P) \ 8572349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 85730b57cec5SDimitry Andric (__v4sf)(__m128)(Y), (int)(P), \ 85740b57cec5SDimitry Andric (__mmask8)-1, \ 8575349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 85760b57cec5SDimitry Andric 85770b57cec5SDimitry Andric #define 
_mm_mask_cmp_ss_mask(M, X, Y, P) \ 8578349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 85790b57cec5SDimitry Andric (__v4sf)(__m128)(Y), (int)(P), \ 85800b57cec5SDimitry Andric (__mmask8)(M), \ 8581349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 85820b57cec5SDimitry Andric 85830b57cec5SDimitry Andric #define _mm_cmp_round_sd_mask(X, Y, P, R) \ 8584349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 85850b57cec5SDimitry Andric (__v2df)(__m128d)(Y), (int)(P), \ 8586349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 85870b57cec5SDimitry Andric 85880b57cec5SDimitry Andric #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ 8589349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 85900b57cec5SDimitry Andric (__v2df)(__m128d)(Y), (int)(P), \ 8591349cc55cSDimitry Andric (__mmask8)(M), (int)(R))) 85920b57cec5SDimitry Andric 85930b57cec5SDimitry Andric #define _mm_cmp_sd_mask(X, Y, P) \ 8594349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 85950b57cec5SDimitry Andric (__v2df)(__m128d)(Y), (int)(P), \ 85960b57cec5SDimitry Andric (__mmask8)-1, \ 8597349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 85980b57cec5SDimitry Andric 85990b57cec5SDimitry Andric #define _mm_mask_cmp_sd_mask(M, X, Y, P) \ 8600349cc55cSDimitry Andric ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 86010b57cec5SDimitry Andric (__v2df)(__m128d)(Y), (int)(P), \ 86020b57cec5SDimitry Andric (__mmask8)(M), \ 8603349cc55cSDimitry Andric _MM_FROUND_CUR_DIRECTION)) 86040b57cec5SDimitry Andric 86050b57cec5SDimitry Andric /* Bit Test */ 86060b57cec5SDimitry Andric 86070b57cec5SDimitry Andric static __inline __mmask16 __DEFAULT_FN_ATTRS512 86080b57cec5SDimitry Andric _mm512_test_epi32_mask (__m512i __A, __m512i __B) 86090b57cec5SDimitry Andric { 86100b57cec5SDimitry Andric return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B), 86110b57cec5SDimitry Andric 
_mm512_setzero_si512()); 86120b57cec5SDimitry Andric } 86130b57cec5SDimitry Andric 86140b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 86150b57cec5SDimitry Andric _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 86160b57cec5SDimitry Andric { 86170b57cec5SDimitry Andric return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), 86180b57cec5SDimitry Andric _mm512_setzero_si512()); 86190b57cec5SDimitry Andric } 86200b57cec5SDimitry Andric 86210b57cec5SDimitry Andric static __inline __mmask8 __DEFAULT_FN_ATTRS512 86220b57cec5SDimitry Andric _mm512_test_epi64_mask (__m512i __A, __m512i __B) 86230b57cec5SDimitry Andric { 86240b57cec5SDimitry Andric return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B), 86250b57cec5SDimitry Andric _mm512_setzero_si512()); 86260b57cec5SDimitry Andric } 86270b57cec5SDimitry Andric 86280b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 86290b57cec5SDimitry Andric _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 86300b57cec5SDimitry Andric { 86310b57cec5SDimitry Andric return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), 86320b57cec5SDimitry Andric _mm512_setzero_si512()); 86330b57cec5SDimitry Andric } 86340b57cec5SDimitry Andric 86350b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 86360b57cec5SDimitry Andric _mm512_testn_epi32_mask (__m512i __A, __m512i __B) 86370b57cec5SDimitry Andric { 86380b57cec5SDimitry Andric return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B), 86390b57cec5SDimitry Andric _mm512_setzero_si512()); 86400b57cec5SDimitry Andric } 86410b57cec5SDimitry Andric 86420b57cec5SDimitry Andric static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 86430b57cec5SDimitry Andric _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 86440b57cec5SDimitry Andric { 86450b57cec5SDimitry Andric return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), 
86460b57cec5SDimitry Andric _mm512_setzero_si512()); 86470b57cec5SDimitry Andric } 86480b57cec5SDimitry Andric 86490b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 86500b57cec5SDimitry Andric _mm512_testn_epi64_mask (__m512i __A, __m512i __B) 86510b57cec5SDimitry Andric { 86520b57cec5SDimitry Andric return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B), 86530b57cec5SDimitry Andric _mm512_setzero_si512()); 86540b57cec5SDimitry Andric } 86550b57cec5SDimitry Andric 86560b57cec5SDimitry Andric static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 86570b57cec5SDimitry Andric _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 86580b57cec5SDimitry Andric { 86590b57cec5SDimitry Andric return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), 86600b57cec5SDimitry Andric _mm512_setzero_si512()); 86610b57cec5SDimitry Andric } 86620b57cec5SDimitry Andric 86630b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 86640b57cec5SDimitry Andric _mm512_movehdup_ps (__m512 __A) 86650b57cec5SDimitry Andric { 86660b57cec5SDimitry Andric return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 86670b57cec5SDimitry Andric 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); 86680b57cec5SDimitry Andric } 86690b57cec5SDimitry Andric 86700b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 86710b57cec5SDimitry Andric _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) 86720b57cec5SDimitry Andric { 86730b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 86740b57cec5SDimitry Andric (__v16sf)_mm512_movehdup_ps(__A), 86750b57cec5SDimitry Andric (__v16sf)__W); 86760b57cec5SDimitry Andric } 86770b57cec5SDimitry Andric 86780b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 86790b57cec5SDimitry Andric _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) 86800b57cec5SDimitry Andric { 86810b57cec5SDimitry Andric return 
(__m512)__builtin_ia32_selectps_512((__mmask16)__U, 86820b57cec5SDimitry Andric (__v16sf)_mm512_movehdup_ps(__A), 86830b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 86840b57cec5SDimitry Andric } 86850b57cec5SDimitry Andric 86860b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 86870b57cec5SDimitry Andric _mm512_moveldup_ps (__m512 __A) 86880b57cec5SDimitry Andric { 86890b57cec5SDimitry Andric return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 86900b57cec5SDimitry Andric 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); 86910b57cec5SDimitry Andric } 86920b57cec5SDimitry Andric 86930b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 86940b57cec5SDimitry Andric _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) 86950b57cec5SDimitry Andric { 86960b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 86970b57cec5SDimitry Andric (__v16sf)_mm512_moveldup_ps(__A), 86980b57cec5SDimitry Andric (__v16sf)__W); 86990b57cec5SDimitry Andric } 87000b57cec5SDimitry Andric 87010b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 87020b57cec5SDimitry Andric _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) 87030b57cec5SDimitry Andric { 87040b57cec5SDimitry Andric return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 87050b57cec5SDimitry Andric (__v16sf)_mm512_moveldup_ps(__A), 87060b57cec5SDimitry Andric (__v16sf)_mm512_setzero_ps()); 87070b57cec5SDimitry Andric } 87080b57cec5SDimitry Andric 87090b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 87100b57cec5SDimitry Andric _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 87110b57cec5SDimitry Andric { 87120b57cec5SDimitry Andric return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); 87130b57cec5SDimitry Andric } 87140b57cec5SDimitry Andric 87150b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 87160b57cec5SDimitry Andric 
_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) 87170b57cec5SDimitry Andric { 87180b57cec5SDimitry Andric return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), 87190b57cec5SDimitry Andric _mm_setzero_ps()); 87200b57cec5SDimitry Andric } 87210b57cec5SDimitry Andric 87220b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 87230b57cec5SDimitry Andric _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 87240b57cec5SDimitry Andric { 87250b57cec5SDimitry Andric return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); 87260b57cec5SDimitry Andric } 87270b57cec5SDimitry Andric 87280b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 87290b57cec5SDimitry Andric _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) 87300b57cec5SDimitry Andric { 87310b57cec5SDimitry Andric return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), 87320b57cec5SDimitry Andric _mm_setzero_pd()); 87330b57cec5SDimitry Andric } 87340b57cec5SDimitry Andric 87350b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 87360b57cec5SDimitry Andric _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) 87370b57cec5SDimitry Andric { 87380b57cec5SDimitry Andric __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1); 87390b57cec5SDimitry Andric } 87400b57cec5SDimitry Andric 87410b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS128 87420b57cec5SDimitry Andric _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) 87430b57cec5SDimitry Andric { 87440b57cec5SDimitry Andric __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1); 87450b57cec5SDimitry Andric } 87460b57cec5SDimitry Andric 87470b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 87480b57cec5SDimitry Andric _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) 87490b57cec5SDimitry Andric { 87500b57cec5SDimitry Andric __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, 
87510b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), 87520b57cec5SDimitry Andric 0, 4, 4, 4); 87530b57cec5SDimitry Andric 8754480093f4SDimitry Andric return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1); 87550b57cec5SDimitry Andric } 87560b57cec5SDimitry Andric 87570b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 87580b57cec5SDimitry Andric _mm_maskz_load_ss (__mmask8 __U, const float* __A) 87590b57cec5SDimitry Andric { 8760480093f4SDimitry Andric return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A, 87610b57cec5SDimitry Andric (__v4sf) _mm_setzero_ps(), 87620b57cec5SDimitry Andric __U & 1); 87630b57cec5SDimitry Andric } 87640b57cec5SDimitry Andric 87650b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 87660b57cec5SDimitry Andric _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) 87670b57cec5SDimitry Andric { 87680b57cec5SDimitry Andric __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, 87690b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), 87700b57cec5SDimitry Andric 0, 2); 87710b57cec5SDimitry Andric 8772480093f4SDimitry Andric return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1); 87730b57cec5SDimitry Andric } 87740b57cec5SDimitry Andric 87750b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 87760b57cec5SDimitry Andric _mm_maskz_load_sd (__mmask8 __U, const double* __A) 87770b57cec5SDimitry Andric { 8778480093f4SDimitry Andric return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, 87790b57cec5SDimitry Andric (__v2df) _mm_setzero_pd(), 87800b57cec5SDimitry Andric __U & 1); 87810b57cec5SDimitry Andric } 87820b57cec5SDimitry Andric 87830b57cec5SDimitry Andric #define _mm512_shuffle_epi32(A, I) \ 8784349cc55cSDimitry Andric ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))) 87850b57cec5SDimitry Andric 87860b57cec5SDimitry Andric #define _mm512_mask_shuffle_epi32(W, U, A, I) \ 
8787349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 87880b57cec5SDimitry Andric (__v16si)_mm512_shuffle_epi32((A), (I)), \ 8789349cc55cSDimitry Andric (__v16si)(__m512i)(W))) 87900b57cec5SDimitry Andric 87910b57cec5SDimitry Andric #define _mm512_maskz_shuffle_epi32(U, A, I) \ 8792349cc55cSDimitry Andric ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 87930b57cec5SDimitry Andric (__v16si)_mm512_shuffle_epi32((A), (I)), \ 8794349cc55cSDimitry Andric (__v16si)_mm512_setzero_si512())) 87950b57cec5SDimitry Andric 87960b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 87970b57cec5SDimitry Andric _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) 87980b57cec5SDimitry Andric { 87990b57cec5SDimitry Andric return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 88000b57cec5SDimitry Andric (__v8df) __W, 88010b57cec5SDimitry Andric (__mmask8) __U); 88020b57cec5SDimitry Andric } 88030b57cec5SDimitry Andric 88040b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 88050b57cec5SDimitry Andric _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) 88060b57cec5SDimitry Andric { 88070b57cec5SDimitry Andric return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 88080b57cec5SDimitry Andric (__v8df) _mm512_setzero_pd (), 88090b57cec5SDimitry Andric (__mmask8) __U); 88100b57cec5SDimitry Andric } 88110b57cec5SDimitry Andric 88120b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 88130b57cec5SDimitry Andric _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 88140b57cec5SDimitry Andric { 88150b57cec5SDimitry Andric return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 88160b57cec5SDimitry Andric (__v8di) __W, 88170b57cec5SDimitry Andric (__mmask8) __U); 88180b57cec5SDimitry Andric } 88190b57cec5SDimitry Andric 88200b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 88210b57cec5SDimitry Andric _mm512_maskz_expand_epi64 ( 
__mmask8 __U, __m512i __A) 88220b57cec5SDimitry Andric { 88230b57cec5SDimitry Andric return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 88240b57cec5SDimitry Andric (__v8di) _mm512_setzero_si512 (), 88250b57cec5SDimitry Andric (__mmask8) __U); 88260b57cec5SDimitry Andric } 88270b57cec5SDimitry Andric 88280b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 88290b57cec5SDimitry Andric _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) 88300b57cec5SDimitry Andric { 88310b57cec5SDimitry Andric return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 88320b57cec5SDimitry Andric (__v8df) __W, 88330b57cec5SDimitry Andric (__mmask8) __U); 88340b57cec5SDimitry Andric } 88350b57cec5SDimitry Andric 88360b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 88370b57cec5SDimitry Andric _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) 88380b57cec5SDimitry Andric { 88390b57cec5SDimitry Andric return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 88400b57cec5SDimitry Andric (__v8df) _mm512_setzero_pd(), 88410b57cec5SDimitry Andric (__mmask8) __U); 88420b57cec5SDimitry Andric } 88430b57cec5SDimitry Andric 88440b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 88450b57cec5SDimitry Andric _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) 88460b57cec5SDimitry Andric { 88470b57cec5SDimitry Andric return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 88480b57cec5SDimitry Andric (__v8di) __W, 88490b57cec5SDimitry Andric (__mmask8) __U); 88500b57cec5SDimitry Andric } 88510b57cec5SDimitry Andric 88520b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 88530b57cec5SDimitry Andric _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) 88540b57cec5SDimitry Andric { 88550b57cec5SDimitry Andric return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 88560b57cec5SDimitry 
Andric (__v8di) _mm512_setzero_si512(), 88570b57cec5SDimitry Andric (__mmask8) __U); 88580b57cec5SDimitry Andric } 88590b57cec5SDimitry Andric 88600b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 88610b57cec5SDimitry Andric _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) 88620b57cec5SDimitry Andric { 88630b57cec5SDimitry Andric return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 88640b57cec5SDimitry Andric (__v16sf) __W, 88650b57cec5SDimitry Andric (__mmask16) __U); 88660b57cec5SDimitry Andric } 88670b57cec5SDimitry Andric 88680b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 88690b57cec5SDimitry Andric _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) 88700b57cec5SDimitry Andric { 88710b57cec5SDimitry Andric return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 88720b57cec5SDimitry Andric (__v16sf) _mm512_setzero_ps(), 88730b57cec5SDimitry Andric (__mmask16) __U); 88740b57cec5SDimitry Andric } 88750b57cec5SDimitry Andric 88760b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 88770b57cec5SDimitry Andric _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) 88780b57cec5SDimitry Andric { 88790b57cec5SDimitry Andric return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 88800b57cec5SDimitry Andric (__v16si) __W, 88810b57cec5SDimitry Andric (__mmask16) __U); 88820b57cec5SDimitry Andric } 88830b57cec5SDimitry Andric 88840b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 88850b57cec5SDimitry Andric _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) 88860b57cec5SDimitry Andric { 88870b57cec5SDimitry Andric return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 88880b57cec5SDimitry Andric (__v16si) _mm512_setzero_si512(), 88890b57cec5SDimitry Andric (__mmask16) __U); 88900b57cec5SDimitry Andric } 88910b57cec5SDimitry Andric 
88920b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 88930b57cec5SDimitry Andric _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) 88940b57cec5SDimitry Andric { 88950b57cec5SDimitry Andric return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 88960b57cec5SDimitry Andric (__v16sf) __W, 88970b57cec5SDimitry Andric (__mmask16) __U); 88980b57cec5SDimitry Andric } 88990b57cec5SDimitry Andric 89000b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 89010b57cec5SDimitry Andric _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) 89020b57cec5SDimitry Andric { 89030b57cec5SDimitry Andric return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 89040b57cec5SDimitry Andric (__v16sf) _mm512_setzero_ps(), 89050b57cec5SDimitry Andric (__mmask16) __U); 89060b57cec5SDimitry Andric } 89070b57cec5SDimitry Andric 89080b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 89090b57cec5SDimitry Andric _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 89100b57cec5SDimitry Andric { 89110b57cec5SDimitry Andric return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 89120b57cec5SDimitry Andric (__v16si) __W, 89130b57cec5SDimitry Andric (__mmask16) __U); 89140b57cec5SDimitry Andric } 89150b57cec5SDimitry Andric 89160b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 89170b57cec5SDimitry Andric _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) 89180b57cec5SDimitry Andric { 89190b57cec5SDimitry Andric return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 89200b57cec5SDimitry Andric (__v16si) _mm512_setzero_si512(), 89210b57cec5SDimitry Andric (__mmask16) __U); 89220b57cec5SDimitry Andric } 89230b57cec5SDimitry Andric 89240b57cec5SDimitry Andric #define _mm512_cvt_roundps_pd(A, R) \ 8925349cc55cSDimitry Andric ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 89260b57cec5SDimitry Andric (__v8df)_mm512_undefined_pd(), \ 
8927349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 89280b57cec5SDimitry Andric 89290b57cec5SDimitry Andric #define _mm512_mask_cvt_roundps_pd(W, U, A, R) \ 8930349cc55cSDimitry Andric ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 89310b57cec5SDimitry Andric (__v8df)(__m512d)(W), \ 8932349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 89330b57cec5SDimitry Andric 89340b57cec5SDimitry Andric #define _mm512_maskz_cvt_roundps_pd(U, A, R) \ 8935349cc55cSDimitry Andric ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 89360b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd(), \ 8937349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 89380b57cec5SDimitry Andric 89390b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 89400b57cec5SDimitry Andric _mm512_cvtps_pd (__m256 __A) 89410b57cec5SDimitry Andric { 89420b57cec5SDimitry Andric return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df); 89430b57cec5SDimitry Andric } 89440b57cec5SDimitry Andric 89450b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 89460b57cec5SDimitry Andric _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) 89470b57cec5SDimitry Andric { 89480b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 89490b57cec5SDimitry Andric (__v8df)_mm512_cvtps_pd(__A), 89500b57cec5SDimitry Andric (__v8df)__W); 89510b57cec5SDimitry Andric } 89520b57cec5SDimitry Andric 89530b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 89540b57cec5SDimitry Andric _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) 89550b57cec5SDimitry Andric { 89560b57cec5SDimitry Andric return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 89570b57cec5SDimitry Andric (__v8df)_mm512_cvtps_pd(__A), 89580b57cec5SDimitry Andric (__v8df)_mm512_setzero_pd()); 89590b57cec5SDimitry Andric } 89600b57cec5SDimitry Andric 89610b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 89620b57cec5SDimitry Andric 
_mm512_cvtpslo_pd (__m512 __A) 89630b57cec5SDimitry Andric { 89640b57cec5SDimitry Andric return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); 89650b57cec5SDimitry Andric } 89660b57cec5SDimitry Andric 89670b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 89680b57cec5SDimitry Andric _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) 89690b57cec5SDimitry Andric { 89700b57cec5SDimitry Andric return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); 89710b57cec5SDimitry Andric } 89720b57cec5SDimitry Andric 89730b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 89740b57cec5SDimitry Andric _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) 89750b57cec5SDimitry Andric { 89760b57cec5SDimitry Andric return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 89770b57cec5SDimitry Andric (__v8df) __A, 89780b57cec5SDimitry Andric (__v8df) __W); 89790b57cec5SDimitry Andric } 89800b57cec5SDimitry Andric 89810b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 89820b57cec5SDimitry Andric _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) 89830b57cec5SDimitry Andric { 89840b57cec5SDimitry Andric return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, 89850b57cec5SDimitry Andric (__v8df) __A, 89860b57cec5SDimitry Andric (__v8df) _mm512_setzero_pd ()); 89870b57cec5SDimitry Andric } 89880b57cec5SDimitry Andric 89890b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 89900b57cec5SDimitry Andric _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) 89910b57cec5SDimitry Andric { 89920b57cec5SDimitry Andric return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 89930b57cec5SDimitry Andric (__v16sf) __A, 89940b57cec5SDimitry Andric (__v16sf) __W); 89950b57cec5SDimitry Andric } 89960b57cec5SDimitry Andric 89970b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 89980b57cec5SDimitry Andric _mm512_maskz_mov_ps (__mmask16 __U, 
__m512 __A) 89990b57cec5SDimitry Andric { 90000b57cec5SDimitry Andric return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, 90010b57cec5SDimitry Andric (__v16sf) __A, 90020b57cec5SDimitry Andric (__v16sf) _mm512_setzero_ps ()); 90030b57cec5SDimitry Andric } 90040b57cec5SDimitry Andric 90050b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 90060b57cec5SDimitry Andric _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) 90070b57cec5SDimitry Andric { 90080b57cec5SDimitry Andric __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, 90090b57cec5SDimitry Andric (__mmask8) __U); 90100b57cec5SDimitry Andric } 90110b57cec5SDimitry Andric 90120b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 90130b57cec5SDimitry Andric _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) 90140b57cec5SDimitry Andric { 90150b57cec5SDimitry Andric __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, 90160b57cec5SDimitry Andric (__mmask8) __U); 90170b57cec5SDimitry Andric } 90180b57cec5SDimitry Andric 90190b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 90200b57cec5SDimitry Andric _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) 90210b57cec5SDimitry Andric { 90220b57cec5SDimitry Andric __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, 90230b57cec5SDimitry Andric (__mmask16) __U); 90240b57cec5SDimitry Andric } 90250b57cec5SDimitry Andric 90260b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS512 90270b57cec5SDimitry Andric _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) 90280b57cec5SDimitry Andric { 90290b57cec5SDimitry Andric __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, 90300b57cec5SDimitry Andric (__mmask16) __U); 90310b57cec5SDimitry Andric } 90320b57cec5SDimitry Andric 90330b57cec5SDimitry Andric #define _mm_cvt_roundsd_ss(A, B, R) \ 9034349cc55cSDimitry 
Andric ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 90350b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 90360b57cec5SDimitry Andric (__v4sf)_mm_undefined_ps(), \ 9037349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 90380b57cec5SDimitry Andric 90390b57cec5SDimitry Andric #define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \ 9040349cc55cSDimitry Andric ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 90410b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 90420b57cec5SDimitry Andric (__v4sf)(__m128)(W), \ 9043349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 90440b57cec5SDimitry Andric 90450b57cec5SDimitry Andric #define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \ 9046349cc55cSDimitry Andric ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 90470b57cec5SDimitry Andric (__v2df)(__m128d)(B), \ 90480b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), \ 9049349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 90500b57cec5SDimitry Andric 90510b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 90520b57cec5SDimitry Andric _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) 90530b57cec5SDimitry Andric { 90540b57cec5SDimitry Andric return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, 90550b57cec5SDimitry Andric (__v2df)__B, 90560b57cec5SDimitry Andric (__v4sf)__W, 90570b57cec5SDimitry Andric (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); 90580b57cec5SDimitry Andric } 90590b57cec5SDimitry Andric 90600b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 90610b57cec5SDimitry Andric _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) 90620b57cec5SDimitry Andric { 90630b57cec5SDimitry Andric return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, 90640b57cec5SDimitry Andric (__v2df)__B, 90650b57cec5SDimitry Andric (__v4sf)_mm_setzero_ps(), 90660b57cec5SDimitry Andric (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); 90670b57cec5SDimitry Andric } 90680b57cec5SDimitry Andric 90690b57cec5SDimitry 
Andric #define _mm_cvtss_i32 _mm_cvtss_si32 90700b57cec5SDimitry Andric #define _mm_cvtsd_i32 _mm_cvtsd_si32 90710b57cec5SDimitry Andric #define _mm_cvti32_sd _mm_cvtsi32_sd 90720b57cec5SDimitry Andric #define _mm_cvti32_ss _mm_cvtsi32_ss 90730b57cec5SDimitry Andric #ifdef __x86_64__ 90740b57cec5SDimitry Andric #define _mm_cvtss_i64 _mm_cvtss_si64 90750b57cec5SDimitry Andric #define _mm_cvtsd_i64 _mm_cvtsd_si64 90760b57cec5SDimitry Andric #define _mm_cvti64_sd _mm_cvtsi64_sd 90770b57cec5SDimitry Andric #define _mm_cvti64_ss _mm_cvtsi64_ss 90780b57cec5SDimitry Andric #endif 90790b57cec5SDimitry Andric 90800b57cec5SDimitry Andric #ifdef __x86_64__ 90810b57cec5SDimitry Andric #define _mm_cvt_roundi64_sd(A, B, R) \ 9082349cc55cSDimitry Andric ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9083349cc55cSDimitry Andric (int)(R))) 90840b57cec5SDimitry Andric 90850b57cec5SDimitry Andric #define _mm_cvt_roundsi64_sd(A, B, R) \ 9086349cc55cSDimitry Andric ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9087349cc55cSDimitry Andric (int)(R))) 90880b57cec5SDimitry Andric #endif 90890b57cec5SDimitry Andric 90900b57cec5SDimitry Andric #define _mm_cvt_roundsi32_ss(A, B, R) \ 9091349cc55cSDimitry Andric ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))) 90920b57cec5SDimitry Andric 90930b57cec5SDimitry Andric #define _mm_cvt_roundi32_ss(A, B, R) \ 9094349cc55cSDimitry Andric ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))) 90950b57cec5SDimitry Andric 90960b57cec5SDimitry Andric #ifdef __x86_64__ 90970b57cec5SDimitry Andric #define _mm_cvt_roundsi64_ss(A, B, R) \ 9098349cc55cSDimitry Andric ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9099349cc55cSDimitry Andric (int)(R))) 91000b57cec5SDimitry Andric 91010b57cec5SDimitry Andric #define _mm_cvt_roundi64_ss(A, B, R) \ 9102349cc55cSDimitry Andric 
((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9103349cc55cSDimitry Andric (int)(R))) 91040b57cec5SDimitry Andric #endif 91050b57cec5SDimitry Andric 91060b57cec5SDimitry Andric #define _mm_cvt_roundss_sd(A, B, R) \ 9107349cc55cSDimitry Andric ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 91080b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 91090b57cec5SDimitry Andric (__v2df)_mm_undefined_pd(), \ 9110349cc55cSDimitry Andric (__mmask8)-1, (int)(R))) 91110b57cec5SDimitry Andric 91120b57cec5SDimitry Andric #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \ 9113349cc55cSDimitry Andric ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 91140b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 91150b57cec5SDimitry Andric (__v2df)(__m128d)(W), \ 9116349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 91170b57cec5SDimitry Andric 91180b57cec5SDimitry Andric #define _mm_maskz_cvt_roundss_sd(U, A, B, R) \ 9119349cc55cSDimitry Andric ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 91200b57cec5SDimitry Andric (__v4sf)(__m128)(B), \ 91210b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), \ 9122349cc55cSDimitry Andric (__mmask8)(U), (int)(R))) 91230b57cec5SDimitry Andric 91240b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 91250b57cec5SDimitry Andric _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) 91260b57cec5SDimitry Andric { 91270b57cec5SDimitry Andric return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, 91280b57cec5SDimitry Andric (__v4sf)__B, 91290b57cec5SDimitry Andric (__v2df)__W, 91300b57cec5SDimitry Andric (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); 91310b57cec5SDimitry Andric } 91320b57cec5SDimitry Andric 91330b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 91340b57cec5SDimitry Andric _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B) 91350b57cec5SDimitry Andric { 91360b57cec5SDimitry Andric return 
__builtin_ia32_cvtss2sd_round_mask((__v2df)__A, 91370b57cec5SDimitry Andric (__v4sf)__B, 91380b57cec5SDimitry Andric (__v2df)_mm_setzero_pd(), 91390b57cec5SDimitry Andric (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); 91400b57cec5SDimitry Andric } 91410b57cec5SDimitry Andric 91420b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 91430b57cec5SDimitry Andric _mm_cvtu32_sd (__m128d __A, unsigned __B) 91440b57cec5SDimitry Andric { 91450b57cec5SDimitry Andric __A[0] = __B; 91460b57cec5SDimitry Andric return __A; 91470b57cec5SDimitry Andric } 91480b57cec5SDimitry Andric 91490b57cec5SDimitry Andric #ifdef __x86_64__ 91500b57cec5SDimitry Andric #define _mm_cvt_roundu64_sd(A, B, R) \ 9151349cc55cSDimitry Andric ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ 9152349cc55cSDimitry Andric (unsigned long long)(B), (int)(R))) 91530b57cec5SDimitry Andric 91540b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS128 91550b57cec5SDimitry Andric _mm_cvtu64_sd (__m128d __A, unsigned long long __B) 91560b57cec5SDimitry Andric { 91570b57cec5SDimitry Andric __A[0] = __B; 91580b57cec5SDimitry Andric return __A; 91590b57cec5SDimitry Andric } 91600b57cec5SDimitry Andric #endif 91610b57cec5SDimitry Andric 91620b57cec5SDimitry Andric #define _mm_cvt_roundu32_ss(A, B, R) \ 9163349cc55cSDimitry Andric ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ 9164349cc55cSDimitry Andric (int)(R))) 91650b57cec5SDimitry Andric 91660b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 91670b57cec5SDimitry Andric _mm_cvtu32_ss (__m128 __A, unsigned __B) 91680b57cec5SDimitry Andric { 91690b57cec5SDimitry Andric __A[0] = __B; 91700b57cec5SDimitry Andric return __A; 91710b57cec5SDimitry Andric } 91720b57cec5SDimitry Andric 91730b57cec5SDimitry Andric #ifdef __x86_64__ 91740b57cec5SDimitry Andric #define _mm_cvt_roundu64_ss(A, B, R) \ 9175349cc55cSDimitry Andric ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ 
9176349cc55cSDimitry Andric (unsigned long long)(B), (int)(R))) 91770b57cec5SDimitry Andric 91780b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS128 91790b57cec5SDimitry Andric _mm_cvtu64_ss (__m128 __A, unsigned long long __B) 91800b57cec5SDimitry Andric { 91810b57cec5SDimitry Andric __A[0] = __B; 91820b57cec5SDimitry Andric return __A; 91830b57cec5SDimitry Andric } 91840b57cec5SDimitry Andric #endif 91850b57cec5SDimitry Andric 91860b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 91870b57cec5SDimitry Andric _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) 91880b57cec5SDimitry Andric { 91890b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectd_512(__M, 91900b57cec5SDimitry Andric (__v16si) _mm512_set1_epi32(__A), 91910b57cec5SDimitry Andric (__v16si) __O); 91920b57cec5SDimitry Andric } 91930b57cec5SDimitry Andric 91940b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 91950b57cec5SDimitry Andric _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) 91960b57cec5SDimitry Andric { 91970b57cec5SDimitry Andric return (__m512i) __builtin_ia32_selectq_512(__M, 91980b57cec5SDimitry Andric (__v8di) _mm512_set1_epi64(__A), 91990b57cec5SDimitry Andric (__v8di) __O); 92000b57cec5SDimitry Andric } 92010b57cec5SDimitry Andric 92020b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 92030b57cec5SDimitry Andric _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59, 92040b57cec5SDimitry Andric char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, 92050b57cec5SDimitry Andric char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, 92060b57cec5SDimitry Andric char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, 92070b57cec5SDimitry Andric char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, 92080b57cec5SDimitry Andric char __e34, char __e33, char __e32, char __e31, char __e30, char 
__e29, 92090b57cec5SDimitry Andric char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, 92100b57cec5SDimitry Andric char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, 92110b57cec5SDimitry Andric char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, 92120b57cec5SDimitry Andric char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, 92130b57cec5SDimitry Andric char __e4, char __e3, char __e2, char __e1, char __e0) { 92140b57cec5SDimitry Andric 92150b57cec5SDimitry Andric return __extension__ (__m512i)(__v64qi) 92160b57cec5SDimitry Andric {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, 92170b57cec5SDimitry Andric __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, 92180b57cec5SDimitry Andric __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23, 92190b57cec5SDimitry Andric __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31, 92200b57cec5SDimitry Andric __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39, 92210b57cec5SDimitry Andric __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47, 92220b57cec5SDimitry Andric __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55, 92230b57cec5SDimitry Andric __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63}; 92240b57cec5SDimitry Andric } 92250b57cec5SDimitry Andric 92260b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 92270b57cec5SDimitry Andric _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, 92280b57cec5SDimitry Andric short __e27, short __e26, short __e25, short __e24, short __e23, 92290b57cec5SDimitry Andric short __e22, short __e21, short __e20, short __e19, short __e18, 92300b57cec5SDimitry Andric short __e17, short __e16, short __e15, short __e14, short __e13, 92310b57cec5SDimitry Andric short __e12, short __e11, short __e10, short __e9, short __e8, 92320b57cec5SDimitry Andric short __e7, short __e6, short __e5, short __e4, short __e3, 92330b57cec5SDimitry Andric short __e2, short __e1, short __e0) { 
92340b57cec5SDimitry Andric return __extension__ (__m512i)(__v32hi) 92350b57cec5SDimitry Andric {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7, 92360b57cec5SDimitry Andric __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15, 92370b57cec5SDimitry Andric __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23, 92380b57cec5SDimitry Andric __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 }; 92390b57cec5SDimitry Andric } 92400b57cec5SDimitry Andric 92410b57cec5SDimitry Andric static __inline __m512i __DEFAULT_FN_ATTRS512 92420b57cec5SDimitry Andric _mm512_set_epi32 (int __A, int __B, int __C, int __D, 92430b57cec5SDimitry Andric int __E, int __F, int __G, int __H, 92440b57cec5SDimitry Andric int __I, int __J, int __K, int __L, 92450b57cec5SDimitry Andric int __M, int __N, int __O, int __P) 92460b57cec5SDimitry Andric { 92470b57cec5SDimitry Andric return __extension__ (__m512i)(__v16si) 92480b57cec5SDimitry Andric { __P, __O, __N, __M, __L, __K, __J, __I, 92490b57cec5SDimitry Andric __H, __G, __F, __E, __D, __C, __B, __A }; 92500b57cec5SDimitry Andric } 92510b57cec5SDimitry Andric 92520b57cec5SDimitry Andric #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 92530b57cec5SDimitry Andric e8,e9,e10,e11,e12,e13,e14,e15) \ 92540b57cec5SDimitry Andric _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ 92550b57cec5SDimitry Andric (e5),(e4),(e3),(e2),(e1),(e0)) 92560b57cec5SDimitry Andric 92570b57cec5SDimitry Andric static __inline__ __m512i __DEFAULT_FN_ATTRS512 92580b57cec5SDimitry Andric _mm512_set_epi64 (long long __A, long long __B, long long __C, 92590b57cec5SDimitry Andric long long __D, long long __E, long long __F, 92600b57cec5SDimitry Andric long long __G, long long __H) 92610b57cec5SDimitry Andric { 92620b57cec5SDimitry Andric return __extension__ (__m512i) (__v8di) 92630b57cec5SDimitry Andric { __H, __G, __F, __E, __D, __C, __B, __A }; 92640b57cec5SDimitry Andric } 92650b57cec5SDimitry Andric 92660b57cec5SDimitry Andric #define 
_mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 92670b57cec5SDimitry Andric _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 92680b57cec5SDimitry Andric 92690b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 92700b57cec5SDimitry Andric _mm512_set_pd (double __A, double __B, double __C, double __D, 92710b57cec5SDimitry Andric double __E, double __F, double __G, double __H) 92720b57cec5SDimitry Andric { 92730b57cec5SDimitry Andric return __extension__ (__m512d) 92740b57cec5SDimitry Andric { __H, __G, __F, __E, __D, __C, __B, __A }; 92750b57cec5SDimitry Andric } 92760b57cec5SDimitry Andric 92770b57cec5SDimitry Andric #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 92780b57cec5SDimitry Andric _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 92790b57cec5SDimitry Andric 92800b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 92810b57cec5SDimitry Andric _mm512_set_ps (float __A, float __B, float __C, float __D, 92820b57cec5SDimitry Andric float __E, float __F, float __G, float __H, 92830b57cec5SDimitry Andric float __I, float __J, float __K, float __L, 92840b57cec5SDimitry Andric float __M, float __N, float __O, float __P) 92850b57cec5SDimitry Andric { 92860b57cec5SDimitry Andric return __extension__ (__m512) 92870b57cec5SDimitry Andric { __P, __O, __N, __M, __L, __K, __J, __I, 92880b57cec5SDimitry Andric __H, __G, __F, __E, __D, __C, __B, __A }; 92890b57cec5SDimitry Andric } 92900b57cec5SDimitry Andric 92910b57cec5SDimitry Andric #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 92920b57cec5SDimitry Andric _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ 92930b57cec5SDimitry Andric (e4),(e3),(e2),(e1),(e0)) 92940b57cec5SDimitry Andric 92950b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 92960b57cec5SDimitry Andric _mm512_abs_ps(__m512 __A) 92970b57cec5SDimitry Andric { 92980b57cec5SDimitry Andric return 
(__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; 92990b57cec5SDimitry Andric } 93000b57cec5SDimitry Andric 93010b57cec5SDimitry Andric static __inline__ __m512 __DEFAULT_FN_ATTRS512 93020b57cec5SDimitry Andric _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) 93030b57cec5SDimitry Andric { 93040b57cec5SDimitry Andric return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; 93050b57cec5SDimitry Andric } 93060b57cec5SDimitry Andric 93070b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 93080b57cec5SDimitry Andric _mm512_abs_pd(__m512d __A) 93090b57cec5SDimitry Andric { 93100b57cec5SDimitry Andric return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ; 93110b57cec5SDimitry Andric } 93120b57cec5SDimitry Andric 93130b57cec5SDimitry Andric static __inline__ __m512d __DEFAULT_FN_ATTRS512 93140b57cec5SDimitry Andric _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) 93150b57cec5SDimitry Andric { 93160b57cec5SDimitry Andric return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); 93170b57cec5SDimitry Andric } 93180b57cec5SDimitry Andric 93190b57cec5SDimitry Andric /* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as 93200b57cec5SDimitry Andric * outputs. This class of vector operation forms the basis of many scientific 9321d409305fSDimitry Andric * computations. In vector-reduction arithmetic, the evaluation order is 93220b57cec5SDimitry Andric * independent of the order of the input elements of V. 93230b57cec5SDimitry Andric 9324fe6060f1SDimitry Andric * For floating-point intrinsics: 9325fe6060f1SDimitry Andric * 1. When using fadd/fmul intrinsics, the order of operations within the 9326fe6060f1SDimitry Andric * vector is unspecified (associative math). 9327fe6060f1SDimitry Andric * 2. 
When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector 9328fe6060f1SDimitry Andric * produce unspecified results. 9329d409305fSDimitry Andric 93300b57cec5SDimitry Andric * Used bisection method. At each step, we partition the vector with previous 93310b57cec5SDimitry Andric * step in half, and the operation is performed on its two halves. 93320b57cec5SDimitry Andric * This takes log2(n) steps where n is the number of elements in the vector. 93330b57cec5SDimitry Andric */ 93340b57cec5SDimitry Andric 93350b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { 933681ad6265SDimitry Andric return __builtin_reduce_add((__v8di)__W); 93370b57cec5SDimitry Andric } 93380b57cec5SDimitry Andric 93390b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) { 934081ad6265SDimitry Andric return __builtin_reduce_mul((__v8di)__W); 93410b57cec5SDimitry Andric } 93420b57cec5SDimitry Andric 93430b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { 934404eeddc0SDimitry Andric return __builtin_reduce_and((__v8di)__W); 93450b57cec5SDimitry Andric } 93460b57cec5SDimitry Andric 93470b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) { 934804eeddc0SDimitry Andric return __builtin_reduce_or((__v8di)__W); 93490b57cec5SDimitry Andric } 93500b57cec5SDimitry Andric 93510b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 93520b57cec5SDimitry Andric _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { 93530b57cec5SDimitry Andric __W = _mm512_maskz_mov_epi64(__M, __W); 935481ad6265SDimitry Andric return __builtin_reduce_add((__v8di)__W); 93550b57cec5SDimitry Andric } 93560b57cec5SDimitry Andric 93570b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 93580b57cec5SDimitry Andric 
_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { 93590b57cec5SDimitry Andric __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W); 936081ad6265SDimitry Andric return __builtin_reduce_mul((__v8di)__W); 93610b57cec5SDimitry Andric } 93620b57cec5SDimitry Andric 93630b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 93640b57cec5SDimitry Andric _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { 936581ad6265SDimitry Andric __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W); 936604eeddc0SDimitry Andric return __builtin_reduce_and((__v8di)__W); 93670b57cec5SDimitry Andric } 93680b57cec5SDimitry Andric 93690b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 93700b57cec5SDimitry Andric _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { 93710b57cec5SDimitry Andric __W = _mm512_maskz_mov_epi64(__M, __W); 937204eeddc0SDimitry Andric return __builtin_reduce_or((__v8di)__W); 93730b57cec5SDimitry Andric } 93740b57cec5SDimitry Andric 9375d409305fSDimitry Andric // -0.0 is used to ignore the start value since it is the neutral value of 9376d409305fSDimitry Andric // floating point addition. 
For more information, please refer to 9377d409305fSDimitry Andric // https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic 93780b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) { 9379d409305fSDimitry Andric return __builtin_ia32_reduce_fadd_pd512(-0.0, __W); 93800b57cec5SDimitry Andric } 93810b57cec5SDimitry Andric 93820b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) { 9383e8d8bef9SDimitry Andric return __builtin_ia32_reduce_fmul_pd512(1.0, __W); 93840b57cec5SDimitry Andric } 93850b57cec5SDimitry Andric 93860b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 93870b57cec5SDimitry Andric _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) { 93880b57cec5SDimitry Andric __W = _mm512_maskz_mov_pd(__M, __W); 9389d409305fSDimitry Andric return __builtin_ia32_reduce_fadd_pd512(-0.0, __W); 93900b57cec5SDimitry Andric } 93910b57cec5SDimitry Andric 93920b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 93930b57cec5SDimitry Andric _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { 93940b57cec5SDimitry Andric __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W); 9395e8d8bef9SDimitry Andric return __builtin_ia32_reduce_fmul_pd512(1.0, __W); 93960b57cec5SDimitry Andric } 93970b57cec5SDimitry Andric 93980b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 93990b57cec5SDimitry Andric _mm512_reduce_add_epi32(__m512i __W) { 940081ad6265SDimitry Andric return __builtin_reduce_add((__v16si)__W); 94010b57cec5SDimitry Andric } 94020b57cec5SDimitry Andric 94030b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 94040b57cec5SDimitry Andric _mm512_reduce_mul_epi32(__m512i __W) { 940581ad6265SDimitry Andric return __builtin_reduce_mul((__v16si)__W); 94060b57cec5SDimitry Andric } 94070b57cec5SDimitry Andric 94080b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 
94090b57cec5SDimitry Andric _mm512_reduce_and_epi32(__m512i __W) { 941004eeddc0SDimitry Andric return __builtin_reduce_and((__v16si)__W); 94110b57cec5SDimitry Andric } 94120b57cec5SDimitry Andric 94130b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 94140b57cec5SDimitry Andric _mm512_reduce_or_epi32(__m512i __W) { 941504eeddc0SDimitry Andric return __builtin_reduce_or((__v16si)__W); 94160b57cec5SDimitry Andric } 94170b57cec5SDimitry Andric 94180b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 94190b57cec5SDimitry Andric _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) { 94200b57cec5SDimitry Andric __W = _mm512_maskz_mov_epi32(__M, __W); 942181ad6265SDimitry Andric return __builtin_reduce_add((__v16si)__W); 94220b57cec5SDimitry Andric } 94230b57cec5SDimitry Andric 94240b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 94250b57cec5SDimitry Andric _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { 94260b57cec5SDimitry Andric __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W); 942781ad6265SDimitry Andric return __builtin_reduce_mul((__v16si)__W); 94280b57cec5SDimitry Andric } 94290b57cec5SDimitry Andric 94300b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 94310b57cec5SDimitry Andric _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) { 943281ad6265SDimitry Andric __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W); 943304eeddc0SDimitry Andric return __builtin_reduce_and((__v16si)__W); 94340b57cec5SDimitry Andric } 94350b57cec5SDimitry Andric 94360b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 94370b57cec5SDimitry Andric _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { 94380b57cec5SDimitry Andric __W = _mm512_maskz_mov_epi32(__M, __W); 943904eeddc0SDimitry Andric return __builtin_reduce_or((__v16si)__W); 94400b57cec5SDimitry Andric } 94410b57cec5SDimitry Andric 94420b57cec5SDimitry Andric static __inline__ float 
__DEFAULT_FN_ATTRS512 94430b57cec5SDimitry Andric _mm512_reduce_add_ps(__m512 __W) { 9444d409305fSDimitry Andric return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W); 94450b57cec5SDimitry Andric } 94460b57cec5SDimitry Andric 94470b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512 94480b57cec5SDimitry Andric _mm512_reduce_mul_ps(__m512 __W) { 9449e8d8bef9SDimitry Andric return __builtin_ia32_reduce_fmul_ps512(1.0f, __W); 94500b57cec5SDimitry Andric } 94510b57cec5SDimitry Andric 94520b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512 94530b57cec5SDimitry Andric _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) { 94540b57cec5SDimitry Andric __W = _mm512_maskz_mov_ps(__M, __W); 9455d409305fSDimitry Andric return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W); 94560b57cec5SDimitry Andric } 94570b57cec5SDimitry Andric 94580b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512 94590b57cec5SDimitry Andric _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { 94600b57cec5SDimitry Andric __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W); 9461e8d8bef9SDimitry Andric return __builtin_ia32_reduce_fmul_ps512(1.0f, __W); 94620b57cec5SDimitry Andric } 94630b57cec5SDimitry Andric 94640b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 94650b57cec5SDimitry Andric _mm512_reduce_max_epi64(__m512i __V) { 946604eeddc0SDimitry Andric return __builtin_reduce_max((__v8di)__V); 94670b57cec5SDimitry Andric } 94680b57cec5SDimitry Andric 94690b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 94700b57cec5SDimitry Andric _mm512_reduce_max_epu64(__m512i __V) { 947104eeddc0SDimitry Andric return __builtin_reduce_max((__v8du)__V); 94720b57cec5SDimitry Andric } 94730b57cec5SDimitry Andric 94740b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 94750b57cec5SDimitry Andric _mm512_reduce_min_epi64(__m512i __V) { 947604eeddc0SDimitry Andric return 
__builtin_reduce_min((__v8di)__V); 94770b57cec5SDimitry Andric } 94780b57cec5SDimitry Andric 94790b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 94800b57cec5SDimitry Andric _mm512_reduce_min_epu64(__m512i __V) { 948104eeddc0SDimitry Andric return __builtin_reduce_min((__v8du)__V); 94820b57cec5SDimitry Andric } 94830b57cec5SDimitry Andric 94840b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 94850b57cec5SDimitry Andric _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) { 94860b57cec5SDimitry Andric __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V); 948704eeddc0SDimitry Andric return __builtin_reduce_max((__v8di)__V); 94880b57cec5SDimitry Andric } 94890b57cec5SDimitry Andric 94900b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 94910b57cec5SDimitry Andric _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) { 94920b57cec5SDimitry Andric __V = _mm512_maskz_mov_epi64(__M, __V); 949304eeddc0SDimitry Andric return __builtin_reduce_max((__v8du)__V); 94940b57cec5SDimitry Andric } 94950b57cec5SDimitry Andric 94960b57cec5SDimitry Andric static __inline__ long long __DEFAULT_FN_ATTRS512 94970b57cec5SDimitry Andric _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) { 94980b57cec5SDimitry Andric __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V); 949904eeddc0SDimitry Andric return __builtin_reduce_min((__v8di)__V); 95000b57cec5SDimitry Andric } 95010b57cec5SDimitry Andric 95020b57cec5SDimitry Andric static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 95030b57cec5SDimitry Andric _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { 950481ad6265SDimitry Andric __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V); 950504eeddc0SDimitry Andric return __builtin_reduce_min((__v8du)__V); 95060b57cec5SDimitry Andric } 95070b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 
95080b57cec5SDimitry Andric _mm512_reduce_max_epi32(__m512i __V) { 950904eeddc0SDimitry Andric return __builtin_reduce_max((__v16si)__V); 95100b57cec5SDimitry Andric } 95110b57cec5SDimitry Andric 95120b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS512 95130b57cec5SDimitry Andric _mm512_reduce_max_epu32(__m512i __V) { 951404eeddc0SDimitry Andric return __builtin_reduce_max((__v16su)__V); 95150b57cec5SDimitry Andric } 95160b57cec5SDimitry Andric 95170b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 95180b57cec5SDimitry Andric _mm512_reduce_min_epi32(__m512i __V) { 951904eeddc0SDimitry Andric return __builtin_reduce_min((__v16si)__V); 95200b57cec5SDimitry Andric } 95210b57cec5SDimitry Andric 95220b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS512 95230b57cec5SDimitry Andric _mm512_reduce_min_epu32(__m512i __V) { 952404eeddc0SDimitry Andric return __builtin_reduce_min((__v16su)__V); 95250b57cec5SDimitry Andric } 95260b57cec5SDimitry Andric 95270b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 95280b57cec5SDimitry Andric _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { 95290b57cec5SDimitry Andric __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V); 953004eeddc0SDimitry Andric return __builtin_reduce_max((__v16si)__V); 95310b57cec5SDimitry Andric } 95320b57cec5SDimitry Andric 95330b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS512 95340b57cec5SDimitry Andric _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) { 95350b57cec5SDimitry Andric __V = _mm512_maskz_mov_epi32(__M, __V); 953604eeddc0SDimitry Andric return __builtin_reduce_max((__v16su)__V); 95370b57cec5SDimitry Andric } 95380b57cec5SDimitry Andric 95390b57cec5SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 95400b57cec5SDimitry Andric _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { 95410b57cec5SDimitry Andric __V = 
_mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V); 954204eeddc0SDimitry Andric return __builtin_reduce_min((__v16si)__V); 95430b57cec5SDimitry Andric } 95440b57cec5SDimitry Andric 95450b57cec5SDimitry Andric static __inline__ unsigned int __DEFAULT_FN_ATTRS512 95460b57cec5SDimitry Andric _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { 954781ad6265SDimitry Andric __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V); 954804eeddc0SDimitry Andric return __builtin_reduce_min((__v16su)__V); 95490b57cec5SDimitry Andric } 95500b57cec5SDimitry Andric 95510b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 95520b57cec5SDimitry Andric _mm512_reduce_max_pd(__m512d __V) { 9553fe6060f1SDimitry Andric return __builtin_ia32_reduce_fmax_pd512(__V); 95540b57cec5SDimitry Andric } 95550b57cec5SDimitry Andric 95560b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 95570b57cec5SDimitry Andric _mm512_reduce_min_pd(__m512d __V) { 9558fe6060f1SDimitry Andric return __builtin_ia32_reduce_fmin_pd512(__V); 95590b57cec5SDimitry Andric } 95600b57cec5SDimitry Andric 95610b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 95620b57cec5SDimitry Andric _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) { 95630b57cec5SDimitry Andric __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V); 9564fe6060f1SDimitry Andric return __builtin_ia32_reduce_fmax_pd512(__V); 95650b57cec5SDimitry Andric } 95660b57cec5SDimitry Andric 95670b57cec5SDimitry Andric static __inline__ double __DEFAULT_FN_ATTRS512 95680b57cec5SDimitry Andric _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { 95690b57cec5SDimitry Andric __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V); 9570fe6060f1SDimitry Andric return __builtin_ia32_reduce_fmin_pd512(__V); 95710b57cec5SDimitry Andric } 95720b57cec5SDimitry Andric 95730b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512 95740b57cec5SDimitry Andric 
_mm512_reduce_max_ps(__m512 __V) { 9575fe6060f1SDimitry Andric return __builtin_ia32_reduce_fmax_ps512(__V); 95760b57cec5SDimitry Andric } 95770b57cec5SDimitry Andric 95780b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512 95790b57cec5SDimitry Andric _mm512_reduce_min_ps(__m512 __V) { 9580fe6060f1SDimitry Andric return __builtin_ia32_reduce_fmin_ps512(__V); 95810b57cec5SDimitry Andric } 95820b57cec5SDimitry Andric 95830b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512 95840b57cec5SDimitry Andric _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { 95850b57cec5SDimitry Andric __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V); 9586fe6060f1SDimitry Andric return __builtin_ia32_reduce_fmax_ps512(__V); 95870b57cec5SDimitry Andric } 95880b57cec5SDimitry Andric 95890b57cec5SDimitry Andric static __inline__ float __DEFAULT_FN_ATTRS512 95900b57cec5SDimitry Andric _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) { 95910b57cec5SDimitry Andric __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V); 9592fe6060f1SDimitry Andric return __builtin_ia32_reduce_fmin_ps512(__V); 95930b57cec5SDimitry Andric } 95940b57cec5SDimitry Andric 9595a7dea167SDimitry Andric /// Moves the least significant 32 bits of a vector of [16 x i32] to a 9596a7dea167SDimitry Andric /// 32-bit signed integer value. 9597a7dea167SDimitry Andric /// 9598a7dea167SDimitry Andric /// \headerfile <x86intrin.h> 9599a7dea167SDimitry Andric /// 9600a7dea167SDimitry Andric /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction. 9601a7dea167SDimitry Andric /// 9602a7dea167SDimitry Andric /// \param __A 9603a7dea167SDimitry Andric /// A vector of [16 x i32]. The least significant 32 bits are moved to the 9604a7dea167SDimitry Andric /// destination. 9605a7dea167SDimitry Andric /// \returns A 32-bit signed integer containing the moved value. 
9606a7dea167SDimitry Andric static __inline__ int __DEFAULT_FN_ATTRS512 9607a7dea167SDimitry Andric _mm512_cvtsi512_si32(__m512i __A) { 9608a7dea167SDimitry Andric __v16si __b = (__v16si)__A; 9609a7dea167SDimitry Andric return __b[0]; 9610a7dea167SDimitry Andric } 9611a7dea167SDimitry Andric 9612fe6060f1SDimitry Andric /// Loads 8 double-precision (64-bit) floating-point elements stored at memory 9613fe6060f1SDimitry Andric /// locations starting at location \a base_addr at packed 32-bit integer indices 9614fe6060f1SDimitry Andric /// stored in the lower half of \a vindex scaled by \a scale them in dst. 9615fe6060f1SDimitry Andric /// 9616fe6060f1SDimitry Andric /// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions. 9617fe6060f1SDimitry Andric /// 961881ad6265SDimitry Andric /// \code{.operation} 9619fe6060f1SDimitry Andric /// FOR j := 0 to 7 9620fe6060f1SDimitry Andric /// i := j*64 9621fe6060f1SDimitry Andric /// m := j*32 9622fe6060f1SDimitry Andric /// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 9623fe6060f1SDimitry Andric /// dst[i+63:i] := MEM[addr+63:addr] 9624fe6060f1SDimitry Andric /// ENDFOR 9625fe6060f1SDimitry Andric /// dst[MAX:512] := 0 962681ad6265SDimitry Andric /// \endcode 9627fe6060f1SDimitry Andric #define _mm512_i32logather_pd(vindex, base_addr, scale) \ 9628fe6060f1SDimitry Andric _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale)) 9629fe6060f1SDimitry Andric 9630fe6060f1SDimitry Andric /// Loads 8 double-precision (64-bit) floating-point elements from memory 9631fe6060f1SDimitry Andric /// starting at location \a base_addr at packed 32-bit integer indices stored in 9632fe6060f1SDimitry Andric /// the lower half of \a vindex scaled by \a scale into dst using writemask 9633fe6060f1SDimitry Andric /// \a mask (elements are copied from \a src when the corresponding mask bit is 9634fe6060f1SDimitry Andric /// not set). 
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)        \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),      \
                           (base_addr), (scale))

/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
/// at packed 32-bit integer indices stored in the lower half of \a vindex
/// scaled by \a scale and stores them in dst.
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_epi64(vindex, base_addr, scale)                     \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))

/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
/// at packed 32-bit integer indices stored in the lower half of \a vindex
/// scaled by \a scale and stores them in dst using writemask \a mask (elements
/// are copied from \a src when the corresponding mask bit is not set).
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale)     \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex),   \
                              (base_addr), (scale))

/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale)                   \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))

/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in \a vindex scaled by \a scale. Only those elements
/// whose corresponding mask bit is set in writemask \a mask are written to
/// memory.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale)        \
  _mm512_mask_i32scatter_pd((base_addr), (mask),                               \
                            _mm512_castsi512_si256(vindex), (v1), (scale))

/// Stores 8 packed 64-bit integer elements located in \a v1 and stores them in
/// memory locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale)                \
  _mm512_i32scatter_epi64((base_addr),                                         \
                          _mm512_castsi512_si256(vindex), (v1), (scale))

/// Stores 8 packed 64-bit integer elements located in \a v1 and stores them in
/// memory locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in \a vindex scaled by \a scale using writemask \a mask
/// (elements whose corresponding mask bit is not set are not written to memory).
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
9760fe6060f1SDimitry Andric /// 976181ad6265SDimitry Andric /// \code{.operation} 9762fe6060f1SDimitry Andric /// FOR j := 0 to 7 9763fe6060f1SDimitry Andric /// i := j*64 9764fe6060f1SDimitry Andric /// m := j*32 9765fe6060f1SDimitry Andric /// IF mask[j] 9766fe6060f1SDimitry Andric /// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8 9767fe6060f1SDimitry Andric /// MEM[addr+63:addr] := a[i+63:i] 9768fe6060f1SDimitry Andric /// FI 9769fe6060f1SDimitry Andric /// ENDFOR 977081ad6265SDimitry Andric /// \endcode 9771fe6060f1SDimitry Andric #define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale) \ 9772fe6060f1SDimitry Andric _mm512_mask_i32scatter_epi64((base_addr), (mask), \ 9773fe6060f1SDimitry Andric _mm512_castsi512_si256(vindex), (v1), (scale)) 9774fe6060f1SDimitry Andric 97750b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS512 97760b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128 97770b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 97780b57cec5SDimitry Andric 97790b57cec5SDimitry Andric #endif /* __AVX512FINTRIN_H */ 9780